From 1a7de64ff4a769e77a4be6bf12343feb10ed05be Mon Sep 17 00:00:00 2001 From: seanshpark Date: Thu, 8 Dec 2022 13:31:29 +0900 Subject: [PATCH 001/567] [mlir] Revise GetMlirOpNameFromOpCode for custom/while This will revise GetMlirOpNameFromOpCode method to follow default name from EnumNameBuiltinOperator for custom and while as tfl dialect supports them. --- tensorflow/compiler/mlir/lite/flatbuffer_operator.cc | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/flatbuffer_operator.cc b/tensorflow/compiler/mlir/lite/flatbuffer_operator.cc index 3cb1edc466abce..9c050193775b68 100644 --- a/tensorflow/compiler/mlir/lite/flatbuffer_operator.cc +++ b/tensorflow/compiler/mlir/lite/flatbuffer_operator.cc @@ -65,15 +65,9 @@ StatusOr GetPaddingAttr(TfLitePadding pad_params, std::string mlir::GetMlirOpNameFromOpCode( const tflite::OperatorCodeT& op_code) { auto builtin_code = tflite::GetBuiltinCode(&op_code); - if (builtin_code == tflite::BuiltinOperator_CUSTOM) { - return std::string("tfl.custom"); - } if (builtin_code == tflite::BuiltinOperator_IF) { return std::string("tf.If"); } - if (builtin_code == tflite::BuiltinOperator_WHILE) { - return std::string("tfl.while"); - } llvm::StringRef op_name(tflite::EnumNameBuiltinOperator(builtin_code)); return llvm::Twine("tfl.", op_name.lower()).str(); From 450ff271f086150a9c1cb595a364b24c0c1bae56 Mon Sep 17 00:00:00 2001 From: DongHak Park Date: Thu, 2 Feb 2023 15:25:38 +0900 Subject: [PATCH 002/567] Typo Error in tensorflow/lite/schema/schema_utils.cc in tensorflow/lite/python/lite.py in tensorflow/compiler/mlir/lite/transforms/legalize_jax_random.cc There is typo error (trivial) --- .../compiler/mlir/lite/transforms/legalize_jax_random.cc | 2 +- tensorflow/lite/python/lite.py | 2 +- tensorflow/lite/schema/schema_utils.cc | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/transforms/legalize_jax_random.cc 
b/tensorflow/compiler/mlir/lite/transforms/legalize_jax_random.cc index db99805c92e0b3..3492e375a33d88 100644 --- a/tensorflow/compiler/mlir/lite/transforms/legalize_jax_random.cc +++ b/tensorflow/compiler/mlir/lite/transforms/legalize_jax_random.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -// The full pipline of converting jax random include 2 steps. +// The full pipeline of converting jax random include 2 steps. // 1. Rename the jax random functions to tflite wrapped functions with the aid // of "jax.named_call". For example, in the dumped hlo, the // jax.random.uniform will have name "tfl_wrapped_jax_random_uniform". diff --git a/tensorflow/lite/python/lite.py b/tensorflow/lite/python/lite.py index 2f112d1dd8933a..c7ba41519f1e96 100644 --- a/tensorflow/lite/python/lite.py +++ b/tensorflow/lite/python/lite.py @@ -1624,7 +1624,7 @@ def _convert_concrete_functions_to_saved_model(self, output_dir): # Without the provided trackable obj, it is not able to serialize the given # concrete functions as a saved model format. Also when trackable obj is - # a function, use the original concrete function conversion pipline. + # a function, use the original concrete function conversion pipeline. if not self._trackable_obj or isinstance( self._trackable_obj, (_function.ConcreteFunction, _def_function.Function), diff --git a/tensorflow/lite/schema/schema_utils.cc b/tensorflow/lite/schema/schema_utils.cc index fc19290b862777..285873de24d84e 100644 --- a/tensorflow/lite/schema/schema_utils.cc +++ b/tensorflow/lite/schema/schema_utils.cc @@ -21,7 +21,7 @@ limitations under the License. 
namespace tflite { // The following GetBuiltinCode methods are the utility methods for reading -// builtin operatore code, ensuring compatibility issues between v3 and v3a +// builtin operator code, ensuring compatibility issues between v3 and v3a // schema. Always the maximum value of the two fields always will be the correct // value as follows: // @@ -29,7 +29,7 @@ namespace tflite { // // The `builtin_code` field is not available in the v3 models. Flatbuffer // library will feed zero value, which is the default value in the v3a schema. -// The actual builtin operatore code value will exist in the +// The actual builtin operator code value will exist in the // `deprecated_builtin_code` field. At the same time, it implies that // `deprecated_builtin_code` >= `builtin_code` and the maximum value of the two // fields will be same with `deprecated_builtin_code'. From 3a979e53b401508e7cab3147d348ff3577b108a8 Mon Sep 17 00:00:00 2001 From: Kun-Lu Date: Mon, 27 Mar 2023 16:05:19 -0400 Subject: [PATCH 003/567] Add byte-swapping for TFLite String TensorType on s390x Signed-off-by: Kun-Lu --- .../mlir/lite/flatbuffer_to_string.cc | 3 +- tensorflow/lite/core/model_builder.cc | 37 +++++++++++++------ tensorflow/lite/core/model_builder.h | 17 ++++++--- .../model_modifier/embedder_main.cc | 7 ++-- .../mini_benchmark/validator_test.cc | 2 +- tensorflow/lite/tools/flatbuffer_utils.py | 23 +++++++++++- 6 files changed, 66 insertions(+), 23 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/flatbuffer_to_string.cc b/tensorflow/compiler/mlir/lite/flatbuffer_to_string.cc index a51c4eaabf2c20..39c1c995050e9b 100644 --- a/tensorflow/compiler/mlir/lite/flatbuffer_to_string.cc +++ b/tensorflow/compiler/mlir/lite/flatbuffer_to_string.cc @@ -141,7 +141,8 @@ int main(int argc, char** argv) { std::string serialized_model; if (tflite::ReadAndVerify(argv[1], &serialized_model)) return 1; #if FLATBUFFERS_LITTLEENDIAN == 0 - 
tflite::FlatBufferModel::ByteSwapSerializedModel(&serialized_model); + if (std::string(argv[1]) == "-") + tflite::FlatBufferModel::ByteSwapSerializedModel(&serialized_model, true); #endif tflite::ToString(serialized_model); return 0; diff --git a/tensorflow/lite/core/model_builder.cc b/tensorflow/lite/core/model_builder.cc index 3684892bfb04d7..0b3239f78cd9e7 100644 --- a/tensorflow/lite/core/model_builder.cc +++ b/tensorflow/lite/core/model_builder.cc @@ -107,16 +107,25 @@ std::unique_ptr FlatBufferModel::VerifyAndBuildFromBuffer( #if FLATBUFFERS_LITTLEENDIAN == 0 -void FlatBufferModel::ByteSwapSerializedModel(std::string* serialized_model) { +void FlatBufferModel::ByteSwapSerializedModel(std::string* serialized_model, + bool from_big_endian) { const uint8_t* buffer = reinterpret_cast(serialized_model->c_str()); const tflite::Model* input_model = tflite::GetModel(buffer); - ByteSwapTFLiteModel(input_model); + ByteSwapTFLiteModel(input_model, from_big_endian); } void FlatBufferModel::ByteSwapBuffer(int8_t tensor_type, size_t buffer_size, - uint8_t* buffer) { + uint8_t* buffer, bool from_big_endian) { switch (tensor_type) { + case tflite::TensorType_STRING: { + auto bp = reinterpret_cast(buffer); + int num_of_strings = + from_big_endian ? 
bp[0] : flatbuffers::EndianSwap(bp[0]); + for (int i = 0; i < num_of_strings + 2; i++) + bp[i] = flatbuffers::EndianSwap(bp[i]); + break; + } // 16-bit types case tflite::TensorType_FLOAT16: case tflite::TensorType_INT16: @@ -151,7 +160,8 @@ void FlatBufferModel::ByteSwapBuffer(int8_t tensor_type, size_t buffer_size, } } -void FlatBufferModel::ByteSwapTFLiteModel(const tflite::Model* tfl_model) { +void FlatBufferModel::ByteSwapTFLiteModel(const tflite::Model* tfl_model, + bool from_big_endian) { bool buffer_swapped[tfl_model->buffers()->size()] = {}; for (size_t subgraph_idx = 0; subgraph_idx < tfl_model->subgraphs()->size(); subgraph_idx++) { @@ -167,7 +177,7 @@ void FlatBufferModel::ByteSwapTFLiteModel(const tflite::Model* tfl_model) { if (!buffer_ || !buffer_->data()) continue; auto* buffer = buffer_->data(); uint8_t* buff_ = const_cast(buffer->data()); - ByteSwapBuffer(tensor->type(), buffer->size(), buff_); + ByteSwapBuffer(tensor->type(), buffer->size(), buff_, from_big_endian); buffer_swapped[tensor->buffer()] = true; } } @@ -175,21 +185,25 @@ void FlatBufferModel::ByteSwapTFLiteModel(const tflite::Model* tfl_model) { } std::unique_ptr FlatBufferModel::ByteConvertModel( - std::unique_ptr model, ErrorReporter* error_reporter) { + std::unique_ptr model, ErrorReporter* error_reporter, + bool from_big_endian) { if (model == nullptr) return model; auto tfl_model = model->GetModel(); if (tfl_model->subgraphs()->size() == 0) return model; if (tfl_model->subgraphs()->Get(0)->tensors()->size() == 0) return model; - return ByteSwapFlatBufferModel(std::move(model), error_reporter); + if (tfl_model->buffers()->size() < 2) return model; + return ByteSwapFlatBufferModel(std::move(model), error_reporter, + from_big_endian); } std::unique_ptr FlatBufferModel::ByteSwapFlatBufferModel( - std::unique_ptr model, ErrorReporter* error_reporter) { + std::unique_ptr model, ErrorReporter* error_reporter, + bool from_big_endian) { FlatBufferModel* modelp = model.release(); auto 
tflite_model = modelp->GetModel(); auto copied_model = std::make_unique(); tflite_model->UnPackTo(copied_model.get(), nullptr); - ByteSwapTFLiteModelT(copied_model.get()); + ByteSwapTFLiteModelT(copied_model.get(), from_big_endian); std::unique_ptr builder( new flatbuffers::FlatBufferBuilder()); auto packed_model = tflite::Model::Pack(*builder, copied_model.get()); @@ -200,7 +214,8 @@ std::unique_ptr FlatBufferModel::ByteSwapFlatBufferModel( builder_->GetSize(), error_reporter); } -void FlatBufferModel::ByteSwapTFLiteModelT(tflite::ModelT* tfl_modelt) { +void FlatBufferModel::ByteSwapTFLiteModelT(tflite::ModelT* tfl_modelt, + bool from_big_endian) { size_t bytes_per_elem = 0; bool buffer_swapped[tfl_modelt->buffers.size()] = {}; for (size_t subgraph_idx = 0; subgraph_idx < tfl_modelt->subgraphs.size(); @@ -213,7 +228,7 @@ void FlatBufferModel::ByteSwapTFLiteModelT(tflite::ModelT* tfl_modelt) { const auto* buffer = &(tfl_modelt->buffers[tensor->buffer].get()->data); if (buffer && buffer->data()) { uint8_t* buff_ = const_cast(buffer->data()); - ByteSwapBuffer(tensor->type, buffer->size(), buff_); + ByteSwapBuffer(tensor->type, buffer->size(), buff_, from_big_endian); buffer_swapped[tensor->buffer] = true; } } diff --git a/tensorflow/lite/core/model_builder.h b/tensorflow/lite/core/model_builder.h index bcf5248d03cc47..a7202f133d5475 100644 --- a/tensorflow/lite/core/model_builder.h +++ b/tensorflow/lite/core/model_builder.h @@ -158,28 +158,33 @@ class FlatBufferModel { #if FLATBUFFERS_LITTLEENDIAN == 0 /// Byte swap a constant buffer in place. static void ByteSwapBuffer(int8_t tensor_type, size_t buffer_size, - uint8_t* buffer); + uint8_t* buffer, bool from_big_endian = true); /// Byte swap the buffers field of a TFLite Model instance in place. 
- static void ByteSwapTFLiteModel(const tflite::Model* tfl_model); + static void ByteSwapTFLiteModel(const tflite::Model* tfl_model, + bool from_big_endian = true); /// Byte swap the buffers field of a TFLite ModelT instance in place. - static void ByteSwapTFLiteModelT(tflite::ModelT* tfl_modelt); + static void ByteSwapTFLiteModelT(tflite::ModelT* tfl_modelt, + bool from_big_endian = true); /// Convert the TFLite buffers field between LE and BE format in a /// FlatBufferModel which is not empty and return the converted instance. static std::unique_ptr ByteConvertModel( std::unique_ptr model, - ErrorReporter* error_reporter = DefaultErrorReporter()); + ErrorReporter* error_reporter = DefaultErrorReporter(), + bool from_big_endian = false); /// Byte Swap the TFLite buffers field in a FlatBufferModel and return the /// swapped instance. static std::unique_ptr ByteSwapFlatBufferModel( std::unique_ptr model, - ErrorReporter* error_reporter = DefaultErrorReporter()); + ErrorReporter* error_reporter = DefaultErrorReporter(), + bool from_big_endian = false); /// Byte Swap the serialized String of a TFLite model in place. - static void ByteSwapSerializedModel(std::string* serialized_model); + static void ByteSwapSerializedModel(std::string* serialized_model, + bool from_big_endian = true); #endif // Releases memory or unmaps mmaped memory. 
diff --git a/tensorflow/lite/experimental/acceleration/mini_benchmark/model_modifier/embedder_main.cc b/tensorflow/lite/experimental/acceleration/mini_benchmark/model_modifier/embedder_main.cc index 0287f3d1dabf7c..86608c7191f07f 100644 --- a/tensorflow/lite/experimental/acceleration/mini_benchmark/model_modifier/embedder_main.cc +++ b/tensorflow/lite/experimental/acceleration/mini_benchmark/model_modifier/embedder_main.cc @@ -73,7 +73,7 @@ int RunEmbedder(const EmbedderOptions& options) { return 3; } #if FLATBUFFERS_LITTLEENDIAN == 0 - tflite::FlatBufferModel::ByteSwapSerializedModel(&main_model_contents); + tflite::FlatBufferModel::ByteSwapSerializedModel(&main_model_contents, false); #endif const Model* main_model = flatbuffers::GetRoot(main_model_contents.data()); @@ -87,7 +87,8 @@ int RunEmbedder(const EmbedderOptions& options) { return 4; } #if FLATBUFFERS_LITTLEENDIAN == 0 - tflite::FlatBufferModel::ByteSwapSerializedModel(&metrics_model_contents); + tflite::FlatBufferModel::ByteSwapSerializedModel(&metrics_model_contents, + false); #endif const Model* metrics_model = flatbuffers::GetRoot(metrics_model_contents.data()); @@ -134,7 +135,7 @@ int RunEmbedder(const EmbedderOptions& options) { return 7; } #if FLATBUFFERS_LITTLEENDIAN == 0 - tflite::FlatBufferModel::ByteSwapSerializedModel(&binary); + tflite::FlatBufferModel::ByteSwapSerializedModel(&binary, true); #endif f << binary; f.close(); diff --git a/tensorflow/lite/experimental/acceleration/mini_benchmark/validator_test.cc b/tensorflow/lite/experimental/acceleration/mini_benchmark/validator_test.cc index d51b84dd94ea9c..e6d756ce4244d8 100644 --- a/tensorflow/lite/experimental/acceleration/mini_benchmark/validator_test.cc +++ b/tensorflow/lite/experimental/acceleration/mini_benchmark/validator_test.cc @@ -121,7 +121,7 @@ TEST_F(ValidatorTest, HappyPathOnCpuWithCustomValidation) { reinterpret_cast(model_with_input.GetBufferPointer()), model_with_input.GetSize()); #if FLATBUFFERS_LITTLEENDIAN == 0 - 
tflite::FlatBufferModel::ByteSwapSerializedModel(&serialized_str); + tflite::FlatBufferModel::ByteSwapSerializedModel(&serialized_str, true); #endif std::string model_path = MiniBenchmarkTestHelper::DumpToTempFile( "mobilenet_quant_with_input.tflite", diff --git a/tensorflow/lite/tools/flatbuffer_utils.py b/tensorflow/lite/tools/flatbuffer_utils.py index 6c8dcf0a031fce..48da3ff04827d1 100644 --- a/tensorflow/lite/tools/flatbuffer_utils.py +++ b/tensorflow/lite/tools/flatbuffer_utils.py @@ -297,6 +297,23 @@ def byte_swap_buffer_content(buffer, chunksize, from_endiness, to_endiness): ) +def byte_swap_string_content(buffer, from_endiness, to_endiness): + """Helper function for byte-swapping the string buffer. + + Args: + buffer: TFLite string buffer of from_endiness format. + from_endiness: The original endianness format of the string buffer. + to_endiness: The destined endianness format of the string buffer. + """ + num_of_strings = int.from_bytes(buffer.data[0:4], from_endiness) + string_content = bytearray(buffer.data[4*(num_of_strings+2):]) + prefix_data = b''.join([int.from_bytes( + buffer.data[i:i+4], from_endiness).to_bytes( + 4, to_endiness) for i in range( + 0, (num_of_strings+1)*4+1, 4)]) + buffer.data = prefix_data + string_content + + def byte_swap_tflite_model_obj(model, from_endiness, to_endiness): """Byte swaps the buffers field in a TFLite model. 
@@ -334,7 +351,11 @@ def byte_swap_tflite_model_obj(model, from_endiness, to_endiness): and tensor.buffer not in buffer_swapped and model.buffers[tensor.buffer].data is not None ): - if tensor.type in types_of_16_bits: + if tensor.type == schema_fb.TensorType.STRING: + byte_swap_string_content( + model.buffers[tensor.buffer], from_endiness, to_endiness + ) + elif tensor.type in types_of_16_bits: byte_swap_buffer_content( model.buffers[tensor.buffer], 2, from_endiness, to_endiness ) From 058656893d223956c9867c99634fbb223655f4e3 Mon Sep 17 00:00:00 2001 From: Feiyue Chen Date: Thu, 13 Apr 2023 10:31:07 +0800 Subject: [PATCH 004/567] get data ranges for missing types --- tensorflow/lite/tools/utils.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/tools/utils.cc b/tensorflow/lite/tools/utils.cc index 0b43de6d779d74..846f76471f2ce1 100644 --- a/tensorflow/lite/tools/utils.cc +++ b/tensorflow/lite/tools/utils.cc @@ -153,8 +153,8 @@ void GetDataRangesForType(TfLiteType type, float* low_range, type == kTfLiteFloat64) { *low_range = -0.5f; *high_range = 0.5f; - } else if (type == kTfLiteInt64 || type == kTfLiteInt64 || - type == kTfLiteInt64 || type == kTfLiteInt64) { + } else if (type == kTfLiteInt64 || type == kTfLiteUInt64 || + type == kTfLiteInt32 || type == kTfLiteUInt32) { *low_range = 0; *high_range = 99; } else if (type == kTfLiteUInt8) { From 30699a7f6c4d810dc26eabaec6c8ae732a1c2e12 Mon Sep 17 00:00:00 2001 From: pjpratik <118897289+pjpratik@users.noreply.github.com> Date: Tue, 23 May 2023 22:04:44 +0530 Subject: [PATCH 005/567] Update dataset.py to be compatible with TF 2.x The functions in the dataset.py are outdated and deprecated in Tensorflow 2.x. This commit updates the code to be compatible with the latest TF versions by replacing the deprecated functions with their TensorFlow 2.x equivalents. Thanks. 
--- tensorflow/lite/tutorials/dataset.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tensorflow/lite/tutorials/dataset.py b/tensorflow/lite/tutorials/dataset.py index 92a669b4460ec5..bae3d17d473f31 100644 --- a/tensorflow/lite/tutorials/dataset.py +++ b/tensorflow/lite/tutorials/dataset.py @@ -36,7 +36,7 @@ def read32(bytestream): def check_image_file_header(filename): """Validate that filename corresponds to images for the MNIST dataset.""" - with tf.gfile.Open(filename, 'rb') as f: + with tf.io.gfile.Gfile(filename, 'rb') as f: magic = read32(f) read32(f) # num_images, unused rows = read32(f) @@ -63,17 +63,17 @@ def check_labels_file_header(filename): def download(directory, filename): """Download (and unzip) a file from the MNIST dataset if not already done.""" filepath = os.path.join(directory, filename) - if tf.gfile.Exists(filepath): + if tf.io.gfile.exists(filepath): return filepath - if not tf.gfile.Exists(directory): - tf.gfile.MakeDirs(directory) + if not tf.io.gfile.exists(directory): + tf.io.gfile.makedirs(directory) # CVDF mirror of http://yann.lecun.com/exdb/mnist/ url = 'https://storage.googleapis.com/cvdf-datasets/mnist/' + filename + '.gz' _, zipped_filepath = tempfile.mkstemp(suffix='.gz') print('Downloading %s to %s' % (url, zipped_filepath)) urllib.request.urlretrieve(url, zipped_filepath) with gzip.open(zipped_filepath, 'rb') as f_in, \ - tf.gfile.Open(filepath, 'wb') as f_out: + tf.io.gfile.Gfile(filepath, 'wb') as f_out: shutil.copyfileobj(f_in, f_out) os.remove(zipped_filepath) return filepath @@ -90,15 +90,15 @@ def dataset(directory, images_file, labels_file): def decode_image(image): # Normalize from [0, 255] to [0.0, 1.0] - image = tf.decode_raw(image, tf.uint8) + image = tf.io.decode_raw(image, tf.uint8) image = tf.cast(image, tf.float32) image = tf.reshape(image, [784]) return image / 255.0 def decode_label(label): - label = tf.decode_raw(label, tf.uint8) # tf.string -> [tf.uint8] + label = 
tf.io.decode_raw(label, tf.uint8) # tf.string -> [tf.uint8] label = tf.reshape(label, []) # label is a scalar - return tf.to_int32(label) + return tf.cast(label,tf.int32) images = tf.data.FixedLengthRecordDataset( images_file, 28 * 28, header_bytes=16).map(decode_image) From 4fa0913dfc9629a3b42c930938a4729440d23896 Mon Sep 17 00:00:00 2001 From: Kun-Lu Date: Wed, 7 Jun 2023 09:33:29 -0400 Subject: [PATCH 006/567] Remove the trailing space Signed-off-by: Kun-Lu --- tensorflow/lite/core/model_builder.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/lite/core/model_builder.cc b/tensorflow/lite/core/model_builder.cc index 0b3239f78cd9e7..a4df232fb791e6 100644 --- a/tensorflow/lite/core/model_builder.cc +++ b/tensorflow/lite/core/model_builder.cc @@ -120,7 +120,7 @@ void FlatBufferModel::ByteSwapBuffer(int8_t tensor_type, size_t buffer_size, switch (tensor_type) { case tflite::TensorType_STRING: { auto bp = reinterpret_cast(buffer); - int num_of_strings = + int num_of_strings = from_big_endian ? bp[0] : flatbuffers::EndianSwap(bp[0]); for (int i = 0; i < num_of_strings + 2; i++) bp[i] = flatbuffers::EndianSwap(bp[i]); From aa2a1ced77c5e47fb7e4cb2f80b768bfc7c63e6b Mon Sep 17 00:00:00 2001 From: Mike Corrigan Date: Fri, 7 Oct 2022 22:30:51 -0500 Subject: [PATCH 007/567] Fix GPU compile errors (OpenCL and OpenGL) Fix number of parameters in call to CLArguments::Init(). Add missing linkopts for EGL, math, and dlopen. 
Signed-off-by: Mike Corrigan --- tensorflow/lite/delegates/gpu/cl/BUILD | 1 + .../lite/delegates/gpu/cl/cl_arguments_test.cc | 5 +++-- tensorflow/lite/delegates/gpu/cl/testing/BUILD | 7 +++++++ tensorflow/lite/delegates/gpu/gl/BUILD | 4 ++++ tensorflow/lite/delegates/gpu/gl/kernels/BUILD | 17 +++++++++++++---- 5 files changed, 28 insertions(+), 6 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/BUILD b/tensorflow/lite/delegates/gpu/cl/BUILD index d710059af90886..e9423c5b5dafcb 100644 --- a/tensorflow/lite/delegates/gpu/cl/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/BUILD @@ -126,6 +126,7 @@ cc_test( deps = [ ":buffer", ":cl_arguments", + ":cl_test", ":gpu_object", "//tensorflow/lite/delegates/gpu/common:gpu_info", "@com_google_absl//absl/strings", diff --git a/tensorflow/lite/delegates/gpu/cl/cl_arguments_test.cc b/tensorflow/lite/delegates/gpu/cl/cl_arguments_test.cc index 2a46c202286dd5..c9da770940968f 100644 --- a/tensorflow/lite/delegates/gpu/cl/cl_arguments_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/cl_arguments_test.cc @@ -21,6 +21,7 @@ limitations under the License. 
#include #include "absl/strings/match.h" #include "tensorflow/lite/delegates/gpu/cl/buffer.h" +#include "tensorflow/lite/delegates/gpu/cl/cl_test.h" #include "tensorflow/lite/delegates/gpu/cl/gpu_object.h" #include "tensorflow/lite/delegates/gpu/common/gpu_info.h" @@ -45,7 +46,7 @@ __kernel void main_function($0) { CLArguments cl_args; GpuInfo gpu_info; - ASSERT_OK(cl_args.Init(gpu_info, {}, nullptr, &args, &sample_code)); + ASSERT_OK(cl_args.Init(gpu_info, nullptr, &args, &sample_code)); EXPECT_TRUE(absl::StrContains(sample_code, "value = weights_buffer[id];")); EXPECT_TRUE( absl::StrContains(sample_code, "__global float4* weights_buffer")); @@ -67,7 +68,7 @@ TEST(CLArgumentsTest, TestNoSelector) { )"; CLArguments cl_args; GpuInfo gpu_info; - EXPECT_FALSE(cl_args.Init(gpu_info, {}, nullptr, &args, &sample_code).ok()); + EXPECT_FALSE(cl_args.Init(gpu_info, nullptr, &args, &sample_code).ok()); } } // namespace cl diff --git a/tensorflow/lite/delegates/gpu/cl/testing/BUILD b/tensorflow/lite/delegates/gpu/cl/testing/BUILD index f5c26496875348..ef5daedddf5186 100644 --- a/tensorflow/lite/delegates/gpu/cl/testing/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/testing/BUILD @@ -42,6 +42,13 @@ cc_test( cc_binary( name = "internal_api_samples", srcs = ["internal_api_samples.cc"], + linkopts = select({ + "//tensorflow:android": [ + "-lEGL", + "-lGLESv3", + ], + "//conditions:default": [] + }), tags = [ "nobuilder", "notap", diff --git a/tensorflow/lite/delegates/gpu/gl/BUILD b/tensorflow/lite/delegates/gpu/gl/BUILD index ff4c1663c0eec8..20e37c67b29f16 100644 --- a/tensorflow/lite/delegates/gpu/gl/BUILD +++ b/tensorflow/lite/delegates/gpu/gl/BUILD @@ -213,6 +213,7 @@ cc_test( linkopts = [ "-lEGL", "-lGLESv2", + "-lm", ], tags = tf_gpu_tests_tags() + [ "local", @@ -446,6 +447,9 @@ cc_library( cc_test( name = "serialization_test", srcs = ["serialization_test.cc"], + linkopts = [ + "-lm", + ], tags = [ "local", "nobuilder", diff --git 
a/tensorflow/lite/delegates/gpu/gl/kernels/BUILD b/tensorflow/lite/delegates/gpu/gl/kernels/BUILD index c648b75cd0fd27..3abd79f5729431 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/BUILD +++ b/tensorflow/lite/delegates/gpu/gl/kernels/BUILD @@ -45,6 +45,7 @@ cc_test( ], deps = [ ":converter", + ":test_util", "//tensorflow/lite/delegates/gpu/common:convert", "//tensorflow/lite/delegates/gpu/common:shape", "//tensorflow/lite/delegates/gpu/common:status", @@ -717,10 +718,18 @@ cc_library( testonly = 1, srcs = ["test_util.cc"], hdrs = ["test_util.h"], - linkopts = [ - "-lEGL", - "-lGLESv3", - ], + linkopts = select({ + "//tensorflow:android": [ + "-lEGL", + "-lGLESv3", + "-ldl", + "-lm", + ], + "//conditions:default": [ + "-lEGL", + "-lGLESv3", + ], + }), deps = [ "//tensorflow/lite/delegates/gpu/common:model", "//tensorflow/lite/delegates/gpu/common:operations", From c55a3fa55c1e2a26e7d5aba3637ddbde7cf4a4ee Mon Sep 17 00:00:00 2001 From: Mike Corrigan Date: Thu, 8 Jun 2023 11:49:31 -0500 Subject: [PATCH 008/567] Refactor ReLU clip to activation_max Refactor ReLUAttributes.clip to activation_max to match TfLite activation naming conventions. 
Signed-off-by: Mike Corrigan --- .../lite/delegates/gpu/common/model_builder.cc | 12 ++++++------ .../delegates/gpu/common/model_builder_helper.cc | 2 +- tensorflow/lite/delegates/gpu/common/operations.h | 12 ++++++------ tensorflow/lite/delegates/gpu/common/tasks/relu.cc | 8 ++++---- .../delegates/gpu/common/tasks/relu_test_util.cc | 8 ++++---- tensorflow/lite/delegates/gpu/gl/kernels/relu.cc | 8 ++++---- .../lite/delegates/gpu/gl/kernels/relu_test.cc | 8 ++++---- 7 files changed, 29 insertions(+), 29 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/common/model_builder.cc b/tensorflow/lite/delegates/gpu/common/model_builder.cc index 9702d8f4a828a8..481df15c42d04f 100644 --- a/tensorflow/lite/delegates/gpu/common/model_builder.cc +++ b/tensorflow/lite/delegates/gpu/common/model_builder.cc @@ -427,8 +427,8 @@ class ClampOperationsParser : public TFLiteOperationParser { // We replace clamp(...) with sequence of elementwise ops: // substaction -> usual relu with alpha = 0.0 -> addition. 
// node_sub = v0 = v - a // add op (add -a) - // node_relu = v1 = clamp(v0, 0.0, clip); // relu op alpha = 0.0, - // clip = b - a; + // node_relu = v1 = clamp(v0, 0.0, activation_max); // relu op alpha = 0.0, + // activation_max = b - a; // node_add = v2 = v1 + a // add op (add a) Node* node_sub = graph->NewNode(); Node* node_relu = graph->NewNode(); @@ -441,7 +441,7 @@ class ClampOperationsParser : public TFLiteOperationParser { ReLUAttributes relu_attr; relu_attr.alpha = 0.0f; - relu_attr.clip = clamp_b_ - clamp_a_; + relu_attr.activation_max = clamp_b_ - clamp_a_; node_relu->operation.type = ToString(OperationType::RELU); node_relu->operation.attributes = relu_attr; @@ -1937,7 +1937,7 @@ class QuantizeOperationParser : public TFLiteOperationParser { class ReLUOperationParser : public TFLiteOperationParser { public: - explicit ReLUOperationParser(int clip) : clip_(clip) {} + explicit ReLUOperationParser(int activation_max) : activation_max_(activation_max) {} absl::Status IsSupported(const TfLiteContext* context, const TfLiteNode* tflite_node, @@ -1957,13 +1957,13 @@ class ReLUOperationParser : public TFLiteOperationParser { const TfLiteLeakyReluParams* tf_options; auto status = RetrieveBuiltinData(tflite_node, &tf_options); attr.alpha = status.ok() ? 
tf_options->alpha : 0; - attr.clip = clip_; + attr.activation_max = activation_max_; node->operation.attributes = attr; return reader->AddOutputs(node); } private: - const int clip_; + const int activation_max_; }; class ResamplerOperationParser : public TFLiteOperationParser { diff --git a/tensorflow/lite/delegates/gpu/common/model_builder_helper.cc b/tensorflow/lite/delegates/gpu/common/model_builder_helper.cc index c825d2be691d5f..313bf7decfbe6b 100644 --- a/tensorflow/lite/delegates/gpu/common/model_builder_helper.cc +++ b/tensorflow/lite/delegates/gpu/common/model_builder_helper.cc @@ -397,7 +397,7 @@ absl::Status MaybeFuseActivation(TfLiteFusedActivation fused_activation, case kTfLiteActReluN1To1: case kTfLiteActRelu6: { ReLUAttributes attr; - attr.clip = fused_activation == kTfLiteActRelu + attr.activation_max = fused_activation == kTfLiteActRelu ? 0.0f : (fused_activation == kTfLiteActReluN1To1 ? 1.0f : 6.0f); Node* activation_node; diff --git a/tensorflow/lite/delegates/gpu/common/operations.h b/tensorflow/lite/delegates/gpu/common/operations.h index 37d8dfa0fcdc6f..71d6d1aa0f6025 100644 --- a/tensorflow/lite/delegates/gpu/common/operations.h +++ b/tensorflow/lite/delegates/gpu/common/operations.h @@ -392,16 +392,16 @@ Padding3D CalculateSamePadding(const BHWDC& input, // f(x):= { // if x < 0 : x -> alpha * x -// if x >= 0 : x -> min(clip, x) +// if x >= 0 : x -> min(activation_max, x) // } // // Examples: -// - ReLU: clip = 0, alpha = 0 -// - ReLU6: clip = 6, alpha = 0 -// - Leaky ReLU: clip = 0, alpha = a +// - ReLU: activation_max = 0, alpha = 0 +// - ReLU6: activation_max = 6, alpha = 0 +// - Leaky ReLU: activation_max = 0, alpha = a struct ReLUAttributes { - // clip <= 0 mean it is not set. - float clip = 0; + // activation_max <= 0 mean it is not set. 
+ float activation_max = 0; float alpha = 0; }; diff --git a/tensorflow/lite/delegates/gpu/common/tasks/relu.cc b/tensorflow/lite/delegates/gpu/common/tasks/relu.cc index 9afc2c7508eabb..a3b7e008a08aa5 100644 --- a/tensorflow/lite/delegates/gpu/common/tasks/relu.cc +++ b/tensorflow/lite/delegates/gpu/common/tasks/relu.cc @@ -37,14 +37,14 @@ ElementwiseDescriptor CreateReLU(const ReLUAttributes& attr, } else { min_func = "INIT_FLT4(0.0f)"; } - if (attr.clip != 0.0f) { + if (attr.activation_max != 0.0f) { if (precision == CalculationsPrecision::F32) { - result.args.AddFloat("clip", attr.clip); + result.args.AddFloat("activation_max", attr.activation_max); } else { - result.args.AddHalf("clip", half(attr.clip)); + result.args.AddHalf("activation_max", half(attr.activation_max)); } result.code = absl::StrCat("out_value = clamp(in_value, " + min_func + - ", INIT_FLT4(args.clip));"); + ", INIT_FLT4(args.activation_max));"); } else { result.code = absl::StrCat("out_value = max(in_value, ", min_func, ");"); } diff --git a/tensorflow/lite/delegates/gpu/common/tasks/relu_test_util.cc b/tensorflow/lite/delegates/gpu/common/tasks/relu_test_util.cc index 7c87bbb7823c0b..fefd5d3cf5759e 100644 --- a/tensorflow/lite/delegates/gpu/common/tasks/relu_test_util.cc +++ b/tensorflow/lite/delegates/gpu/common/tasks/relu_test_util.cc @@ -33,7 +33,7 @@ absl::Status ReLUNoClipNoAlphaTest(TestExecutionEnvironment* env) { ReLUAttributes attr; attr.alpha = 0.0f; - attr.clip = 0.0f; + attr.activation_max = 0.0f; for (auto precision : env->GetSupportedPrecisions()) { auto data_type = DeduceDataTypeFromPrecision(precision); @@ -62,7 +62,7 @@ absl::Status ReLUClipTest(TestExecutionEnvironment* env) { ReLUAttributes attr; attr.alpha = 0.0f; - attr.clip = 0.9f; + attr.activation_max = 0.9f; for (auto precision : env->GetSupportedPrecisions()) { auto data_type = DeduceDataTypeFromPrecision(precision); @@ -91,7 +91,7 @@ absl::Status ReLUAlphaTest(TestExecutionEnvironment* env) { ReLUAttributes attr; 
attr.alpha = 0.5f; - attr.clip = 0.0f; + attr.activation_max = 0.0f; for (auto precision : env->GetSupportedPrecisions()) { auto data_type = DeduceDataTypeFromPrecision(precision); @@ -120,7 +120,7 @@ absl::Status ReLUAlphaClipTest(TestExecutionEnvironment* env) { ReLUAttributes attr; attr.alpha = 0.5f; - attr.clip = 0.5f; + attr.activation_max = 0.5f; for (auto precision : env->GetSupportedPrecisions()) { auto data_type = DeduceDataTypeFromPrecision(precision); diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/relu.cc b/tensorflow/lite/delegates/gpu/gl/kernels/relu.cc index 6d05ea894d4cfe..20051ae4195c85 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/relu.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/relu.cc @@ -39,7 +39,7 @@ class ReLU : public NodeShader { absl::Status GenerateCode(const GenerationContext& ctx, GeneratedCode* generated_code) const final { const auto& attr = std::any_cast(ctx.op_attr); - // clamp(value, min(0, alpha * value), clip) + // clamp(value, min(0, alpha * value), activation_max) std::vector params; std::string min; if (attr.alpha == 0) { @@ -49,11 +49,11 @@ class ReLU : public NodeShader { params.push_back({"alpha", attr.alpha}); } std::string code; - if (attr.clip == 0) { + if (attr.activation_max == 0) { code = "value_0 = max(value_0, " + min + ");"; } else { - code = "value_0 = clamp(value_0, " + min + ", vec4($clip$));"; - params.push_back({"clip", attr.clip}); + code = "value_0 = clamp(value_0, " + min + ", vec4($activation_max$));"; + params.push_back({"activation_max", attr.activation_max}); } *generated_code = { /*parameters=*/std::move(params), diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/relu_test.cc b/tensorflow/lite/delegates/gpu/gl/kernels/relu_test.cc index 0c2d2536fd654e..a1c54bfe69b761 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/relu_test.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/relu_test.cc @@ -45,7 +45,7 @@ class ReluTest : public ::testing::Test { TEST_F(ReluTest, Smoke) { 
OperationType op_type = OperationType::RELU; ReLUAttributes attr; - attr.clip = 0; + attr.activation_max = 0; attr.alpha = 0; SingleOpModel model({ToString(op_type), attr}, {GetTensorRef(0)}, {GetTensorRef(1)}); @@ -58,7 +58,7 @@ TEST_F(ReluTest, Smoke) { TEST_F(ReluTest, ClipOnly) { OperationType op_type = OperationType::RELU; ReLUAttributes attr; - attr.clip = 6; + attr.activation_max = 6; attr.alpha = 0; SingleOpModel model({ToString(op_type), attr}, {GetTensorRef(0)}, {GetTensorRef(1)}); @@ -71,7 +71,7 @@ TEST_F(ReluTest, ClipOnly) { TEST_F(ReluTest, AlphaOnly) { OperationType op_type = OperationType::RELU; ReLUAttributes attr; - attr.clip = 0; + attr.activation_max = 0; attr.alpha = 0.5; SingleOpModel model({ToString(op_type), attr}, {GetTensorRef(0)}, {GetTensorRef(1)}); @@ -84,7 +84,7 @@ TEST_F(ReluTest, AlphaOnly) { TEST_F(ReluTest, ClipAndAlpha) { OperationType op_type = OperationType::RELU; ReLUAttributes attr; - attr.clip = 6; + attr.activation_max = 6; attr.alpha = 0.5; SingleOpModel model({ToString(op_type), attr}, {GetTensorRef(0)}, {GetTensorRef(1)}); From 532124e333e9bcfe80209a1ddd437998760246ac Mon Sep 17 00:00:00 2001 From: Mike Corrigan Date: Mon, 10 Oct 2022 21:15:02 -0500 Subject: [PATCH 009/567] Fix ReLUN1To1 fused activation for OpenCL and OpenGL The ReLUN1To1 op was repurposed from the a ReLU1 op. However it was not fully reimplemented. As a result it clipped to a minimum of 0 instead of -1. Add a min clip attribute, attribute_min, to the ReLU implementation. Set to -1 for ReLUN1To1 and 0 for all other ReLUs. Use the ReLUOperationParser for ReLUN1To1 instead of the ClampoperationParser. 
Signed-off-by: Mike Corrigan --- .../delegates/gpu/cl/kernels/relu_test.cc | 21 +++ .../delegates/gpu/common/model_builder.cc | 13 +- .../gpu/common/model_builder_helper.cc | 1 + .../lite/delegates/gpu/common/operations.h | 17 ++- .../lite/delegates/gpu/common/tasks/relu.cc | 7 +- .../gpu/common/tasks/relu_test_util.cc | 125 ++++++++++++++++++ .../gpu/common/tasks/relu_test_util.h | 4 + .../lite/delegates/gpu/gl/kernels/relu.cc | 3 +- .../delegates/gpu/gl/kernels/relu_test.cc | 56 ++++++++ 9 files changed, 235 insertions(+), 12 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/relu_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/relu_test.cc index 10a5c565c2437f..36984468fad976 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/relu_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/relu_test.cc @@ -46,6 +46,27 @@ TEST_F(OpenCLOperationTest, ReLUAlphaClip) { ASSERT_TRUE(status.ok()) << status.message(); } + +TEST_F(OpenCLOperationTest, ReLULN1NoClipNoAlpha) { + auto status = ReLUN1NoClipNoAlphaTest(&exec_env_); + ASSERT_TRUE(status.ok()) << status; +} + +TEST_F(OpenCLOperationTest, ReLUN1Clip) { + auto status = ReLUN1ClipTest(&exec_env_); + ASSERT_TRUE(status.ok()) << status; +} + +TEST_F(OpenCLOperationTest, ReLULN1Alpha) { + auto status = ReLUN1AlphaTest(&exec_env_); + ASSERT_TRUE(status.ok()) << status; +} + +TEST_F(OpenCLOperationTest, ReLUN1AlphaClip) { + auto status = ReLUN1AlphaClipTest(&exec_env_); + ASSERT_TRUE(status.ok()) << status; +} + } // namespace cl } // namespace gpu } // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/common/model_builder.cc b/tensorflow/lite/delegates/gpu/common/model_builder.cc index 481df15c42d04f..3b15bb9043db59 100644 --- a/tensorflow/lite/delegates/gpu/common/model_builder.cc +++ b/tensorflow/lite/delegates/gpu/common/model_builder.cc @@ -1937,7 +1937,8 @@ class QuantizeOperationParser : public TFLiteOperationParser { class ReLUOperationParser : public TFLiteOperationParser { public: - 
explicit ReLUOperationParser(int activation_max) : activation_max_(activation_max) {} + explicit ReLUOperationParser(int activation_min, int activation_max) + : activation_min_(activation_min), activation_max_(activation_max) {} absl::Status IsSupported(const TfLiteContext* context, const TfLiteNode* tflite_node, @@ -1957,12 +1958,14 @@ class ReLUOperationParser : public TFLiteOperationParser { const TfLiteLeakyReluParams* tf_options; auto status = RetrieveBuiltinData(tflite_node, &tf_options); attr.alpha = status.ok() ? tf_options->alpha : 0; + attr.activation_min = activation_min_; attr.activation_max = activation_max_; node->operation.attributes = attr; return reader->AddOutputs(node); } private: + const int activation_min_; const int activation_max_; }; @@ -3190,13 +3193,13 @@ std::unique_ptr NewOperationParser( } break; case kTfLiteBuiltinRelu: - return std::make_unique(0); + return std::make_unique(0, 0); case kTfLiteBuiltinRelu6: - return std::make_unique(6); + return std::make_unique(0, 6); case kTfLiteBuiltinReluN1To1: - return std::make_unique(-1.0, 1.0); + return std::make_unique(-1.0, 1.0); case kTfLiteBuiltinLeakyRelu: - return std::make_unique(0); + return std::make_unique(0, 0); case kTfLiteBuiltinPrelu: return std::make_unique(); case kTfLiteBuiltinReshape: diff --git a/tensorflow/lite/delegates/gpu/common/model_builder_helper.cc b/tensorflow/lite/delegates/gpu/common/model_builder_helper.cc index 313bf7decfbe6b..1772bc5d257907 100644 --- a/tensorflow/lite/delegates/gpu/common/model_builder_helper.cc +++ b/tensorflow/lite/delegates/gpu/common/model_builder_helper.cc @@ -400,6 +400,7 @@ absl::Status MaybeFuseActivation(TfLiteFusedActivation fused_activation, attr.activation_max = fused_activation == kTfLiteActRelu ? 0.0f : (fused_activation == kTfLiteActReluN1To1 ? 1.0f : 6.0f); + attr.activation_min = fused_activation == kTfLiteActReluN1To1 ? 
-1.0f : 0.0f; Node* activation_node; RETURN_IF_ERROR( NewPassthroughNode(graph, node, outputs[0], &activation_node)); diff --git a/tensorflow/lite/delegates/gpu/common/operations.h b/tensorflow/lite/delegates/gpu/common/operations.h index 71d6d1aa0f6025..ac384bbe801ea2 100644 --- a/tensorflow/lite/delegates/gpu/common/operations.h +++ b/tensorflow/lite/delegates/gpu/common/operations.h @@ -391,18 +391,25 @@ Padding3D CalculateSamePadding(const BHWDC& input, const DepthwiseConvolution3DAttributes& attr); // f(x):= { -// if x < 0 : x -> alpha * x -// if x >= 0 : x -> min(activation_max, x) +// if alpha != 0: x -> min(activation_max, x) +// else +// if x < activation_min : x -> min(activation_min, alpha * x) +// if x >= activation_min : x -> min(activation_max, x) // } // // Examples: -// - ReLU: activation_max = 0, alpha = 0 -// - ReLU6: activation_max = 6, alpha = 0 -// - Leaky ReLU: activation_max = 0, alpha = a +// - ReLU: activation_min = 0, activation_max = 0, alpha = 0 +// - ReLU6: activation_min = 0, activation_max = 6, alpha = 0 +// - Leaky ReLU: activation_min = 0, activation_max = 0, alpha = a +// - ReLUN1To1: activation_min = -1, activation_max = 1, alpha = 0 struct ReLUAttributes { + // activation_min must be < activation_max + float activation_min = 0; + // activation_max <= 0 mean it is not set. 
float activation_max = 0; + // alpha must be <= 1 float alpha = 0; }; diff --git a/tensorflow/lite/delegates/gpu/common/tasks/relu.cc b/tensorflow/lite/delegates/gpu/common/tasks/relu.cc index a3b7e008a08aa5..861ba496cc2b72 100644 --- a/tensorflow/lite/delegates/gpu/common/tasks/relu.cc +++ b/tensorflow/lite/delegates/gpu/common/tasks/relu.cc @@ -35,7 +35,12 @@ ElementwiseDescriptor CreateReLU(const ReLUAttributes& attr, result.args.AddHalf("alpha", half(attr.alpha)); } } else { - min_func = "INIT_FLT4(0.0f)"; + min_func = "INIT_FLT4(args.activation_min)"; + if (precision == CalculationsPrecision::F32) { + result.args.AddFloat("activation_min", attr.activation_min); + } else { + result.args.AddHalf("activation_min", half(attr.activation_min)); + } } if (attr.activation_max != 0.0f) { if (precision == CalculationsPrecision::F32) { diff --git a/tensorflow/lite/delegates/gpu/common/tasks/relu_test_util.cc b/tensorflow/lite/delegates/gpu/common/tasks/relu_test_util.cc index fefd5d3cf5759e..bd62c91ee26a7d 100644 --- a/tensorflow/lite/delegates/gpu/common/tasks/relu_test_util.cc +++ b/tensorflow/lite/delegates/gpu/common/tasks/relu_test_util.cc @@ -142,5 +142,130 @@ absl::Status ReLUAlphaClipTest(TestExecutionEnvironment* env) { return absl::OkStatus(); } +absl::Status ReLUN1NoClipNoAlphaTest(TestExecutionEnvironment* env) { + TensorFloat32 src_tensor; + src_tensor.shape = BHWC(1, 2, 1, 4); + src_tensor.data = {-12.0f, -1.0f, -0.5f, 0.0f, 0.8f, -0.6f, 1.0f, 3.2f}; + + ReLUAttributes attr; + attr.alpha = 0.0f; + attr.activation_min = -1.0f; + attr.activation_max = 0.0f; + + for (auto precision : env->GetSupportedPrecisions()) { + auto data_type = DeduceDataTypeFromPrecision(precision); + for (auto storage : env->GetSupportedStorages(data_type)) { + const float eps = precision == CalculationsPrecision::F32 ? 
1e-6f : 1e-3f; + OperationDef op_def; + op_def.precision = precision; + auto data_type = DeduceDataTypeFromPrecision(precision); + op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); + op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); + TensorFloat32 dst_tensor; + GPUOperation operation = CreateReLU(op_def, attr); + RETURN_IF_ERROR(env->ExecuteGPUOperation( + src_tensor, absl::make_unique(std::move(operation)), + BHWC(1, 2, 1, 4), &dst_tensor)); + RETURN_IF_ERROR( + PointWiseNear({-1.0f, -1.0f, -0.5f, 0.0f, 0.8f, -0.6f, 1.0f, 3.2f}, + dst_tensor.data, eps)); + } + } + return absl::OkStatus(); +} + +absl::Status ReLUN1ClipTest(TestExecutionEnvironment* env) { + TensorFloat32 src_tensor; + src_tensor.shape = BHWC(1, 2, 1, 4); + src_tensor.data = {-12.0f, -1.0f, -0.5f, 0.0f, 0.8f, -0.6f, 1.0f, 3.2f}; + + ReLUAttributes attr; + attr.alpha = 0.0f; + attr.activation_min = -1.0f; + attr.activation_max = 1.0f; + + for (auto precision : env->GetSupportedPrecisions()) { + auto data_type = DeduceDataTypeFromPrecision(precision); + for (auto storage : env->GetSupportedStorages(data_type)) { + const float eps = precision == CalculationsPrecision::F32 ? 
1e-6f : 1e-3f; + OperationDef op_def; + op_def.precision = precision; + auto data_type = DeduceDataTypeFromPrecision(precision); + op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); + op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); + TensorFloat32 dst_tensor; + GPUOperation operation = CreateReLU(op_def, attr); + RETURN_IF_ERROR(env->ExecuteGPUOperation( + src_tensor, absl::make_unique(std::move(operation)), + BHWC(1, 2, 1, 4), &dst_tensor)); + RETURN_IF_ERROR(PointWiseNear({-1.0f, -1.0f, -0.5f, 0.0f, 0.8f, -0.6f, 1.0f, 1.0f}, + dst_tensor.data, eps)); + } + } + return absl::OkStatus(); +} + +absl::Status ReLUN1AlphaTest(TestExecutionEnvironment* env) { + TensorFloat32 src_tensor; + src_tensor.shape = BHWC(1, 2, 1, 4); + src_tensor.data = {-12.0f, -1.0f, -0.5f, 0.0f, 0.8f, -0.6f, 1.0f, 3.2f}; + + ReLUAttributes attr; + attr.alpha = 1.0f; + attr.activation_min = -1.0f; // activation_min ignored if alpha != 0 + attr.activation_max = 0.0f; + + for (auto precision : env->GetSupportedPrecisions()) { + auto data_type = DeduceDataTypeFromPrecision(precision); + for (auto storage : env->GetSupportedStorages(data_type)) { + const float eps = precision == CalculationsPrecision::F32 ? 
1e-6f : 1e-3f; + OperationDef op_def; + op_def.precision = precision; + auto data_type = DeduceDataTypeFromPrecision(precision); + op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); + op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); + TensorFloat32 dst_tensor; + GPUOperation operation = CreateReLU(op_def, attr); + RETURN_IF_ERROR(env->ExecuteGPUOperation( + src_tensor, absl::make_unique(std::move(operation)), + BHWC(1, 2, 1, 4), &dst_tensor)); + RETURN_IF_ERROR(PointWiseNear({-12.0f, -1.0f, -0.5f, 0.0f, 0.8f, -0.6f, 1.0f, 3.2f}, + dst_tensor.data, eps)); + } + } + return absl::OkStatus(); +} + +absl::Status ReLUN1AlphaClipTest(TestExecutionEnvironment* env) { + TensorFloat32 src_tensor; + src_tensor.shape = BHWC(1, 2, 1, 4); + src_tensor.data = {-12.0f, -1.0f, -0.5f, 0.0f, 0.8f, -0.6f, 1.0f, 3.2f}; + + ReLUAttributes attr; + attr.alpha = 1.0f; + attr.activation_min = -1.0f; // activation_min ignored if alpha != 0 + attr.activation_max = 3.0f; + + for (auto precision : env->GetSupportedPrecisions()) { + auto data_type = DeduceDataTypeFromPrecision(precision); + for (auto storage : env->GetSupportedStorages(data_type)) { + const float eps = precision == CalculationsPrecision::F32 ? 
1e-6f : 1e-3f; + OperationDef op_def; + op_def.precision = precision; + auto data_type = DeduceDataTypeFromPrecision(precision); + op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); + op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); + TensorFloat32 dst_tensor; + GPUOperation operation = CreateReLU(op_def, attr); + RETURN_IF_ERROR(env->ExecuteGPUOperation( + src_tensor, absl::make_unique(std::move(operation)), + BHWC(1, 2, 1, 4), &dst_tensor)); + RETURN_IF_ERROR(PointWiseNear({-12.0f, -1.0f, -0.5f, 0.0f, 0.8f, -0.6f, 1.0f, 3.0f}, + dst_tensor.data, eps)); + } + } + return absl::OkStatus(); +} + } // namespace gpu } // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/common/tasks/relu_test_util.h b/tensorflow/lite/delegates/gpu/common/tasks/relu_test_util.h index 92ed2eea5cbcd8..8c0a7023048962 100644 --- a/tensorflow/lite/delegates/gpu/common/tasks/relu_test_util.h +++ b/tensorflow/lite/delegates/gpu/common/tasks/relu_test_util.h @@ -26,6 +26,10 @@ absl::Status ReLUNoClipNoAlphaTest(TestExecutionEnvironment* env); absl::Status ReLUClipTest(TestExecutionEnvironment* env); absl::Status ReLUAlphaTest(TestExecutionEnvironment* env); absl::Status ReLUAlphaClipTest(TestExecutionEnvironment* env); +absl::Status ReLUN1NoClipNoAlphaTest(TestExecutionEnvironment* env); +absl::Status ReLUN1ClipTest(TestExecutionEnvironment* env); +absl::Status ReLUN1AlphaTest(TestExecutionEnvironment* env); +absl::Status ReLUN1AlphaClipTest(TestExecutionEnvironment* env); } // namespace gpu } // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/relu.cc b/tensorflow/lite/delegates/gpu/gl/kernels/relu.cc index 20051ae4195c85..5bfd0517df68eb 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/relu.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/relu.cc @@ -43,7 +43,8 @@ class ReLU : public NodeShader { std::vector params; std::string min; if (attr.alpha == 0) { - min = "vec4(0.0)"; + min = "vec4($activation_min$)"; + 
params.push_back({"activation_min", attr.activation_min}); } else { min = "min($alpha$ * value_0, 0.0)"; params.push_back({"alpha", attr.alpha}); diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/relu_test.cc b/tensorflow/lite/delegates/gpu/gl/kernels/relu_test.cc index a1c54bfe69b761..d32fd7489fa50c 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/relu_test.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/relu_test.cc @@ -94,6 +94,62 @@ TEST_F(ReluTest, ClipAndAlpha) { Pointwise(FloatNear(1e-6), {-3.0, 0.0, 2.0, 6.0})); } +TEST_F(ReluTest, ReLUN1Smoke) { + OperationType op_type = OperationType::RELU; + ReLUAttributes attr; + attr.activation_min = -1; + attr.activation_max = 0; + attr.alpha = 0; + SingleOpModel model({ToString(op_type), attr}, {GetTensorRef(0)}, + {GetTensorRef(1)}); + ASSERT_TRUE(model.PopulateTensor(0, {-12.0f, -0.5f, 0.8f, 3.2f})); + ASSERT_OK(model.Invoke(*NewReLUNodeShader())); + EXPECT_THAT(model.GetOutput(0), + Pointwise(FloatNear(1e-6), {-1.0f, -0.5f, 0.8f, 3.2f}) ); +} + +TEST_F(ReluTest, ReLUN1ClipOnly) { + OperationType op_type = OperationType::RELU; + ReLUAttributes attr; + attr.activation_min = -1; + attr.activation_max = 1; + attr.alpha = 0; + SingleOpModel model({ToString(op_type), attr}, {GetTensorRef(0)}, + {GetTensorRef(1)}); + ASSERT_TRUE(model.PopulateTensor(0, {-12.0f, -0.5f, 0.8f, 3.2f})); + ASSERT_OK(model.Invoke(*NewReLUNodeShader())); + EXPECT_THAT(model.GetOutput(0), + Pointwise(FloatNear(1e-6), {-1.0f, -0.5f, 0.8f, 1.0f})); +} + +TEST_F(ReluTest, ReLUN1AlphaOnly) { + OperationType op_type = OperationType::RELU; + ReLUAttributes attr; + attr.activation_min = -1; // activation_min ignored if alpha != 0 + attr.activation_max = 0; + attr.alpha = 0.5; + SingleOpModel model({ToString(op_type), attr}, {GetTensorRef(0)}, + {GetTensorRef(1)}); + ASSERT_TRUE(model.PopulateTensor(0, {-6.0, 0.0, 2.0, 8.0})); + ASSERT_OK(model.Invoke(*NewReLUNodeShader())); + EXPECT_THAT(model.GetOutput(0), + Pointwise(FloatNear(1e-6), 
{-3.0, 0.0, 2.0, 8.0})); +} + +TEST_F(ReluTest, ReLUN1ClipAndAlpha) { + OperationType op_type = OperationType::RELU; + ReLUAttributes attr; + attr.activation_min = -1; // activation_min ignored if alpha != 0 + attr.activation_max = 6; + attr.alpha = 0.5; + SingleOpModel model({ToString(op_type), attr}, {GetTensorRef(0)}, + {GetTensorRef(1)}); + ASSERT_TRUE(model.PopulateTensor(0, {-6.0, 0.0, 2.0, 8.0})); + ASSERT_OK(model.Invoke(*NewReLUNodeShader())); + EXPECT_THAT(model.GetOutput(0), + Pointwise(FloatNear(1e-6), {-3.0, 0.0, 2.0, 6.0})); +} + } // namespace } // namespace gl } // namespace gpu From 34cb72d9d6fc3c0bc585048b3f419cf097f1c355 Mon Sep 17 00:00:00 2001 From: Kun-Lu Date: Wed, 12 Jul 2023 08:55:40 -0400 Subject: [PATCH 010/567] Add comments on using "-" Signed-off-by: Kun-Lu --- tensorflow/compiler/mlir/lite/flatbuffer_to_string.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/compiler/mlir/lite/flatbuffer_to_string.cc b/tensorflow/compiler/mlir/lite/flatbuffer_to_string.cc index 39c1c995050e9b..b3e7e8e633e0da 100644 --- a/tensorflow/compiler/mlir/lite/flatbuffer_to_string.cc +++ b/tensorflow/compiler/mlir/lite/flatbuffer_to_string.cc @@ -141,6 +141,8 @@ int main(int argc, char** argv) { std::string serialized_model; if (tflite::ReadAndVerify(argv[1], &serialized_model)) return 1; #if FLATBUFFERS_LITTLEENDIAN == 0 + // If the flatbuffer model comes from stdin, convert its tensor content from + // BE to LE to ensure the output text string is the same as on LE platforms. 
if (std::string(argv[1]) == "-") tflite::FlatBufferModel::ByteSwapSerializedModel(&serialized_model, true); #endif From 2ee38c91ba5e8fd27d89c0a15838d720944e7d87 Mon Sep 17 00:00:00 2001 From: Harshavardhan Bellamkonda Date: Fri, 18 Aug 2023 20:27:38 +0530 Subject: [PATCH 011/567] Add CPU and GPU behavior notice and example to tf.nn.embedding_lookup function --- tensorflow/python/ops/embedding_ops.py | 28 ++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tensorflow/python/ops/embedding_ops.py b/tensorflow/python/ops/embedding_ops.py index 2fbd2643da45d1..3fbc582ead68f2 100644 --- a/tensorflow/python/ops/embedding_ops.py +++ b/tensorflow/python/ops/embedding_ops.py @@ -322,6 +322,34 @@ def embedding_lookup( Raises: ValueError: If `params` is empty. """ + """ + **Behavior Difference between CPU and GPU** + + Please note that when using `tf.nn.embedding_lookup` on a GPU, if an out-of-bound index is encountered, a value of 0 will be stored in the corresponding output value. + On the other hand, when using `tf.nn.embedding_lookup` on a CPU, an error will be returned if an out-of-bound index is found. + + This behavior difference can impact the results of your computation, especially when dealing with indices that may go beyond the bounds of the tensor. + Make sure to be mindful of this distinction when using the `tf.nn.embedding_lookup` function in your computations. 
+ + **Usage Example** + + Here's an example demonstrating how to use `tf.nn.embedding_lookup`: + + ```python + import tensorflow as tf + + # Example embedding matrix and indices + embedding_matrix = tf.constant([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) + indices = tf.constant([1, 0, 2]) + + # Perform embedding lookup + embeddings = tf.nn.embedding_lookup(embedding_matrix, indices) + + # Print the result + print("Embeddings:") + print(embeddings.numpy()) + ``` + """ return _embedding_lookup_and_transform( params=params, From 616aa8414f58f5477e6350bf53372c9a47042765 Mon Sep 17 00:00:00 2001 From: Harshavardhan Bellamkonda Date: Tue, 22 Aug 2023 20:47:32 +0530 Subject: [PATCH 012/567] Updated the file with necessary changes --- tensorflow/python/ops/embedding_ops.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/ops/embedding_ops.py b/tensorflow/python/ops/embedding_ops.py index 3fbc582ead68f2..37be47304ea8a2 100644 --- a/tensorflow/python/ops/embedding_ops.py +++ b/tensorflow/python/ops/embedding_ops.py @@ -322,14 +322,20 @@ def embedding_lookup( Raises: ValueError: If `params` is empty. """ + + """ **Behavior Difference between CPU and GPU** - Please note that when using `tf.nn.embedding_lookup` on a GPU, if an out-of-bound index is encountered, a value of 0 will be stored in the corresponding output value. - On the other hand, when using `tf.nn.embedding_lookup` on a CPU, an error will be returned if an out-of-bound index is found. + Please note that when using `tf.nn.embedding_lookup` on a GPU, if an out-of-bound + index is encountered, a value of 0 will be stored in the corresponding output value. + On the other hand, when using `tf.nn.embedding_lookup` on a CPU, an error will be + returned if an out-of-bound index is found. - This behavior difference can impact the results of your computation, especially when dealing with indices that may go beyond the bounds of the tensor. 
- Make sure to be mindful of this distinction when using the `tf.nn.embedding_lookup` function in your computations. + This behavior difference can impact the results of your computation, especially when + dealing with indices that may go beyond the bounds of the tensor. + Make sure to be mindful of this distinction when using the `tf.nn.embedding_lookup` + function in your computations. **Usage Example** From a94b7e777a2c2621aa80e7e15ae9e45c6bf2b5cd Mon Sep 17 00:00:00 2001 From: weihanmines Date: Fri, 11 Aug 2023 19:14:45 +0000 Subject: [PATCH 013/567] put rocm config back in bazelrc --- .bazelrc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.bazelrc b/.bazelrc index aad8e480ec4741..f341501b528b69 100644 --- a/.bazelrc +++ b/.bazelrc @@ -39,6 +39,7 @@ # tpu: Build TF with TPU support # cuda: Build with CUDA support. # cuda_clang Build with CUDA Clang support. +# rocm: Build with AMD GPU support (rocm) # mkl: Enable full mkl support. # tensorrt: Enable Tensorrt support. # noaws: Disable AWS S3 storage support @@ -274,6 +275,11 @@ build:tpu --define=with_tpu_support=true build:tensorrt --repo_env TF_NEED_TENSORRT=1 +build:rocm --crosstool_top=@local_config_rocm//crosstool:toolchain +build:rocm --define=using_rocm_hipcc=true +build:rocm --define=tensorflow_mkldnn_contraction_kernel=0 +build:rocm --repo_env TF_NEED_ROCM=1 + # Options to disable default on features build:noaws --define=no_aws_support=true build:nogcp --define=no_gcp_support=true @@ -579,4 +585,5 @@ build:release_gpu_windows --config=release_gpu_base build:android --config=no_tfrt build:macos --config=no_tfrt build:windows --config=no_tfrt +build:rocm --config=no_tfrt build:no_tfrt 
--deleted_packages=tensorflow/compiler/mlir/tfrt,tensorflow/compiler/mlir/tfrt/benchmarks,tensorflow/compiler/mlir/tfrt/ir,tensorflow/compiler/mlir/tfrt/ir/mlrt,tensorflow/compiler/mlir/tfrt/jit/python_binding,tensorflow/compiler/mlir/tfrt/jit/transforms,tensorflow/compiler/mlir/tfrt/python_tests,tensorflow/compiler/mlir/tfrt/tests,tensorflow/compiler/mlir/tfrt/tests/mlrt,tensorflow/compiler/mlir/tfrt/tests/ir,tensorflow/compiler/mlir/tfrt/tests/analysis,tensorflow/compiler/mlir/tfrt/tests/jit,tensorflow/compiler/mlir/tfrt/tests/lhlo_to_tfrt,tensorflow/compiler/mlir/tfrt/tests/lhlo_to_jitrt,tensorflow/compiler/mlir/tfrt/tests/tf_to_corert,tensorflow/compiler/mlir/tfrt/tests/tf_to_tfrt_data,tensorflow/compiler/mlir/tfrt/tests/saved_model,tensorflow/compiler/mlir/tfrt/transforms/lhlo_gpu_to_tfrt_gpu,tensorflow/compiler/mlir/tfrt/transforms/mlrt,tensorflow/core/runtime_fallback,tensorflow/core/runtime_fallback/conversion,tensorflow/core/runtime_fallback/kernel,tensorflow/core/runtime_fallback/opdefs,tensorflow/core/runtime_fallback/runtime,tensorflow/core/runtime_fallback/util,tensorflow/core/runtime_fallback/test,tensorflow/core/runtime_fallback/test/gpu,tensorflow/core/runtime_fallback/test/saved_model,tensorflow/core/runtime_fallback/test/testdata,tensorflow/core/tfrt/stubs,tensorflow/core/tfrt/tfrt_session,tensorflow/core/tfrt/mlrt,tensorflow/core/tfrt/mlrt/attribute,tensorflow/core/tfrt/mlrt/kernel,tensorflow/core/tfrt/mlrt/bytecode,tensorflow/core/tfrt/mlrt/interpreter,tensorflow/compiler/mlir/tfrt/translate/mlrt,tensorflow/compiler/mlir/tfrt/translate/mlrt/testdata,tensorflow/core/tfrt/gpu,tensorflow/core/tfrt/run_handler_thread_pool,tensorflow/core/tfrt/runtime,tensorflow/core/tfrt/saved_model,tensorflow/core/tfrt/graph_executor,tensorflow/core/tfrt/saved_model/tests,tensorflow/core/tfrt/tpu,tensorflow/core/tfrt/utils,tensorflow/core/tfrt/utils/debug,tensorflow/core/tfrt/saved_model/python,tensorflow/core/tfrt/graph_executor/python,tensorflow/core/tfrt/saved_mo
del/utils From 22430372eb3b29f5bf336295d214365e5f62f70c Mon Sep 17 00:00:00 2001 From: Akash Patel <17132214+acxz@users.noreply.github.com> Date: Sat, 9 Sep 2023 14:46:01 -0500 Subject: [PATCH 014/567] fix hipcc path --- third_party/gpus/rocm_configure.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/gpus/rocm_configure.bzl b/third_party/gpus/rocm_configure.bzl index 432f8391424037..f7f05391dfaaa5 100644 --- a/third_party/gpus/rocm_configure.bzl +++ b/third_party/gpus/rocm_configure.bzl @@ -751,7 +751,7 @@ def _create_local_rocm_repository(repository_ctx): tpl_paths["crosstool:clang/bin/crosstool_wrapper_driver_rocm"], { "%{cpu_compiler}": str(cc), - "%{hipcc_path}": rocm_config.rocm_toolkit_path + "/hip/bin/hipcc", + "%{hipcc_path}": rocm_config.rocm_toolkit_path + "/bin/hipcc", "%{hipcc_env}": _hipcc_env(repository_ctx), "%{rocr_runtime_path}": rocm_config.rocm_toolkit_path + "/lib", "%{rocr_runtime_library}": "hsa-runtime64", From b92b789a8ef69e6385d0814b188684270776d5b2 Mon Sep 17 00:00:00 2001 From: Joyce Brum Date: Tue, 12 Sep 2023 14:49:29 +0000 Subject: [PATCH 015/567] [StepSecurity] ci: Harden GitHub Actions Signed-off-by: Joyce Brum --- .github/workflows/arm-cd.yml | 3 +++ .github/workflows/arm-ci-extended-cpp.yml | 3 +++ .github/workflows/arm-ci-extended.yml | 3 +++ .github/workflows/arm-ci.yml | 3 +++ .github/workflows/release-branch-cherrypick.yml | 3 +++ .github/workflows/sigbuild-docker-branch.yml | 3 +++ .github/workflows/sigbuild-docker.yml | 3 +++ .github/workflows/stale-issues.yml | 3 +++ .github/workflows/trusted-partners.yml | 3 +++ .github/workflows/update-rbe.yml | 3 +++ 10 files changed, 30 insertions(+) diff --git a/.github/workflows/arm-cd.yml b/.github/workflows/arm-cd.yml index d01ccbf458d5b7..15433f8f14be32 100644 --- a/.github/workflows/arm-cd.yml +++ b/.github/workflows/arm-cd.yml @@ -24,6 +24,9 @@ on: schedule: - cron: '0 8 * * *' +permissions: + contents: read + jobs: build: if: github.repository == 
'tensorflow/tensorflow' # Don't do this in forks diff --git a/.github/workflows/arm-ci-extended-cpp.yml b/.github/workflows/arm-ci-extended-cpp.yml index 7f75dbff659dad..e648297d37e789 100644 --- a/.github/workflows/arm-ci-extended-cpp.yml +++ b/.github/workflows/arm-ci-extended-cpp.yml @@ -22,6 +22,9 @@ on: schedule: - cron: '0 2 * * *' +permissions: + contents: read + jobs: build: if: github.repository == 'tensorflow/tensorflow' # Don't do this in forks diff --git a/.github/workflows/arm-ci-extended.yml b/.github/workflows/arm-ci-extended.yml index 7abfbd6f8cd030..01ce70ba82ecfa 100644 --- a/.github/workflows/arm-ci-extended.yml +++ b/.github/workflows/arm-ci-extended.yml @@ -22,6 +22,9 @@ on: schedule: - cron: '0 4 * * *' +permissions: + contents: read + jobs: build: if: github.repository == 'tensorflow/tensorflow' # Don't do this in forks diff --git a/.github/workflows/arm-ci.yml b/.github/workflows/arm-ci.yml index 73462a673dab03..96467ebaeb35a9 100644 --- a/.github/workflows/arm-ci.yml +++ b/.github/workflows/arm-ci.yml @@ -26,6 +26,9 @@ on: - master - r2.** +permissions: + contents: read + jobs: build: # Don't do this in forks, and if labeled, only for 'kokoro:force-run' diff --git a/.github/workflows/release-branch-cherrypick.yml b/.github/workflows/release-branch-cherrypick.yml index 5ff69e468057c4..87b33bb4a0fd8c 100644 --- a/.github/workflows/release-branch-cherrypick.yml +++ b/.github/workflows/release-branch-cherrypick.yml @@ -35,6 +35,9 @@ on: required: true type: string +permissions: + contents: read + jobs: cherrypick: name: Cherrypick to ${{ github.event.inputs.release_branch}} - ${{ github.event.inputs.git_commit }} diff --git a/.github/workflows/sigbuild-docker-branch.yml b/.github/workflows/sigbuild-docker-branch.yml index d48ef98608e758..2a1ba68891123d 100644 --- a/.github/workflows/sigbuild-docker-branch.yml +++ b/.github/workflows/sigbuild-docker-branch.yml @@ -25,6 +25,9 @@ on: branches: - "r[1-9].[0-9]+" +permissions: + contents: read + 
jobs: docker: if: github.repository == 'tensorflow/tensorflow' # Don't do this in forks diff --git a/.github/workflows/sigbuild-docker.yml b/.github/workflows/sigbuild-docker.yml index a7f9477929389d..810e9ec55a5317 100644 --- a/.github/workflows/sigbuild-docker.yml +++ b/.github/workflows/sigbuild-docker.yml @@ -28,6 +28,9 @@ on: branches: - master +permissions: + contents: read + jobs: docker: if: github.repository == 'tensorflow/tensorflow' # Don't do this in forks diff --git a/.github/workflows/stale-issues.yml b/.github/workflows/stale-issues.yml index cb8d8639a11ffb..84118acca683fd 100644 --- a/.github/workflows/stale-issues.yml +++ b/.github/workflows/stale-issues.yml @@ -18,6 +18,9 @@ on: schedule: - cron: "30 1 * * *" +permissions: + contents: read + jobs: close-issues: # Don't do this in forks diff --git a/.github/workflows/trusted-partners.yml b/.github/workflows/trusted-partners.yml index 0b64aad201cc13..f6f40518fe5edf 100644 --- a/.github/workflows/trusted-partners.yml +++ b/.github/workflows/trusted-partners.yml @@ -17,6 +17,9 @@ name: Trusted Partner PR on: pull_request_target: +permissions: + contents: read + jobs: assign-partner-tags: runs-on: ubuntu-latest diff --git a/.github/workflows/update-rbe.yml b/.github/workflows/update-rbe.yml index a07b6f9e38e5a0..ca22041782fa4f 100644 --- a/.github/workflows/update-rbe.yml +++ b/.github/workflows/update-rbe.yml @@ -20,6 +20,9 @@ name: Update RBE Configs on: workflow_dispatch: +permissions: + contents: read + jobs: rbe: name: Update RBE Configs From 56803aae482fe077b6fb94e2f4ba96d9ab885de7 Mon Sep 17 00:00:00 2001 From: Joyce Brum Date: Tue, 12 Sep 2023 16:40:29 +0000 Subject: [PATCH 016/567] set permissions on workflows Signed-off-by: Joyce Brum --- .github/workflows/cffconvert.yml | 3 +++ .github/workflows/issue-on-pr-rollback.yml | 6 ++++++ .github/workflows/pylint-presubmit.yml | 3 +++ .github/workflows/sigbuild-docker-presubmit.yml | 6 ++++++ 4 files changed, 18 insertions(+) diff --git 
a/.github/workflows/cffconvert.yml b/.github/workflows/cffconvert.yml index 21ac759f3ef656..49e240867ec257 100644 --- a/.github/workflows/cffconvert.yml +++ b/.github/workflows/cffconvert.yml @@ -20,6 +20,9 @@ on: paths: - CITATION.cff +permissions: + contents: read + jobs: validate: if: github.repository == 'tensorflow/tensorflow' # Don't do this in forks diff --git a/.github/workflows/issue-on-pr-rollback.yml b/.github/workflows/issue-on-pr-rollback.yml index fa76923a2ba770..459b99b4bee501 100644 --- a/.github/workflows/issue-on-pr-rollback.yml +++ b/.github/workflows/issue-on-pr-rollback.yml @@ -18,10 +18,16 @@ on: push: branches: - master + +permissions: {} jobs: create-issue-on-pr-rollback: runs-on: ubuntu-latest + permissions: + contents: read + issues: write + pull-requests: read if: | github.repository == 'tensorflow/tensorflow' && startsWith(github.event.head_commit.message, 'Rollback of PR #') diff --git a/.github/workflows/pylint-presubmit.yml b/.github/workflows/pylint-presubmit.yml index e97f34472d8356..53e2253ad4d4a8 100644 --- a/.github/workflows/pylint-presubmit.yml +++ b/.github/workflows/pylint-presubmit.yml @@ -19,6 +19,9 @@ on: paths: - '**.py' +permissions: + contents: read + jobs: build: name: PyLint diff --git a/.github/workflows/sigbuild-docker-presubmit.yml b/.github/workflows/sigbuild-docker-presubmit.yml index 6450359835fafe..eed3af5b5cf380 100644 --- a/.github/workflows/sigbuild-docker-presubmit.yml +++ b/.github/workflows/sigbuild-docker-presubmit.yml @@ -23,6 +23,9 @@ on: - 'tensorflow/tools/tf_sig_build_dockerfiles/**' - '!tensorflow/tools/tf_sig_build_dockerfiles/README.md' +permissions: + contents: read + jobs: docker: if: github.repository == 'tensorflow/tensorflow' # Don't do this in forks @@ -30,6 +33,9 @@ jobs: strategy: matrix: python-version: [python3.9, python3.10, python3.11] + permissions: + contents: read + pull-requests: write steps: - name: Checkout From 9f729bcaa23bb337e4005ffd220bd2b27e9cfe33 Mon Sep 17 00:00:00 2001 
From: Joyce Date: Tue, 12 Sep 2023 13:45:56 -0300 Subject: [PATCH 017/567] set update-nightly.yml permissions Signed-off-by: Joyce --- .github/workflows/update-nightly.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/update-nightly.yml b/.github/workflows/update-nightly.yml index 60372fddd272bf..282f030a31ff9f 100644 --- a/.github/workflows/update-nightly.yml +++ b/.github/workflows/update-nightly.yml @@ -18,10 +18,15 @@ on: schedule: - cron: 0 4 * * * # 4am UTC is 9pm PDT and 8pm PST name: Set nightly branch to master HEAD + +permissions: {} + jobs: master-to-nightly: if: github.repository == 'tensorflow/tensorflow' # Don't do this in forks runs-on: ubuntu-latest + permissions: + contents: write steps: - uses: zofrex/mirror-branch@a8809f0b42f9dfe9b2c5c2162a46327c23d15266 # v1.0.3 name: Set nightly branch to master HEAD From 17d8892757be72019201e79981d311fd2a12b31e Mon Sep 17 00:00:00 2001 From: William Muir Date: Sat, 16 Sep 2023 22:47:34 -0500 Subject: [PATCH 018/567] Strip `external/local_tsl` prefix during tar of tsl c headers --- tensorflow/tools/lib_package/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/lib_package/BUILD b/tensorflow/tools/lib_package/BUILD index 578d2e3f2f5bfe..1c6af3884bc9ce 100644 --- a/tensorflow/tools/lib_package/BUILD +++ b/tensorflow/tools/lib_package/BUILD @@ -73,7 +73,7 @@ pkg_tar( "//tensorflow/c:headers", ], package_dir = "include/", - strip_prefix = "/", + strip_prefix = "/external/local_tsl", # Mark as "manual" till # https://github.com/bazelbuild/bazel/issues/2352 # and https://github.com/bazelbuild/bazel/issues/1580 From 29abb6ac7121bed646f3cd832511372626e7916c Mon Sep 17 00:00:00 2001 From: Fadi Arafeh Date: Fri, 15 Sep 2023 13:10:50 +0000 Subject: [PATCH 019/567] Update oneDNN to v3.2.1 for aarch64 and add fp32-bf16 JIT reorder patch --- tensorflow/workspace2.bzl | 13 +- third_party/mkl_dnn/mkldnn_acl.BUILD | 9 +- .../onednn_acl_depthwise_convolution.patch 
| 356 -- .../onednn_acl_fixed_format_kernels.patch | 1166 ----- .../onednn_acl_fp32_bf16_reorder.patch | 111 + .../mkl_dnn/onednn_acl_remove_winograd.patch | 326 -- third_party/mkl_dnn/onednn_acl_reorder.patch | 17 +- .../mkl_dnn/onednn_acl_reorder_padded.patch | 858 ---- .../mkl_dnn/onednn_acl_reorder_update.patch | 4193 ----------------- .../onednn_acl_thread_local_scheduler.patch | 65 +- .../mkl_dnn/onednn_acl_threadcap.patch | 12 +- .../onednn_acl_threadpool_scheduler.patch | 45 - .../xla/third_party/mkl_dnn/mkldnn_acl.BUILD | 9 +- .../tsl/third_party/mkl_dnn/mkldnn_acl.BUILD | 9 +- .../third_party/tsl/tsl/mkl/build_defs.bzl | 2 +- .../xla/third_party/tsl/workspace2.bzl | 19 +- 16 files changed, 188 insertions(+), 7022 deletions(-) delete mode 100644 third_party/mkl_dnn/onednn_acl_depthwise_convolution.patch delete mode 100644 third_party/mkl_dnn/onednn_acl_fixed_format_kernels.patch create mode 100644 third_party/mkl_dnn/onednn_acl_fp32_bf16_reorder.patch delete mode 100644 third_party/mkl_dnn/onednn_acl_remove_winograd.patch delete mode 100644 third_party/mkl_dnn/onednn_acl_reorder_padded.patch delete mode 100644 third_party/mkl_dnn/onednn_acl_reorder_update.patch delete mode 100644 third_party/mkl_dnn/onednn_acl_threadpool_scheduler.patch diff --git a/tensorflow/workspace2.bzl b/tensorflow/workspace2.bzl index fd6071f091e1b6..22d25f101488aa 100644 --- a/tensorflow/workspace2.bzl +++ b/tensorflow/workspace2.bzl @@ -205,18 +205,13 @@ def _tf_repositories(): build_file = "//third_party/mkl_dnn:mkldnn_acl.BUILD", patch_file = [ "//third_party/mkl_dnn:onednn_acl_threadcap.patch", - "//third_party/mkl_dnn:onednn_acl_remove_winograd.patch", - "//third_party/mkl_dnn:onednn_acl_fixed_format_kernels.patch", - "//third_party/mkl_dnn:onednn_acl_depthwise_convolution.patch", - "//third_party/mkl_dnn:onednn_acl_threadpool_scheduler.patch", - "//third_party/mkl_dnn:onednn_acl_reorder_padded.patch", - "//third_party/mkl_dnn:onednn_acl_reorder_update.patch", 
"//third_party/mkl_dnn:onednn_acl_reorder.patch", "//third_party/mkl_dnn:onednn_acl_thread_local_scheduler.patch", + "//third_party/mkl_dnn:onednn_acl_fp32_bf16_reorder.patch", ], - sha256 = "a50993aa6265b799b040fe745e0010502f9f7103cc53a9525d59646aef006633", - strip_prefix = "oneDNN-2.7.3", - urls = tf_mirror_urls("https://github.com/oneapi-src/oneDNN/archive/v2.7.3.tar.gz"), + sha256 = "2f76b407ef8893cca71340f88cd800019a1f14f8ac1bbdbb89a84be1370b52e3", + strip_prefix = "oneDNN-3.2.1", + urls = tf_mirror_urls("https://github.com/oneapi-src/oneDNN/archive/refs/tags/v3.2.1.tar.gz"), ) tf_http_archive( diff --git a/third_party/mkl_dnn/mkldnn_acl.BUILD b/third_party/mkl_dnn/mkldnn_acl.BUILD index a1085427ec08da..0653bcb5523941 100644 --- a/third_party/mkl_dnn/mkldnn_acl.BUILD +++ b/third_party/mkl_dnn/mkldnn_acl.BUILD @@ -27,6 +27,7 @@ _DNNL_RUNTIME_THREADPOOL = { "#cmakedefine DNNL_SYCL_HIP": "#undef DNNL_SYCL_HIP", "#cmakedefine DNNL_ENABLE_STACK_CHECKER": "#undef DNNL_ENABLE_STACK_CHECKER", "#cmakedefine DNNL_EXPERIMENTAL": "#undef DNNL_EXPERIMENTAL", + "#cmakedefine ONEDNN_BUILD_GRAPH": "#undef ONEDNN_BUILD_GRAPH", "#cmakedefine01 BUILD_TRAINING": "#define BUILD_TRAINING 1", "#cmakedefine01 BUILD_INFERENCE": "#define BUILD_INFERENCE 0", "#cmakedefine01 BUILD_PRIMITIVE_ALL": "#define BUILD_PRIMITIVE_ALL 1", @@ -74,6 +75,7 @@ _DNNL_RUNTIME_OMP = { "#cmakedefine DNNL_SYCL_HIP": "#undef DNNL_SYCL_HIP", "#cmakedefine DNNL_ENABLE_STACK_CHECKER": "#undef DNNL_ENABLE_STACK_CHECKER", "#cmakedefine DNNL_EXPERIMENTAL": "#undef DNNL_EXPERIMENTAL", + "#cmakedefine ONEDNN_BUILD_GRAPH": "#undef ONEDNN_BUILD_GRAPH", "#cmakedefine01 BUILD_TRAINING": "#define BUILD_TRAINING 1", "#cmakedefine01 BUILD_INFERENCE": "#define BUILD_INFERENCE 0", "#cmakedefine01 BUILD_PRIMITIVE_ALL": "#define BUILD_PRIMITIVE_ALL 1", @@ -124,9 +126,9 @@ expand_template( name = "dnnl_version_h", out = "include/oneapi/dnnl/dnnl_version.h", substitutions = { - "@DNNL_VERSION_MAJOR@": "2", - 
"@DNNL_VERSION_MINOR@": "7", - "@DNNL_VERSION_PATCH@": "3", + "@DNNL_VERSION_MAJOR@": "3", + "@DNNL_VERSION_MINOR@": "2", + "@DNNL_VERSION_PATCH@": "1", "@DNNL_VERSION_HASH@": "N/A", }, template = "include/oneapi/dnnl/dnnl_version.h.in", @@ -142,6 +144,7 @@ cc_library( ], exclude = [ "src/cpu/x64/**", + "src/cpu/rv64/**", ], ), copts = select({ diff --git a/third_party/mkl_dnn/onednn_acl_depthwise_convolution.patch b/third_party/mkl_dnn/onednn_acl_depthwise_convolution.patch deleted file mode 100644 index 950077665fb4b7..00000000000000 --- a/third_party/mkl_dnn/onednn_acl_depthwise_convolution.patch +++ /dev/null @@ -1,356 +0,0 @@ - ******************************************************************************* - Copyright 2023 Arm Limited and affiliates. - SPDX-License-Identifier: Apache-2.0 - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
- ******************************************************************************* -diff --git a/src/cpu/aarch64/acl_convolution_utils.cpp b/src/cpu/aarch64/acl_convolution_utils.cpp -index 6b57374643..85e45ace9d 100644 ---- a/src/cpu/aarch64/acl_convolution_utils.cpp -+++ b/src/cpu/aarch64/acl_convolution_utils.cpp -@@ -48,11 +48,14 @@ status_t acl_init_conf(acl_conv_conf_t &acp, memory_desc_t &src_md, - if (!is_fwd) return status::unimplemented; - - const int ndims = src_d.ndims(); -+ const bool is_depthwise = wei_d.ndims() == 5 && wei_d.dims()[1] == 1 -+ && wei_d.dims()[2] == 1; - -- ACL_CHECK_SUPPORT(ndims != 4, " only supports 2 spatial dimensions"); -+ ACL_CHECK_SUPPORT( -+ ndims != 4 && !is_depthwise, " only supports 2 spatial dimensions"); - - const int with_groups = wei_d.ndims() == src_d.ndims() + 1; -- ACL_CHECK_SUPPORT(with_groups, " does not support groups"); -+ ACL_CHECK_SUPPORT(with_groups && !is_depthwise, " does not support groups"); - - ACL_CHECK_SUPPORT(src_d.data_type() != data_type::f32 - || wei_d.data_type() != data_type::f32 -@@ -108,7 +111,8 @@ status_t acl_init_conf(acl_conv_conf_t &acp, memory_desc_t &src_md, - - acp.with_bias = cd.bias_desc.format_kind != format_kind::undef; - -- if (wei_d.format_kind() != format_kind::any) return status::unimplemented; -+ if (wei_d.format_kind() != format_kind::any && !is_depthwise) -+ return status::unimplemented; - - auto src_tag = memory_desc_matches_one_of_tag( - src_md, format_tag::nhwc, format_tag::nchw); -@@ -138,8 +142,12 @@ status_t acl_init_conf(acl_conv_conf_t &acp, memory_desc_t &src_md, - || src_tag != dst_tag) - return status::unimplemented; - -- // Set weights to initially be the same as src -- CHECK(memory_desc_init_by_tag(weights_md, src_tag)); -+ if (is_depthwise) { -+ CHECK(memory_desc_init_by_tag(weights_md, format_tag::hwigo)); -+ } else { -+ // Set weights to initially be the same as src -+ CHECK(memory_desc_init_by_tag(weights_md, src_tag)); -+ } - - // Bias is just 1D, set to be 
the obvious format - if (acp.with_bias && bias_md.format_kind == format_kind::any) -@@ -166,6 +174,11 @@ status_t acl_init_conf(acl_conv_conf_t &acp, memory_desc_t &src_md, - 1, - acl_data_type, - acl_layout); -+ if(is_depthwise) { -+ // We need to set that values are not constant so that we -+ // we can update them in-place in ACL -+ acp.wei_tensor_info.set_are_values_constant(false); -+ } - - acp.dst_tensor_info = arm_compute::TensorInfo( - is_nhwc ? arm_compute::TensorShape(oc, ow, oh, mb) : -@@ -185,6 +198,11 @@ status_t acl_init_conf(acl_conv_conf_t &acp, memory_desc_t &src_md, - // Are we allowed to cast down to bf16 or not? - acp.fast_math - = one_of(attr.fpmath_mode_, fpmath_mode::bf16, fpmath_mode::any); -+ if (is_depthwise) { -+ // There is no support for fixed format kernels for depthwise convolution -+ // in ACL so we are going to use weight format that we set up earlier -+ return status::success; -+ } - - // WeightFormat::ANY tells ACL we can handle any format - acp.weights_info = arm_compute::WeightsInfo( -@@ -252,6 +270,7 @@ status_t init_conf_gemm(acl_conv_conf_t &acp, memory_desc_t &src_md, - memory_desc_t &weights_md, memory_desc_t &dst_md, - memory_desc_t &bias_md, const convolution_desc_t &cd, - const primitive_attr_t &attr) { -+ if (weights_md.ndims != 4) return status::unimplemented; - - // General Compute Library checks, memory tags are also set there - CHECK(acl_init_conf(acp, src_md, weights_md, dst_md, bias_md, cd, attr)); -@@ -277,6 +296,7 @@ status_t init_conf_indirect_gemm(acl_conv_conf_t &acp, memory_desc_t &src_md, - memory_desc_t &weights_md, memory_desc_t &dst_md, - memory_desc_t &bias_md, const convolution_desc_t &cd, - const primitive_attr_t &attr) { -+ if (weights_md.ndims != 4) return status::unimplemented; - - // Indirect is slower for small convolution kernels - if (weights_md.dims[2] == 1 && weights_md.dims[3] == 1) -@@ -314,6 +334,22 @@ status_t init_conf_indirect_gemm(acl_conv_conf_t &acp, memory_desc_t &src_md, - return 
status::success; - } - -+status_t init_conf_depthwise(acl_conv_conf_t &acp, memory_desc_t &src_md, -+ memory_desc_t &weights_md, memory_desc_t &dst_md, -+ memory_desc_t &bias_md, const convolution_desc_t &cd, -+ const primitive_attr_t &attr) { -+ if (weights_md.ndims != 5) return status::unimplemented; -+ -+ CHECK(acl_init_conf(acp, src_md, weights_md, dst_md, bias_md, cd, attr)); -+ -+ ACL_CHECK_VALID(arm_compute::NEDepthwiseConvolutionLayer::validate( -+ &acp.src_tensor_info, &acp.wei_tensor_info, -+ acp.with_bias ? &acp.bia_tensor_info : nullptr, -+ &acp.dst_tensor_info, acp.padstride_info)); -+ -+ return status::success; -+} -+ - } // namespace acl_convolution_utils - - } // namespace aarch64 -diff --git a/src/cpu/aarch64/acl_convolution_utils.hpp b/src/cpu/aarch64/acl_convolution_utils.hpp -index e3d40a5e75..1ded5826c4 100644 ---- a/src/cpu/aarch64/acl_convolution_utils.hpp -+++ b/src/cpu/aarch64/acl_convolution_utils.hpp -@@ -66,6 +66,11 @@ status_t init_conf_indirect_gemm(acl_conv_conf_t &acp, memory_desc_t &src_md, - memory_desc_t &bias_md, const convolution_desc_t &cd, - const primitive_attr_t &attr); - -+status_t init_conf_depthwise(acl_conv_conf_t &acp, memory_desc_t &src_md, -+ memory_desc_t &weights_md, memory_desc_t &dst_md, -+ memory_desc_t &bias_md, const convolution_desc_t &cd, -+ const primitive_attr_t &attr); -+ - } // namespace acl_convolution_utils - - template _lock {this->mtx}; -+ -+ auto *acl_resource -+ = ctx.get_resource_mapper() -+ ->get(this); -+ acl_obj_t &acl_depthwise_obj -+ = acl_resource->get_acl_obj(); -+ -+ return execute_forward_conv_acl< -+ acl_obj_t, pd_t, data_t>( -+ ctx, acl_depthwise_obj, pd()); -+} -+ -+} // namespace aarch64 -+} // namespace cpu -+} // namespace impl -+} // namespace dnnl -diff --git a/src/cpu/aarch64/acl_depthwise_convolution.hpp b/src/cpu/aarch64/acl_depthwise_convolution.hpp -new file mode 100644 -index 0000000000..3e3d02cf41 ---- /dev/null -+++ b/src/cpu/aarch64/acl_depthwise_convolution.hpp -@@ -0,0 
+1,141 @@ -+/******************************************************************************* -+* Copyright 2023 Arm Ltd. and affiliates -+* -+* Licensed under the Apache License, Version 2.0 (the "License"); -+* you may not use this file except in compliance with the License. -+* You may obtain a copy of the License at -+* -+* http://www.apache.org/licenses/LICENSE-2.0 -+* -+* Unless required by applicable law or agreed to in writing, software -+* distributed under the License is distributed on an "AS IS" BASIS, -+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+* See the License for the specific language governing permissions and -+* limitations under the License. -+*******************************************************************************/ -+ -+#ifndef CPU_AARCH64_ACL_DEPTHWISE_CONVOLUTION_HPP -+#define CPU_AARCH64_ACL_DEPTHWISE_CONVOLUTION_HPP -+ -+#include "cpu/aarch64/acl_convolution_utils.hpp" -+#include "cpu/cpu_convolution_pd.hpp" -+ -+namespace dnnl { -+namespace impl { -+namespace cpu { -+namespace aarch64 { -+ -+struct acl_depthwise_convolution_resource_t : public resource_t { -+ acl_depthwise_convolution_resource_t() -+ : acl_obj_(utils::make_unique< -+ acl_obj_t>()) {} -+ -+ status_t configure(const acl_conv_conf_t &acp) { -+ if (!acl_obj_) return status::out_of_memory; -+ -+ acl_obj_->src_tensor.allocator()->init(acp.src_tensor_info); -+ acl_obj_->wei_tensor.allocator()->init(acp.wei_tensor_info); -+ acl_obj_->dst_tensor.allocator()->init(acp.dst_tensor_info); -+ acl_obj_->bia_tensor.allocator()->init(acp.bia_tensor_info); -+ -+ // clang-format off -+ acl_obj_->conv.configure( -+ &acl_obj_->src_tensor, -+ &acl_obj_->wei_tensor, -+ acp.with_bias ? 
&acl_obj_->bia_tensor : nullptr, -+ &acl_obj_->dst_tensor, -+ acp.padstride_info, -+ 1, // depth multiplier default value -+ acp.act_info); -+ -+ // clang-format on -+ return status::success; -+ } -+ -+ acl_obj_t &get_acl_obj() const { -+ return *acl_obj_; -+ } -+ -+ DNNL_DISALLOW_COPY_AND_ASSIGN(acl_depthwise_convolution_resource_t); -+ -+private: -+ std::unique_ptr> -+ acl_obj_; -+}; -+ -+struct acl_depthwise_convolution_fwd_t : public primitive_t { -+ -+ struct pd_t : public cpu_convolution_fwd_pd_t { -+ pd_t(const convolution_desc_t *adesc, const primitive_attr_t *attr, -+ const typename pd_t::base_class *hint_fwd_pd) -+ : cpu_convolution_fwd_pd_t(adesc, attr, hint_fwd_pd), acp_() {} -+ -+ DECLARE_COMMON_PD_T("depthwise_convolution:acl", -+ acl_depthwise_convolution_fwd_t, USE_GLOBAL_SCRATCHPAD); -+ -+ status_t init(engine_t *engine) { -+ using namespace data_type; -+ -+ const bool is_fp16_ok = expect_data_types(f16, f16, f16, f16, undef) -+ && attr()->has_default_values( -+ primitive_attr_t::skip_mask_t::post_ops, f16); -+ const bool is_fp32_ok = expect_data_types(f32, f32, f32, f32, undef) -+ && attr()->has_default_values( -+ primitive_attr_t::skip_mask_t::post_ops, f32); -+ bool ok = is_fwd() -+ && set_default_alg_kind(alg_kind::convolution_direct) -+ && utils::one_of(true, is_fp16_ok, is_fp32_ok) -+ && !has_zero_dim_memory(); -+ if (!ok) return status::unimplemented; -+ -+ CHECK(acl_convolution_utils::init_conf_depthwise(acp_, src_md_, -+ weights_md_, dst_md_, bias_md_, *desc(), *attr())); -+ -+ CHECK(post_ops.init( -+ engine, attr_.post_ops_, dst_md_, acp_.act_info)); -+ acp_.use_dst_acc = post_ops.has_sum(); -+ -+ return status::success; -+ } -+ -+ acl_conv_conf_t acp_; -+ -+ acl_post_ops_t post_ops; -+ }; -+ -+ acl_depthwise_convolution_fwd_t(const pd_t *apd) : primitive_t(apd) {} -+ -+ status_t create_resource( -+ engine_t *engine, resource_mapper_t &mapper) const override { -+ if (mapper.has_resource(this)) return status::success; -+ -+ auto r = 
utils::make_unique(); -+ if (!r) return status::out_of_memory; -+ -+ CHECK(r->configure(pd()->acp_)); -+ mapper.add(this, std::move(r)); -+ -+ CHECK(pd()->post_ops.create_resource(engine, mapper)); -+ -+ return status::success; -+ } -+ -+ typedef typename prec_traits::type data_t; -+ -+ status_t execute(const exec_ctx_t &ctx) const override { -+ return execute_forward(ctx); -+ } -+ -+private: -+ mutable std::mutex mtx; -+ status_t execute_forward(const exec_ctx_t &ctx) const; -+ -+ const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); } -+}; -+ -+} // namespace aarch64 -+} // namespace cpu -+} // namespace impl -+} // namespace dnnl -+ -+#endif // CPU_AARCH64_ACL_DEPTHWISE_CONVOLUTION_HPP -diff --git a/src/cpu/cpu_convolution_list.cpp b/src/cpu/cpu_convolution_list.cpp -index 094c73aa36..80385432d8 100644 ---- a/src/cpu/cpu_convolution_list.cpp -+++ b/src/cpu/cpu_convolution_list.cpp -@@ -63,6 +63,7 @@ using namespace dnnl::impl::cpu::x64; - #include "cpu/aarch64/jit_sve_512_x8s8s32x_convolution.hpp" - #include "cpu/aarch64/jit_uni_dw_convolution.hpp" - #if DNNL_AARCH64 && DNNL_AARCH64_USE_ACL -+#include "cpu/aarch64/acl_depthwise_convolution.hpp" - #include "cpu/aarch64/acl_gemm_convolution.hpp" - #include "cpu/aarch64/acl_indirect_gemm_convolution.hpp" - #endif -@@ -102,6 +103,7 @@ const std::map> &impl_list_map() - CPU_INSTANCE_AARCH64(jit_sve_512_dw_convolution_fwd_t) - CPU_INSTANCE_AARCH64(jit_sve_512_1x1_convolution_fwd_f32_t) - CPU_INSTANCE_AARCH64(jit_sve_512_convolution_fwd_t) -+ CPU_INSTANCE_AARCH64_ACL(acl_depthwise_convolution_fwd_t) - CPU_INSTANCE_AARCH64_ACL(acl_indirect_gemm_convolution_fwd_t) - CPU_INSTANCE_AARCH64_ACL(acl_gemm_convolution_fwd_t) - CPU_INSTANCE(gemm_convolution_fwd_t) diff --git a/third_party/mkl_dnn/onednn_acl_fixed_format_kernels.patch b/third_party/mkl_dnn/onednn_acl_fixed_format_kernels.patch deleted file mode 100644 index 5d918564fb1515..00000000000000 --- 
a/third_party/mkl_dnn/onednn_acl_fixed_format_kernels.patch +++ /dev/null @@ -1,1166 +0,0 @@ - ******************************************************************************* - Copyright 2023 Arm Limited and affiliates. - SPDX-License-Identifier: Apache-2.0 - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - ******************************************************************************* -diff --git a/src/common/matmul_pd.hpp b/src/common/matmul_pd.hpp -index 4330ad938b..df16c5fcca 100644 ---- a/src/common/matmul_pd.hpp -+++ b/src/common/matmul_pd.hpp -@@ -159,6 +159,19 @@ protected: - - return true; - } -+ -+ // All implementations that do not support sparse inputs/outputs should -+ // call this function. 
-+ bool is_dense_data() { -+#ifdef DNNL_EXPERIMENTAL_SPARSE -+ for (auto md : {&src_md_, &weights_md_, &bias_md_, &dst_md_}) { -+ if (memory_desc_wrapper(md).format_kind() == format_kind::sparse) -+ return false; -+ } -+#endif -+ return true; -+ } -+ - }; - - } // namespace impl -diff --git a/src/cpu/aarch64/acl_convolution_utils.cpp b/src/cpu/aarch64/acl_convolution_utils.cpp -index 37f8ecbc06..6b57374643 100644 ---- a/src/cpu/aarch64/acl_convolution_utils.cpp -+++ b/src/cpu/aarch64/acl_convolution_utils.cpp -@@ -41,25 +41,23 @@ status_t acl_init_conf(acl_conv_conf_t &acp, memory_desc_t &src_md, - const memory_desc_wrapper dst_d(&dst_md); - const memory_desc_wrapper bia_d(&bias_md); - -- auto math_mode = get_fpmath_mode(); -- acp.fast_math = one_of(math_mode, fpmath_mode::bf16, fpmath_mode::any); -- - // Compute Library currently supports forward propagation only - const prop_kind_t prop_kind = cd.prop_kind; - const bool is_fwd = (prop_kind == dnnl_forward_training) - || (prop_kind == dnnl_forward_inference); - if (!is_fwd) return status::unimplemented; - -- const int with_groups = wei_d.ndims() == src_d.ndims() + 1; - const int ndims = src_d.ndims(); -- const bool is_1d = ndims == 3; -- const bool is_3d = ndims == 5; -- bool is_nspc; - -- // Compute Library unsupported shape scenarios -- if (one_of(true, is_3d, is_1d, with_groups)) { -- return status::unimplemented; -- } -+ ACL_CHECK_SUPPORT(ndims != 4, " only supports 2 spatial dimensions"); -+ -+ const int with_groups = wei_d.ndims() == src_d.ndims() + 1; -+ ACL_CHECK_SUPPORT(with_groups, " does not support groups"); -+ -+ ACL_CHECK_SUPPORT(src_d.data_type() != data_type::f32 -+ || wei_d.data_type() != data_type::f32 -+ || dst_d.data_type() != data_type::f32, -+ " src, dst and wei must be fp32"); - - // batch size - const int mb = src_d.dims()[0]; -@@ -110,108 +108,143 @@ status_t acl_init_conf(acl_conv_conf_t &acp, memory_desc_t &src_md, - - acp.with_bias = cd.bias_desc.format_kind != format_kind::undef; - -- 
auto set_or_check_tags = [&](format_tag_t desired_src_tag, -- format_tag_t desired_dst_tag) -> status_t { -- using namespace format_tag; -- auto src_tag = any, dst_tag = any; -- -- if (src_d.format_kind() == format_kind::any) { -- CHECK(memory_desc_init_by_tag(src_md, desired_src_tag)); -- src_tag = desired_src_tag; -- } else { -- src_tag = memory_desc_matches_one_of_tag(src_md, nhwc, nchw); -- } -- -- if (dst_d.format_kind() == format_kind::any) { -- CHECK(memory_desc_init_by_tag(dst_md, desired_dst_tag)); -- dst_tag = desired_dst_tag; -- } else { -- dst_tag = memory_desc_matches_one_of_tag(dst_md, nhwc, nchw); -- } -- -- if (acp.with_bias && bias_md.format_kind == format_kind::any) -- CHECK(memory_desc_init_by_tag(bias_md, x)); -- -- is_nspc = utils::one_of(src_tag, nhwc); -- -- memory_desc_t want_wei_md = weights_md; -- auto wei_tag = is_nspc ? ohwi : oihw; -- CHECK(memory_desc_init_by_tag(want_wei_md, wei_tag)); -- -- // Compute Library does not support mismatching layouts -- if ((src_tag != wei_tag) || (src_tag != dst_tag)) -- return status::unimplemented; -+ if (wei_d.format_kind() != format_kind::any) return status::unimplemented; -+ -+ auto src_tag = memory_desc_matches_one_of_tag( -+ src_md, format_tag::nhwc, format_tag::nchw); -+ auto dst_tag = memory_desc_matches_one_of_tag( -+ dst_md, format_tag::nhwc, format_tag::nchw); -+ -+ // We want src and dst to match, preferrably both to be NHWC -+ if (src_d.format_kind() == format_kind::any -+ && dst_d.format_kind() == format_kind::any) { -+ CHECK(memory_desc_init_by_tag(src_md, format_tag::nhwc)); -+ CHECK(memory_desc_init_by_tag(dst_md, format_tag::nhwc)); -+ } else if (src_d.format_kind() == format_kind::any -+ && dst_tag != format_tag::undef) { -+ CHECK(memory_desc_init_by_tag(src_md, dst_tag)); -+ } else if (dst_d.format_kind() == format_kind::any -+ && src_tag != format_tag::undef) { -+ CHECK(memory_desc_init_by_tag(dst_md, src_tag)); -+ } - -- if (weights_md.format_kind == format_kind::any) { -- 
weights_md = want_wei_md; -- } -- return (want_wei_md == weights_md) ? status::success -- : status::unimplemented; -- }; -+ // Recompute tags after potentially running memory desc init -+ src_tag = memory_desc_matches_one_of_tag( -+ src_md, format_tag::nhwc, format_tag::nchw); -+ dst_tag = memory_desc_matches_one_of_tag( -+ dst_md, format_tag::nhwc, format_tag::nchw); - -- auto default_dat_tag = format_tag::nhwc; -- if (set_or_check_tags(default_dat_tag, default_dat_tag) != status::success) -+ if (src_tag == format_tag::undef || dst_tag == format_tag::undef -+ || src_tag != dst_tag) - return status::unimplemented; - -- const auto acl_layout = is_nspc ? arm_compute::DataLayout::NHWC -- : arm_compute::DataLayout::NCHW; -+ // Set weights to initially be the same as src -+ CHECK(memory_desc_init_by_tag(weights_md, src_tag)); - -- // For convolutions, int8 datatypes imply quantized types in ACL -- acp.is_int8 = utils::one_of(src_d.data_type(), s8, u8) -- && wei_d.data_type() == s8; -+ // Bias is just 1D, set to be the obvious format -+ if (acp.with_bias && bias_md.format_kind == format_kind::any) -+ CHECK(memory_desc_init_by_tag(bias_md, format_tag::x)); - -- auto acl_src_data_t -- = acl_utils::get_acl_data_t(src_d.data_type(), acp.is_int8); -- auto acl_wei_data_t -- = acl_utils::get_acl_data_t(wei_d.data_type(), acp.is_int8); -- auto acl_dst_data_t -- = acl_utils::get_acl_data_t(dst_d.data_type(), acp.is_int8); -- auto acl_bia_data_t -- = acl_utils::get_acl_data_t(bia_d.data_type(), acp.is_int8); -+ bool is_nhwc = src_tag == format_tag::nhwc; -+ // The layouts have to match (although we may later modify the weights) -+ const auto acl_layout = is_nhwc ? arm_compute::DataLayout::NHWC -+ : arm_compute::DataLayout::NCHW; - -- if (acl_bia_data_t == arm_compute::DataType::UNKNOWN) -- acl_bia_data_t = arm_compute::DataType::F32; -+ auto acl_data_type = arm_compute::DataType::F32; - - // clang-format off -- acp.src_info = arm_compute::TensorInfo( -- is_nspc ? 
arm_compute::TensorShape(ic, iw, ih, mb) : -+ acp.src_tensor_info = arm_compute::TensorInfo( -+ is_nhwc ? arm_compute::TensorShape(ic, iw, ih, mb) : - arm_compute::TensorShape(iw, ih, ic, mb), - 1, -- acl_src_data_t, -+ acl_data_type, - acl_layout); - -- acp.wei_info = arm_compute::TensorInfo( -- is_nspc ? arm_compute::TensorShape(ic, kw, kh, oc) : -+ acp.wei_tensor_info = arm_compute::TensorInfo( -+ is_nhwc ? arm_compute::TensorShape(ic, kw, kh, oc) : - arm_compute::TensorShape(kw, kh, ic, oc), - 1, -- acl_wei_data_t, -+ acl_data_type, - acl_layout); - -- acp.dst_info = arm_compute::TensorInfo( -- is_nspc ? arm_compute::TensorShape(oc, ow, oh, mb) : -+ acp.dst_tensor_info = arm_compute::TensorInfo( -+ is_nhwc ? arm_compute::TensorShape(oc, ow, oh, mb) : - arm_compute::TensorShape(ow, oh, oc, mb), - 1, -- acl_dst_data_t, -+ acl_data_type, - acl_layout); - -- acp.bia_info = arm_compute::TensorInfo( -+ acp.bia_tensor_info = arm_compute::TensorInfo( - acp.with_bias ? arm_compute::TensorShape(oc) - : arm_compute::TensorShape(), - 1, -- acl_bia_data_t, -+ acl_data_type, - acl_layout); - // clang-format on - -- // Add quantization info to tensors -- if (acp.is_int8) { -- const float *scales = attr.output_scales_.scales_; -- acp.src_info.set_quantization_info(arm_compute::QuantizationInfo(1, 0)); -- acp.bia_info.set_quantization_info(arm_compute::QuantizationInfo(1, 0)); -- acp.wei_info.set_quantization_info(arm_compute::QuantizationInfo(1, 0)); -- acp.dst_info.set_quantization_info( -- arm_compute::QuantizationInfo(1.0f / scales[0], 0)); -+ // Are we allowed to cast down to bf16 or not? -+ acp.fast_math -+ = one_of(attr.fpmath_mode_, fpmath_mode::bf16, fpmath_mode::any); -+ -+ // WeightFormat::ANY tells ACL we can handle any format -+ acp.weights_info = arm_compute::WeightsInfo( -+ false, kw, kh, oc, false, arm_compute::WeightFormat::ANY); -+ -+ // Get the format that the ACL kernel will expect the weights to be -+ // in (if a kernel exists). 
Note that these are referred to as fixed format -+ // kernels, because they require one specific weights format -+ arm_compute::WeightFormat expected_weight_format; -+ ACL_CHECK_VALID(arm_compute::NEGEMMConvolutionLayer::has_opt_impl( -+ expected_weight_format, &acp.src_tensor_info, &acp.wei_tensor_info, -+ acp.with_bias ? &acp.bia_tensor_info : nullptr, -+ &acp.dst_tensor_info, acp.padstride_info, acp.weights_info, -+ acp.dilation_info, acp.act_info, acp.fast_math)); -+ -+ // Set weights info to the one returned by has_opt_impl -+ acp.weights_info.set_weight_format(expected_weight_format); -+ -+ // has_opt_impl may return a non fast math kernel, even if we requested one -+ acp.fast_math -+ = arm_compute::is_fixed_format_fast_math(expected_weight_format); -+ -+ // Map OIHW used in ACL WeightFormat to the logical dimensions of the memory descriptor -+ dim_t O_dim = 0; -+ dim_t I_dim = 1; -+ dim_t H_dim = 2; -+ dim_t W_dim = 3; -+ -+ if (!is_nhwc) { -+ // We can try to support NCHW by swapping IHW around, note that this -+ // requires weights_md.dims[I_dim] % block_by != 0 (see next block) -+ O_dim = 0; -+ I_dim = 3; -+ H_dim = 1; -+ W_dim = 2; - } - -+ // We can't currently support nchw and block_by != 1. If this is the case, -+ // try a non fast math kernel, which currently have no blocking -+ int block_by = arm_compute::block_by(acp.weights_info.weight_format()); -+ if (!is_nhwc && weights_md.dims[I_dim] % block_by != 0 && acp.fast_math) { -+ acp.fast_math = false; -+ acp.weights_info.set_weight_format(arm_compute::WeightFormat::ANY); -+ ACL_CHECK_VALID(arm_compute::NEGEMMConvolutionLayer::has_opt_impl( -+ expected_weight_format, &acp.src_tensor_info, -+ &acp.wei_tensor_info, -+ acp.with_bias ? 
&acp.bia_tensor_info : nullptr, -+ &acp.dst_tensor_info, acp.padstride_info, acp.weights_info, -+ acp.dilation_info, acp.act_info, acp.fast_math)); -+ acp.weights_info.set_weight_format(expected_weight_format); -+ block_by = arm_compute::block_by(expected_weight_format); -+ // This shouldn't happen, because non-fastmath have no blocking, but -+ // guard against it because it would silently return incorrect results -+ if (weights_md.dims[I_dim] % block_by != 0) -+ return status::unimplemented; -+ } -+ -+ acl_utils::reorder_to_weight_format(acp.wei_tensor_info, weights_md, -+ expected_weight_format, I_dim, O_dim, {W_dim, H_dim}, {}); -+ - return status::success; - } - -@@ -226,10 +259,10 @@ status_t init_conf_gemm(acl_conv_conf_t &acp, memory_desc_t &src_md, - // clang-format off - // Validate convolution manually to check for return status - ACL_CHECK_VALID(arm_compute::NEGEMMConvolutionLayer::validate( -- &acp.src_info, -- &acp.wei_info, -- acp.with_bias ? &acp.bia_info : nullptr, -- &acp.dst_info, -+ &acp.src_tensor_info, -+ &acp.wei_tensor_info, -+ acp.with_bias ? 
&acp.bia_tensor_info : nullptr, -+ &acp.dst_tensor_info, - acp.padstride_info, - acp.weights_info, - acp.dilation_info, -@@ -244,28 +277,38 @@ status_t init_conf_indirect_gemm(acl_conv_conf_t &acp, memory_desc_t &src_md, - memory_desc_t &weights_md, memory_desc_t &dst_md, - memory_desc_t &bias_md, const convolution_desc_t &cd, - const primitive_attr_t &attr) { -- // Indirect convolution results in slowdown for low thread count or 1x1 -- // kernels, so fall back to GEMM-based convolution in these cases -- if (one_of(true, weights_md.dims[2] == 1, // kh -- weights_md.dims[3] == 1, // kw -- dnnl_get_max_threads() < 28)) { -+ -+ // Indirect is slower for small convolution kernels -+ if (weights_md.dims[2] == 1 && weights_md.dims[3] == 1) - return status::unimplemented; -- } - - CHECK(acl_init_conf(acp, src_md, weights_md, dst_md, bias_md, cd, attr)); - -+ // Indirect is slower than gemm for low thread counts, except for fast math -+ if (dnnl_get_max_threads() < 28 && !acp.fast_math) -+ return status::unimplemented; -+ -+ // If we do not need to pad input channels for fast math mode then it would -+ // be faster to run convolution with im2row instead of using indirect kernel -+ int block_by = arm_compute::block_by(acp.weights_info.weight_format()); -+ int ic = src_md.dims[1]; -+ if (acp.fast_math && ic % block_by == 0) return status::unimplemented; -+ -+ // TODO: remove this once NEGEMMConv2d::validate allows src and weights to mismatch -+ acp.wei_tensor_info.set_data_layout(arm_compute::DataLayout::NHWC); -+ - // clang-format off - // NOTE: indirect convolution method supports only nhwc layout. - ACL_CHECK_VALID(arm_compute::NEGEMMConv2d::validate( -- &acp.src_info, -- &acp.wei_info, -- acp.with_bias ? &acp.bia_info : nullptr, -- &acp.dst_info, -+ &acp.src_tensor_info, -+ &acp.wei_tensor_info, -+ acp.with_bias ? 
&acp.bia_tensor_info : nullptr, -+ &acp.dst_tensor_info, - arm_compute::Conv2dInfo(acp.padstride_info, - acp.dilation_info, - acp.act_info, - acp.fast_math, -- 1))); -+ 1, {}, acp.weights_info))); - // clang-format on - - return status::success; -diff --git a/src/cpu/aarch64/acl_convolution_utils.hpp b/src/cpu/aarch64/acl_convolution_utils.hpp -index 0398ab06b9..e3d40a5e75 100644 ---- a/src/cpu/aarch64/acl_convolution_utils.hpp -+++ b/src/cpu/aarch64/acl_convolution_utils.hpp -@@ -38,17 +38,17 @@ struct acl_obj_t { - - struct acl_conv_conf_t { - bool with_bias; -- bool is_int8; - bool fast_math; - // If this is true, the result of the convolution goes into a temporarily - // allocated ACL tensor to be accumulated into the oneDNN dst during postops - bool use_dst_acc; -- arm_compute::TensorInfo src_info; -- arm_compute::TensorInfo wei_info; -- arm_compute::TensorInfo bia_info; -- arm_compute::TensorInfo dst_info; -+ arm_compute::TensorInfo src_tensor_info; -+ arm_compute::TensorInfo wei_tensor_info; -+ arm_compute::TensorInfo bia_tensor_info; -+ arm_compute::TensorInfo dst_tensor_info; - arm_compute::PadStrideInfo padstride_info; - arm_compute::Size2D dilation_info; -+ // Additional information about the weights not included in wei_tensor_info - arm_compute::WeightsInfo weights_info; - // Note: this will default to not enabled, and will do nothing - arm_compute::ActivationLayerInfo act_info; -diff --git a/src/cpu/aarch64/acl_gemm_convolution.hpp b/src/cpu/aarch64/acl_gemm_convolution.hpp -index 485db954ea..da58e4f610 100644 ---- a/src/cpu/aarch64/acl_gemm_convolution.hpp -+++ b/src/cpu/aarch64/acl_gemm_convolution.hpp -@@ -1,5 +1,5 @@ - /******************************************************************************* --* Copyright 2020-2022 Arm Ltd. and affiliates -+* Copyright 2020-2023 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
-@@ -36,10 +36,10 @@ struct acl_resource_t : public resource_t { - if (!acl_obj_) return status::out_of_memory; - - // Init Compute Library tensors based on info from descriptor -- acl_obj_->src_tensor.allocator()->init(acp.src_info); -- acl_obj_->wei_tensor.allocator()->init(acp.wei_info); -- acl_obj_->dst_tensor.allocator()->init(acp.dst_info); -- acl_obj_->bia_tensor.allocator()->init(acp.bia_info); -+ acl_obj_->src_tensor.allocator()->init(acp.src_tensor_info); -+ acl_obj_->wei_tensor.allocator()->init(acp.wei_tensor_info); -+ acl_obj_->dst_tensor.allocator()->init(acp.dst_tensor_info); -+ acl_obj_->bia_tensor.allocator()->init(acp.bia_tensor_info); - - acl_obj_->conv.configure(&acl_obj_->src_tensor, &acl_obj_->wei_tensor, - acp.with_bias ? &acl_obj_->bia_tensor : nullptr, -diff --git a/src/cpu/aarch64/acl_indirect_gemm_convolution.hpp b/src/cpu/aarch64/acl_indirect_gemm_convolution.hpp -index bcf031a771..b7c8dce894 100644 ---- a/src/cpu/aarch64/acl_indirect_gemm_convolution.hpp -+++ b/src/cpu/aarch64/acl_indirect_gemm_convolution.hpp -@@ -1,5 +1,5 @@ - /******************************************************************************* --* Copyright 2021-2022 Arm Ltd. and affiliates -+* Copyright 2021-2023 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
-@@ -35,10 +35,10 @@ struct acl_indirect_gemm_resource_t : public resource_t { - if (!acl_obj_) return status::out_of_memory; - - // Init Compute Library tensors based on info from descriptor -- acl_obj_->src_tensor.allocator()->init(acp.src_info); -- acl_obj_->wei_tensor.allocator()->init(acp.wei_info); -- acl_obj_->dst_tensor.allocator()->init(acp.dst_info); -- acl_obj_->bia_tensor.allocator()->init(acp.bia_info); -+ acl_obj_->src_tensor.allocator()->init(acp.src_tensor_info); -+ acl_obj_->wei_tensor.allocator()->init(acp.wei_tensor_info); -+ acl_obj_->dst_tensor.allocator()->init(acp.dst_tensor_info); -+ acl_obj_->bia_tensor.allocator()->init(acp.bia_tensor_info); - - // clang-format off - acl_obj_->conv.configure( -@@ -50,7 +50,9 @@ struct acl_indirect_gemm_resource_t : public resource_t { - acp.dilation_info, - acp.act_info, - acp.fast_math, -- 1)); -+ 1, -+ {}, -+ acp.weights_info)); - // clang-format on - - return status::success; -diff --git a/src/cpu/aarch64/acl_inner_product.hpp b/src/cpu/aarch64/acl_inner_product.hpp -index c5e507085f..a27df640fb 100644 ---- a/src/cpu/aarch64/acl_inner_product.hpp -+++ b/src/cpu/aarch64/acl_inner_product.hpp -@@ -40,11 +40,13 @@ struct acl_ip_conf_t { - // If this is true, the result of the inner product goes into a temporarily - // allocated ACL tensor to be accumulated into the oneDNN dst during postops - bool use_dst_acc; -- arm_compute::TensorInfo src_info; -- arm_compute::TensorInfo wei_info; -- arm_compute::TensorInfo bia_info; -- arm_compute::TensorInfo dst_info; -+ arm_compute::TensorInfo src_tensor_info; -+ arm_compute::TensorInfo wei_tensor_info; -+ arm_compute::TensorInfo bia_tensor_info; -+ arm_compute::TensorInfo dst_tensor_info; - arm_compute::FullyConnectedLayerInfo fc_info; -+ // Additional information about the weights not included in wei_tensor_info -+ arm_compute::WeightsInfo weights_info; - }; - struct acl_ip_resource_t : public resource_t { - acl_ip_resource_t() : acl_ip_obj_(utils::make_unique()) {} 
-@@ -53,10 +55,10 @@ struct acl_ip_resource_t : public resource_t { - if (!acl_ip_obj_) return status::out_of_memory; - - // Init Compute Library tensors based on info from descriptor -- acl_ip_obj_->src_tensor.allocator()->init(aip.src_info); -- acl_ip_obj_->wei_tensor.allocator()->init(aip.wei_info); -- acl_ip_obj_->dst_tensor.allocator()->init(aip.dst_info); -- acl_ip_obj_->bia_tensor.allocator()->init(aip.bia_info); -+ acl_ip_obj_->src_tensor.allocator()->init(aip.src_tensor_info); -+ acl_ip_obj_->wei_tensor.allocator()->init(aip.wei_tensor_info); -+ acl_ip_obj_->dst_tensor.allocator()->init(aip.dst_tensor_info); -+ acl_ip_obj_->bia_tensor.allocator()->init(aip.bia_tensor_info); - - // clang-format off - acl_ip_obj_->fc.configure( -@@ -64,7 +66,8 @@ struct acl_ip_resource_t : public resource_t { - &acl_ip_obj_->wei_tensor, - aip.with_bias ? &acl_ip_obj_->bia_tensor : nullptr, - &acl_ip_obj_->dst_tensor, -- aip.fc_info); -+ aip.fc_info, -+ aip.weights_info); - // clang-format on - - return status::success; -@@ -89,12 +92,16 @@ struct acl_inner_product_fwd_t : public primitive_t { - DECLARE_COMMON_PD_T("acl", acl_inner_product_fwd_t); - - status_t init(engine_t *engine) { -- const bool ok = is_fwd() && !has_zero_dim_memory() -- && expect_data_types(data_type::f32, data_type::f32, -- data_type::f32, data_type::f32, data_type::f32) -+ using namespace data_type; -+ const bool is_fp16_ok = expect_data_types(f16, f16, f16, f16, undef) -+ && attr()->has_default_values( -+ primitive_attr_t::skip_mask_t::post_ops, f16); -+ const bool is_fp32_ok = expect_data_types(f32, f32, f32, f32, undef) - && attr()->has_default_values( -- primitive_attr_t::skip_mask_t::post_ops, -- data_type::f32) -+ primitive_attr_t::skip_mask_t::post_ops, f32); -+ const bool ok = is_fwd() && !has_zero_dim_memory() -+ && utils::one_of(true, is_fp16_ok, is_fp32_ok) -+ && weights_md_.format_kind == format_kind::any - && set_default_params() == status::success; - - if (!ok) return 
status::unimplemented; -@@ -121,88 +128,46 @@ struct acl_inner_product_fwd_t : public primitive_t { - ACL_CHECK_SUPPORT( - !(is_2d || is_4d), "ACL supports only 2d or 4d cases"); - -- // batch size -- const int n = src_md()->dims[0]; -- -- // input and output channels -- const int ic = src_md()->dims[1]; -- const int oc = dst_md()->dims[1]; -- -- // source spatial dimensions -- const int ih = is_4d ? src_md()->dims[ndims - 2] : 0; -- const int iw = is_4d ? src_md()->dims[ndims - 1] : 0; -- -- // weights spatial dimensions -- const int kh = is_4d ? weights_md()->dims[ndims - 2] : 0; -- const int kw = is_4d ? weights_md()->dims[ndims - 1] : 0; -- -- // Only NCHW or NHWC derivatives supported by ACL kernels - using namespace format_tag; -- auto src_tag = memory_desc_matches_one_of_tag( -- src_md_, nhwc, nchw, nc, cn); -- auto wei_tag = memory_desc_matches_one_of_tag( -- weights_md_, ohwi, oihw, oi, io); -- auto dst_tag = memory_desc_matches_one_of_tag(dst_md_, nc, cn); -+ auto src_tag -+ = memory_desc_matches_one_of_tag(src_md_, nhwc, nchw, nc); -+ auto dst_tag = memory_desc_matches_one_of_tag(dst_md_, nc); - - ACL_CHECK_SUPPORT( -- utils::one_of(format_tag::undef, src_tag, wei_tag, dst_tag), -+ utils::one_of(format_tag::undef, src_tag, dst_tag), - "unsupported memory layout"); - - ACL_CHECK_SUPPORT(is_2d && src_tag != dst_tag, - "for src and dst layouts must match"); - -- arm_compute::TensorShape src_shape, wei_shape; -- if (is_2d) { -- src_shape = (src_tag == nc) ? arm_compute::TensorShape(ic, n) -- : arm_compute::TensorShape(n, ic); -- -- wei_shape = (wei_tag == io) ? arm_compute::TensorShape(oc, ic) -- : arm_compute::TensorShape(ic, oc); -- } -- if (is_4d) { -- src_shape = (src_tag == nhwc) -- ? arm_compute::TensorShape(ic, iw, ih, n) -- : arm_compute::TensorShape(iw, ih, ic, n); -- -- // ACL requires the weights to be in 2D flattened shape -- const int flattened_ic = is_4d ? 
ic * kh * kw : ic; -- wei_shape = arm_compute::TensorShape(flattened_ic, oc); -- } -- -- arm_compute::DataLayout src_layout = (src_tag == nhwc) -- ? arm_compute::DataLayout::NHWC -- : arm_compute::DataLayout::NCHW; -+ const dim_t ic_total = IC_total(); -+ const dim_t n = MB(); -+ const dim_t oc = OC(); - -- arm_compute::DataLayout wei_layout = (wei_tag == ohwi) -- ? arm_compute::DataLayout::NHWC -- : arm_compute::DataLayout::NCHW; -+ aip.src_tensor_info = arm_compute::TensorInfo( -+ arm_compute::TensorShape(ic_total, n), 1, -+ acl_utils::get_acl_data_t(src_md()->data_type)); - -- aip.src_info = arm_compute::TensorInfo( -- src_shape, 1, arm_compute::DataType::F32, src_layout); -+ // ACL requires the weights to be in 2D flattened shape -+ aip.wei_tensor_info = arm_compute::TensorInfo( -+ arm_compute::TensorShape(oc, ic_total), 1, -+ acl_utils::get_acl_data_t(weights_md(0)->data_type)); - -- aip.wei_info = arm_compute::TensorInfo( -- wei_shape, 1, arm_compute::DataType::F32, wei_layout); -- -- aip.dst_info -- = arm_compute::TensorInfo(arm_compute::TensorShape(oc, n), -- 1, arm_compute::DataType::F32); -+ auto acl_dst_data_t -+ = acl_utils::get_acl_data_t(dst_md()->data_type); -+ aip.dst_tensor_info = arm_compute::TensorInfo( -+ arm_compute::TensorShape(oc, n), 1, acl_dst_data_t); - - aip.with_bias = desc()->bias_desc.format_kind != format_kind::undef; -- aip.bia_info = arm_compute::TensorInfo(aip.with_bias -+ auto acl_bia_data_t = aip.with_bias -+ ? acl_utils::get_acl_data_t(weights_md(1)->data_type) -+ : acl_dst_data_t; -+ aip.bia_tensor_info = arm_compute::TensorInfo(aip.with_bias - ? 
arm_compute::TensorShape(oc) - : arm_compute::TensorShape(), - 1, arm_compute::DataType::F32); - -- aip.fc_info.weights_trained_layout = wei_layout; -- if (is_2d && wei_tag != src_tag) { -- // weights are already transposed -- aip.fc_info.transpose_weights = false; -- -- if (desc()->prop_kind == dnnl_forward_training) { -- aip.wei_info.set_are_values_constant(false); -- aip.fc_info.are_weights_reshaped = true; -- } -- } -+ aip.fc_info.transpose_weights = false; - - // Fast math mode - auto math_mode = get_fpmath_mode(); -@@ -214,15 +179,103 @@ struct acl_inner_product_fwd_t : public primitive_t { - aip.fc_info.activation_info)); - aip.use_dst_acc = post_ops.has_sum(); - -+ // WeightFormat::ANY tells ACL we can handle any format -+ aip.weights_info = arm_compute::WeightsInfo(false, 1, 1, ic_total, -+ false, arm_compute::WeightFormat::ANY); -+ -+ // Get the format that the ACL kernel will expect the weights to be -+ // in (if a kernel exists) Note that these are referred to as fixed -+ // format kernels, because they require one specific weights format -+ arm_compute::WeightFormat expected_weight_format; -+ ACL_CHECK_VALID(arm_compute::NEFullyConnectedLayer::has_opt_impl( -+ expected_weight_format, &aip.src_tensor_info, -+ &aip.wei_tensor_info, -+ aip.with_bias ? &aip.bia_tensor_info : nullptr, -+ &aip.dst_tensor_info, aip.fc_info, aip.weights_info)); -+ -+ // Set weights info to the one returned by has_opt_impl -+ aip.weights_info.set_weight_format(expected_weight_format); -+ -+ // has_opt_impl may return a non fast math kernel, even if requested -+ aip.fc_info.enable_fast_math -+ = arm_compute::is_fixed_format_fast_math( -+ expected_weight_format); -+ -+ // Inner product is the same as the matmul n x (chw) * (ihw) x o -+ // (note that the src c and weights i both correspond to the input -+ // channel). 
ACL FullyConnectedLayer assumes the chw dimensions of -+ // src and ihw dimensions of weights are collapsed, so we need to -+ // make sure that they have the same layout. Given that weights are -+ // more often fixed, (so reorders can be hoisted) it makes sense to -+ // reorder the weights to fit the src. -+ -+ // For 4D tensors we need to: -+ // - reorder the ihw of the weights to match the src chw -+ // - collapse ihw -+ // - pad the collapsed ihw -+ // But there is not yet a way to express this collapse+pad as a -+ // reorder. So we try to reorder the weights to match the src, -+ // implicitly collapse ihw in our definition of the weights -+ // TensorInfo and hope that the inner_dim has zero padding -+ // (weights_md_.dims[inner_dim] % block_by == 0). If it does, we -+ // fall back to a kernel without blocking (currently this is -+ // equivalent to non-fastmath). -+ -+ // 2D just works because we just pad the only dimension. -+ -+ // o_dim is always the first logical dimension (oihw, ohwi, oi) -+ dim_t o_dim = 0; -+ dim_t inner_dim; -+ // Rest of logical dimensions in order of innermost to outermost -+ std::vector remaining_dims = {}; -+ -+ if (src_tag == nchw) { -+ inner_dim = 3; // w -+ remaining_dims = {2, 1}; // h, i -+ } else if (src_tag == nhwc) { -+ inner_dim = 1; // i -+ remaining_dims = {3, 2}; // w, h -+ } else { // Only remaining case is 2D (nc) -+ inner_dim = 1; // i -+ remaining_dims = {}; // No other dimensions for 2D -+ } -+ -+ // Fallback -+ int block_by = arm_compute::block_by(expected_weight_format); -+ if (is_4d && weights_md_.dims[inner_dim] % block_by != 0 -+ && aip.fc_info.enable_fast_math) { -+ aip.fc_info.enable_fast_math = false; -+ aip.weights_info.set_weight_format( -+ arm_compute::WeightFormat::ANY); -+ ACL_CHECK_VALID( -+ arm_compute::NEFullyConnectedLayer::has_opt_impl( -+ expected_weight_format, &aip.src_tensor_info, -+ &aip.wei_tensor_info, -+ aip.with_bias ? 
&aip.bia_tensor_info : nullptr, -+ &aip.dst_tensor_info, aip.fc_info, -+ aip.weights_info)); -+ aip.weights_info.set_weight_format(expected_weight_format); -+ block_by = arm_compute::block_by(expected_weight_format); -+ if (weights_md_.dims[inner_dim] % block_by != 0) -+ return status::unimplemented; -+ } -+ -+ acl_utils::reorder_to_weight_format(aip.wei_tensor_info, -+ weights_md_, expected_weight_format, inner_dim, o_dim, -+ remaining_dims, {}); -+ - // clang-format off -+ - // Validate fully connected layer manually to check for return status - ACL_CHECK_VALID(arm_compute::NEFullyConnectedLayer::validate( -- &aip.src_info, -- &aip.wei_info, -- aip.with_bias ? &aip.bia_info : nullptr, -- &aip.dst_info, -- aip.fc_info)); -+ &aip.src_tensor_info, -+ &aip.wei_tensor_info, -+ aip.with_bias ? &aip.bia_tensor_info : nullptr, -+ &aip.dst_tensor_info, -+ aip.fc_info, -+ aip.weights_info)); - // clang-format on -+ - return status::success; - } - }; // pd_t -diff --git a/src/cpu/aarch64/acl_utils.cpp b/src/cpu/aarch64/acl_utils.cpp -index 79ea775d6d..5792fd4911 100644 ---- a/src/cpu/aarch64/acl_utils.cpp -+++ b/src/cpu/aarch64/acl_utils.cpp -@@ -1,5 +1,5 @@ - /******************************************************************************* --* Copyright 2021-2022 Arm Ltd. and affiliates -+* Copyright 2021-2023 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
-@@ -261,6 +261,75 @@ int reorder_dimensions_by_stride(std::vector permuted_mds, - return reordered_dims; - } - -+void reorder_to_weight_format(arm_compute::TensorInfo &info, memory_desc_t &md, -+ arm_compute::WeightFormat wf, dim_t I_dim, dim_t O_dim, -+ std::vector spatial_dims, std::vector batch_dims) { -+ -+ md.format_kind = format_kind::blocked; -+ md.format_desc.blocking = blocking_desc_t {}; -+ const int interleaved_by = arm_compute::interleave_by(wf); -+ const int block_by = arm_compute::block_by(wf); -+ -+ // I dimension becomes densest (apart from blocking) -+ md.format_desc.blocking.strides[I_dim] = interleaved_by * block_by; -+ md.padded_dims[I_dim] = utils::rnd_up(md.dims[I_dim], block_by); -+ -+ // Then any spatial dimensions (e.g. HW) -+ dim_t ldb = interleaved_by * md.padded_dims[I_dim]; -+ for (dim_t sd : spatial_dims) { -+ md.format_desc.blocking.strides[sd] = ldb; -+ ldb *= md.padded_dims[sd]; -+ } -+ -+ // O dim (which was the innermost) becomes the outermost (apart from batching) -+ md.format_desc.blocking.strides[O_dim] = ldb; -+ md.padded_dims[O_dim] = utils::rnd_up(md.dims[O_dim], interleaved_by); -+ -+ // Update the batch dimensions, starting with stride of the innermost batch -+ const dim_t innermost_batch_stride -+ = md.padded_dims[I_dim] * md.padded_dims[O_dim]; -+ dim_t batch_stride = innermost_batch_stride; -+ for (dim_t bd : batch_dims) { -+ md.format_desc.blocking.strides[bd] = batch_stride; -+ batch_stride *= md.padded_dims[bd]; -+ } -+ -+ // Weights can only be blocked if they are also interleaved -+ if (interleaved_by > 1) { -+ md.format_desc.blocking.inner_nblks = 1 + (block_by > 1); -+ -+ md.format_desc.blocking.inner_idxs[0] = O_dim; -+ md.format_desc.blocking.inner_blks[0] = interleaved_by; -+ if (block_by > 1) { -+ md.format_desc.blocking.inner_idxs[1] = I_dim; -+ md.format_desc.blocking.inner_blks[1] = block_by; -+ } -+ } -+ -+ if (arm_compute::is_fixed_format_fast_math(wf)) { -+ md.data_type = dnnl_bf16; -+ 
info.set_data_type(arm_compute::DataType::BFLOAT16); -+ } -+ -+ // The data layout is now determined by the manually set strides -+ info.set_data_layout(arm_compute::DataLayout::UNKNOWN); -+ -+ // x is ignored in fixed format kernels -+ // y is the leading dimension of b (ldb) in the GEMM d = a*b + c -+ // This is the stride of O_dim in the md -+ // z is the batch dimension (not strictly needed if there's only 1 batch) -+ // i.e. how much do I need to stride to get to the next matmul (ignoring -+ // the interleaving). Note that we use the innermost_batch_stride -+ // because all the batched dimensions are collapsed (as required by ACL). -+ arm_compute::Strides new_strides_in_bytes = info.strides_in_bytes(); -+ new_strides_in_bytes.set(1, ldb * info.element_size()); -+ new_strides_in_bytes.set(2, innermost_batch_stride * info.element_size()); -+ -+ info.init(info.tensor_shape(), info.num_channels(), info.data_type(), -+ new_strides_in_bytes, info.offset_first_element_in_bytes(), -+ memory_desc_wrapper(md).size()); -+} -+ - } // namespace acl_utils - - } // namespace aarch64 -diff --git a/src/cpu/aarch64/acl_utils.hpp b/src/cpu/aarch64/acl_utils.hpp -index 28693bb167..d9affe1c8f 100644 ---- a/src/cpu/aarch64/acl_utils.hpp -+++ b/src/cpu/aarch64/acl_utils.hpp -@@ -1,5 +1,5 @@ - /******************************************************************************* --* Copyright 2021-2022 Arm Ltd. and affiliates -+* Copyright 2021-2023 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. -@@ -74,6 +74,28 @@ status_t insert_singleton_dimension(arm_compute::TensorInfo &ti, size_t dim_i); - int reorder_dimensions_by_stride(std::vector permuted_mds, - std::vector mds); - -+// Reorder a memory_desc_t and set the strides on a arm_compute::TensorInfo to -+// match an arm_compute::WeightFormat. 
You are required to specify how various -+// logical dimensions in oneDNN correspond to logical dimensions in arm_compute. -+// info TensorInfo where the strides will be changed to match the reordering -+// md memory descriptor where the stride and padded dimensions will be -+// changed or reordering -+// wf Describes the memory format/layout of the weights -+// I_dim The logical dimension of md corresponding to the input channel of -+// a convolution or the K dimension in a matmul -+// O_dim The logical dimension of md corresponding to the output channel of a -+//   convolution or the N dimension in a matmul -+// spatial_dims The logical dimensions of md corresponding to the spatial -+// dimensions of the weights (H, W, D for example). These will be -+// the next densest after the inner blocks and the input channel. -+// batch_dims The logical dimensions of md related to the batch in a batched -+// matmul, ordered from innermost to outermost. ACL calls these -+// the multi_stride_b. These will become the outermost (least dense) -+// dimensions and will be collapsed. -+void reorder_to_weight_format(arm_compute::TensorInfo &info, memory_desc_t &md, -+ arm_compute::WeightFormat wf, dim_t I_dim, dim_t O_dim, -+ std::vector spatial_dims, std::vector batch_dims = {}); -+ - // Logs a custom 'info' line describing an unsupported case - #define LOG_ACL_UNSUPPORTED(msg) \ - do { \ -diff --git a/src/cpu/aarch64/matmul/acl_matmul.cpp b/src/cpu/aarch64/matmul/acl_matmul.cpp -index dce220fb6e..ca1c7eb47e 100644 ---- a/src/cpu/aarch64/matmul/acl_matmul.cpp -+++ b/src/cpu/aarch64/matmul/acl_matmul.cpp -@@ -1,5 +1,5 @@ - /******************************************************************************* --* Copyright 2021-2022 Arm Ltd. and affiliates -+* Copyright 2021-2023 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
-@@ -31,36 +31,19 @@ status_t acl_matmul_t::execute_forward(const exec_ctx_t &ctx) const { - auto wei_base = CTX_IN_MEM(const data_t *, DNNL_ARG_WEIGHTS); - - bool is_transA = pd()->amp_.is_transA; -- bool is_transB = pd()->amp_.is_transB; - bool use_dst_acc = pd()->amp_.use_dst_acc; - - std::lock_guard _lock {this->mtx}; - auto *acl_resource = ctx.get_resource_mapper()->get(this); - acl_matmul_obj_t &acl_obj = acl_resource->get_acl_obj(); - // Run transpose kernel -- if (is_transA && !is_transB) { -+ if (is_transA) { - acl_obj.src_tensor.allocator()->allocate(); - acl_obj.src_acc_tensor.allocator()->import_memory( - const_cast(src_base)); - acl_obj.transA.run(); - acl_obj.wei_tensor.allocator()->import_memory( - const_cast(wei_base)); -- } else if (is_transB && !is_transA) { -- acl_obj.wei_tensor.allocator()->allocate(); -- acl_obj.wei_acc_tensor.allocator()->import_memory( -- const_cast(wei_base)); -- acl_obj.transB.run(); -- acl_obj.src_tensor.allocator()->import_memory( -- const_cast(src_base)); -- } else if (is_transA && is_transB) { -- acl_obj.src_tensor.allocator()->allocate(); -- acl_obj.src_acc_tensor.allocator()->import_memory( -- const_cast(src_base)); -- acl_obj.wei_tensor.allocator()->allocate(); -- acl_obj.wei_acc_tensor.allocator()->import_memory( -- const_cast(wei_base)); -- acl_obj.transA.run(); -- acl_obj.transB.run(); - } else { - acl_obj.src_tensor.allocator()->import_memory( - const_cast(src_base)); -@@ -69,7 +52,7 @@ status_t acl_matmul_t::execute_forward(const exec_ctx_t &ctx) const { - } - - if (use_dst_acc) { -- // Put the result in a new tensor, it will be accumalated to the dst -+ // Put the result in a new tensor, it will be accumulated to the dst - // during the post ops - acl_obj.dst_tensor.allocator()->allocate(); - } else { -@@ -82,7 +65,6 @@ status_t acl_matmul_t::execute_forward(const exec_ctx_t &ctx) const { - acl_obj.src_tensor.allocator()->free(); - acl_obj.wei_tensor.allocator()->free(); - if (is_transA) 
acl_obj.src_acc_tensor.allocator()->free(); -- if (is_transB) acl_obj.wei_acc_tensor.allocator()->free(); - - void *dst = acl_obj.dst_tensor.buffer(); - pd()->post_ops.execute(ctx, dst); -diff --git a/src/cpu/aarch64/matmul/acl_matmul.hpp b/src/cpu/aarch64/matmul/acl_matmul.hpp -index cdc942e995..832b1dbb68 100644 ---- a/src/cpu/aarch64/matmul/acl_matmul.hpp -+++ b/src/cpu/aarch64/matmul/acl_matmul.hpp -@@ -32,20 +32,15 @@ struct acl_resource_t : public resource_t { - - status_t configure(const acl_matmul_conf_t &) { - if (!acl_obj_) return status::out_of_memory; -- acl_obj_->src_tensor.allocator()->init(amp.src_info); -- acl_obj_->wei_tensor.allocator()->init(amp.wei_info); -- acl_obj_->dst_tensor.allocator()->init(amp.dst_info); -+ acl_obj_->src_tensor.allocator()->init(amp.src_tensor_info); -+ acl_obj_->wei_tensor.allocator()->init(amp.wei_tensor_info); -+ acl_obj_->dst_tensor.allocator()->init(amp.dst_tensor_info); - // Configure transpose kernel for src, wei or both - if (amp.is_transA) { - acl_obj_->src_acc_tensor.allocator()->init(amp.src_acc_info); - acl_obj_->transA.configure( - &acl_obj_->src_acc_tensor, &acl_obj_->src_tensor); - } -- if (amp.is_transB) { -- acl_obj_->wei_acc_tensor.allocator()->init(amp.wei_acc_info); -- acl_obj_->transB.configure( -- &acl_obj_->wei_acc_tensor, &acl_obj_->wei_tensor); -- } - // Configure GEMM - acl_obj_->gemm.configure(&acl_obj_->src_tensor, &acl_obj_->wei_tensor, - nullptr, &acl_obj_->dst_tensor, amp.alpha, 0.0f, amp.gemm_info); -@@ -72,12 +67,20 @@ struct acl_matmul_t : public primitive_t { - - status_t init(engine_t *engine) { - using smask_t = primitive_attr_t::skip_mask_t; -- bool ok = src_md()->data_type == data_type::f32 -- && weights_md()->data_type == data_type::f32 -- && desc()->accum_data_type == data_type::f32 -- && dst_md()->data_type == data_type::f32 -- && platform::has_data_type_support(data_type::f32) -+ const bool is_fp32_ok -+ = utils::everyone_is(data_type::f32, src_md()->data_type, -+ 
weights_md()->data_type, dst_md()->data_type, -+ desc()->accum_data_type) -+ && platform::has_data_type_support(data_type::f32); -+ const bool is_fp16_ok -+ = utils::everyone_is(data_type::f16, src_md()->data_type, -+ weights_md()->data_type, dst_md()->data_type) -+ && platform::has_data_type_support(data_type::f16); -+ bool ok = is_dense_data() -+ && utils::one_of(true, is_fp32_ok, is_fp16_ok) - && !has_zero_dim_memory() -+ && weights_md_.format_kind == format_kind::any -+ && set_default_formats() - && attr()->has_default_values( - smask_t::oscale | smask_t::post_ops) - && attr_oscale_ok() && !has_runtime_dims_or_strides(); -@@ -92,9 +95,9 @@ struct acl_matmul_t : public primitive_t { - amp_.use_dst_acc = post_ops.has_sum(); - - // Validate ACL GEMM -- ACL_CHECK_VALID(arm_compute::NEGEMM::validate(&_.src_info, -- &_.wei_info, nullptr, &_.dst_info, amp_.alpha, 0.0f, -- amp_.gemm_info)); -+ ACL_CHECK_VALID(arm_compute::NEGEMM::validate(&_.src_tensor_info, -+ &_.wei_tensor_info, nullptr, &_.dst_tensor_info, -+ amp_.alpha, 0.0f, amp_.gemm_info)); - - return status::success; - } -diff --git a/src/cpu/aarch64/matmul/acl_matmul_utils.cpp b/src/cpu/aarch64/matmul/acl_matmul_utils.cpp -index 679baec3a4..30bc2c1443 100644 ---- a/src/cpu/aarch64/matmul/acl_matmul_utils.cpp -+++ b/src/cpu/aarch64/matmul/acl_matmul_utils.cpp -@@ -41,6 +41,7 @@ status_t init_conf_matmul(acl_matmul_conf_t &, memory_desc_t &src_md, - const dim_t src_batch = helper.src_batch(); - const dim_t wei_batch = helper.wei_batch(); - -+ // We can only broadcast on one of src or wei at once - // ACL supports broadcast for 3D shapes, and 4D shapes - // for e.g when ab in abcd is 1x1 - bool batch_ok = IMPLICATION(src_batch > 1, wei_batch == 1) -@@ -53,44 +54,33 @@ status_t init_conf_matmul(acl_matmul_conf_t &, memory_desc_t &src_md, - bool with_bias = md.bias_desc.format_kind != format_kind::undef; - ACL_CHECK_SUPPORT(with_bias, "ACL does not support bias for matmul"); - -+ // The two innermost dimensions can 
be transposed, but the batch dimensions -+ // must be the outermost - using namespace format_tag; - auto src_tag = memory_desc_matches_one_of_tag( - src_md, abcd, abdc, abc, acb, ab, ba); -- auto wei_tag = memory_desc_matches_one_of_tag( -- wei_md, abcd, abdc, abc, acb, ab, ba); -- auto dst_tag -- = memory_desc_matches_one_of_tag(dst_md, abcd, abc, acb, ab, ba); -- ACL_CHECK_SUPPORT( -- utils::one_of(format_tag::undef, src_tag, wei_tag, dst_tag), -+ auto dst_tag = memory_desc_matches_one_of_tag(dst_md, abcd, abc, ab, ba); -+ ACL_CHECK_SUPPORT(utils::one_of(format_tag::undef, src_tag, dst_tag), - "Format tag is undefined"); - -- // Transpose A (src) or B (wei) -+ // Transpose A (src) - amp.is_transA = helper.transA() == 'T'; -- amp.is_transB = helper.transB() == 'T'; -+ -+ auto acl_src_data_t = acl_utils::get_acl_data_t(src_md.data_type); -+ auto acl_wei_data_t = acl_utils::get_acl_data_t(wei_md.data_type); -+ auto acl_dst_data_t = acl_utils::get_acl_data_t(dst_md.data_type); -+ - if (amp.is_transA) - amp.src_acc_info = arm_compute::TensorInfo( - arm_compute::TensorShape(M, K, 1, src_batch), 1, -- arm_compute::DataType::F32); -- if (amp.is_transB) -- amp.wei_acc_info = arm_compute::TensorInfo( -- arm_compute::TensorShape(K, N, wei_batch), 1, -- arm_compute::DataType::F32); -- -- amp.src_info = arm_compute::TensorInfo( -- arm_compute::TensorShape(K, M, 1, src_batch), 1, -- arm_compute::DataType::F32); -- amp.wei_info -- = arm_compute::TensorInfo(arm_compute::TensorShape(N, K, wei_batch), -- 1, arm_compute::DataType::F32); -- amp.dst_info = arm_compute::TensorInfo( -- arm_compute::TensorShape(N, M, 1, dst_batch), 1, -- arm_compute::DataType::F32); -- -- // Fast-math mode -- auto math_mode = get_fpmath_mode(); -- bool is_fastmath_enabled -- = utils::one_of(math_mode, fpmath_mode::bf16, fpmath_mode::any); -- amp.gemm_info.set_fast_math(is_fastmath_enabled); -+ acl_src_data_t); -+ -+ amp.src_tensor_info = arm_compute::TensorInfo( -+ arm_compute::TensorShape(K, M, 1, 
src_batch), 1, acl_src_data_t); -+ amp.wei_tensor_info = arm_compute::TensorInfo( -+ arm_compute::TensorShape(N, K, wei_batch), 1, acl_wei_data_t); -+ amp.dst_tensor_info = arm_compute::TensorInfo( -+ arm_compute::TensorShape(N, M, 1, dst_batch), 1, acl_dst_data_t); - - // Set alpha (output scaling) - amp.alpha = attr.output_scales_.scales_[0]; -@@ -98,10 +88,45 @@ status_t init_conf_matmul(acl_matmul_conf_t &, memory_desc_t &src_md, - // Validate ACL transpose - if (amp.is_transA) - ACL_CHECK_VALID(arm_compute::NETranspose::validate( -- &.src_acc_info, &.src_info)); -- if (amp.is_transB) -- ACL_CHECK_VALID(arm_compute::NETranspose::validate( -- &.wei_acc_info, &.wei_info)); -+ &.src_acc_info, &.src_tensor_info)); -+ -+ bool is_fastmath_enabled = utils::one_of( -+ attr.fpmath_mode_, fpmath_mode::bf16, fpmath_mode::any); -+ amp.gemm_info.set_fast_math(is_fastmath_enabled); -+ -+ amp.gemm_info.set_fixed_format(true); -+ -+ // WeightFormat::ANY tells ACL we can handle any format -+ amp.gemm_info.set_weight_format(arm_compute::WeightFormat::ANY); -+ -+ // Get the format that the ACL kernel will expect the weights to be -+ // in (if a kernel exists). 
Note that these are referred to as fixed format -+ // kernels, because they require one specific weights format -+ arm_compute::WeightFormat expected_weight_format; -+ ACL_CHECK_VALID(arm_compute::NEGEMM::has_opt_impl(expected_weight_format, -+ &.src_tensor_info, &.wei_tensor_info, nullptr, -+ &.dst_tensor_info, amp.alpha, 0.0f, amp.gemm_info)); -+ -+ // Set gemm weights info to the one returned by has_opt_impl -+ amp.gemm_info.set_weight_format(expected_weight_format); -+ -+ // has_opt_impl may return a non fast math kernel, even if we requested one -+ amp.gemm_info.set_fast_math( -+ arm_compute::is_fixed_format_fast_math(expected_weight_format)); -+ -+ // Logical dimension indices -+ dim_t innermost_dim = wei_md.ndims - 1; -+ dim_t N_dim = innermost_dim; -+ dim_t K_dim = innermost_dim - 1; -+ -+ // The logical indices of dimensions related to the batch, ordered from -+ // innermost to outermost -+ std::vector batch_dims = {}; -+ for (dim_t i = K_dim - 1; i >= 0; --i) -+ batch_dims.push_back(i); -+ -+ acl_utils::reorder_to_weight_format(amp.wei_tensor_info, wei_md, -+ expected_weight_format, K_dim, N_dim, {}, batch_dims); - - return status::success; - } -diff --git a/src/cpu/aarch64/matmul/acl_matmul_utils.hpp b/src/cpu/aarch64/matmul/acl_matmul_utils.hpp -index 0a5ee6a987..67bb2e78eb 100644 ---- a/src/cpu/aarch64/matmul/acl_matmul_utils.hpp -+++ b/src/cpu/aarch64/matmul/acl_matmul_utils.hpp -@@ -1,5 +1,5 @@ - /******************************************************************************* --* Copyright 2021-2022 Arm Ltd. and affiliates -+* Copyright 2021-2023 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
-@@ -29,25 +29,21 @@ namespace aarch64 { - struct acl_matmul_obj_t { - arm_compute::NEGEMM gemm; - arm_compute::NETranspose transA; -- arm_compute::NETranspose transB; - arm_compute::Tensor src_tensor; - arm_compute::Tensor src_acc_tensor; - arm_compute::Tensor wei_tensor; -- arm_compute::Tensor wei_acc_tensor; - arm_compute::Tensor dst_tensor; - }; - - struct acl_matmul_conf_t { - bool is_transA; -- bool is_transB; - // If this is true, the result of the matmul goes into a temporarily - // allocated ACL tensor to be accumulated into the oneDNN dst during postops - bool use_dst_acc; -- arm_compute::TensorInfo src_info; -+ arm_compute::TensorInfo src_tensor_info; - arm_compute::TensorInfo src_acc_info; -- arm_compute::TensorInfo wei_info; -- arm_compute::TensorInfo wei_acc_info; -- arm_compute::TensorInfo dst_info; -+ arm_compute::TensorInfo wei_tensor_info; -+ arm_compute::TensorInfo dst_tensor_info; - arm_compute::GEMMInfo gemm_info; - float alpha; - }; diff --git a/third_party/mkl_dnn/onednn_acl_fp32_bf16_reorder.patch b/third_party/mkl_dnn/onednn_acl_fp32_bf16_reorder.patch new file mode 100644 index 00000000000000..202902a1894a86 --- /dev/null +++ b/third_party/mkl_dnn/onednn_acl_fp32_bf16_reorder.patch @@ -0,0 +1,111 @@ + ******************************************************************************* + Copyright 2023 Arm Limited and affiliates. + SPDX-License-Identifier: Apache-2.0 + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ ******************************************************************************* +diff --git a/src/cpu/aarch64/cpu_isa_traits.hpp b/src/cpu/aarch64/cpu_isa_traits.hpp +index 4a43b24c5..1a5cfe590 100644 +--- a/src/cpu/aarch64/cpu_isa_traits.hpp ++++ b/src/cpu/aarch64/cpu_isa_traits.hpp +@@ -1,6 +1,7 @@ + /******************************************************************************* + * Copyright 2018-2023 Intel Corporation + * Copyright 2020-2023 FUJITSU LIMITED ++* Copyright 2023 Arm Ltd. and affiliates + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. +@@ -211,10 +212,10 @@ static inline bool mayiuse_atomic() { + return cpu().isAtomicSupported(); + } + +-inline bool isa_has_bf16(cpu_isa_t isa) { +- return false; ++static inline bool mayiuse_bf16() { ++ using namespace Xbyak_aarch64::util; ++ return cpu().isBf16Supported(); + } +- + } // namespace + + /* whatever is required to generate string literals... */ +diff --git a/src/cpu/aarch64/jit_uni_reorder.cpp b/src/cpu/aarch64/jit_uni_reorder.cpp +index 6bd259ec2..5541bb702 100644 +--- a/src/cpu/aarch64/jit_uni_reorder.cpp ++++ b/src/cpu/aarch64/jit_uni_reorder.cpp +@@ -1,7 +1,7 @@ + /******************************************************************************* + * Copyright 2018-2023 Intel Corporation + * Copyright 2020-2023 FUJITSU LIMITED +-* Copyright 2022 Arm Ltd. and affiliates ++* Copyright 2022-2023 Arm Ltd. and affiliates + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+@@ -163,11 +163,11 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { + + bool ok = true && p.ndims > 0 + && utils::one_of(p.itype, f32, s32, data_type::s8, u8) +- && utils::one_of(p.otype, f32, s32, data_type::s8, u8) ++ && utils::one_of(p.otype, f32, bf16, s32, data_type::s8, u8) + && utils::everyone_is(0, p.ioff, p.ooff) /* do we need this? */ + && utils::one_of(p.beta, 0.f, 1.f) /* anything else? */ +- && simple_impl_desc_init(p, nullptr) +- && prb_has_small_strides(p); ++ && simple_impl_desc_init(p, nullptr) && prb_has_small_strides(p) ++ && ((p.otype != bf16) || (p.itype == f32 && mayiuse_bf16())); + + return ok; + } +@@ -648,6 +648,9 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { + cvt_v_s32_u8(startIdx, regNum); + if (idt == data_type::s8) cvt_v_s8_u8(startIdx, regNum); + break; ++ case bf16: ++ if (idt == f32) cvt_v_f32_bf16(startIdx, regNum); ++ break; + default: assert(!"unreachable"); + } + }; +@@ -1677,6 +1680,10 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { + UNROLL_INST(fcvtzs, VReg4S, tmp, tmp); + } + ++ void cvt_v_f32_bf16(const size_t startIdx, const size_t regNum) { ++ UNROLL_INST2(bfcvtn, VReg4H(i), VReg4S(i)); ++ } ++ + void cvt_z_s8_s32(const size_t startIdx, const size_t regNum) { + cvt_z_b_s(startIdx, regNum); + UNROLL_INST(sxtb, ZRegS, tmp, P_ALL_ONE / T_m, tmp); +diff --git a/src/cpu/reorder/cpu_reorder_regular_f32_bf16.cpp b/src/cpu/reorder/cpu_reorder_regular_f32_bf16.cpp +index ba5499ba9..d4e21d316 100644 +--- a/src/cpu/reorder/cpu_reorder_regular_f32_bf16.cpp ++++ b/src/cpu/reorder/cpu_reorder_regular_f32_bf16.cpp +@@ -1,5 +1,6 @@ + /******************************************************************************* + * Copyright 2020-2022 Intel Corporation ++* Copyright 2023 Arm Ltd. and affiliates + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+@@ -34,6 +35,8 @@ const impl_list_map_t ®ular_f32_bf16_impl_list_map() { + DNNL_NON_X64_ONLY(REG_SR_BIDIR(f32, any, bf16, nChw16c)) + DNNL_NON_X64_ONLY(REG_SR_BIDIR(f32, any, bf16, nCdhw16c)) + ++ DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::jit_uni_reorder_t)) ++ + DNNL_NON_X64_ONLY(REG_SR(f32, oihw, bf16, OIhw8i16o2i, fmt_order::keep)) + DNNL_NON_X64_ONLY(REG_SR(f32, goihw, bf16, gOIhw8i16o2i, fmt_order::keep)) + DNNL_NON_X64_ONLY(REG_SR(f32, oihw, bf16, OIhw8o16i2o, fmt_order::keep)) diff --git a/third_party/mkl_dnn/onednn_acl_remove_winograd.patch b/third_party/mkl_dnn/onednn_acl_remove_winograd.patch deleted file mode 100644 index 18abcc8f54e922..00000000000000 --- a/third_party/mkl_dnn/onednn_acl_remove_winograd.patch +++ /dev/null @@ -1,326 +0,0 @@ - ******************************************************************************* - Copyright 2023 Arm Limited and affiliates. - SPDX-License-Identifier: Apache-2.0 - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
- ******************************************************************************* -diff --git a/src/cpu/aarch64/acl_convolution_utils.cpp b/src/cpu/aarch64/acl_convolution_utils.cpp -index c46d697575..37f8ecbc06 100644 ---- a/src/cpu/aarch64/acl_convolution_utils.cpp -+++ b/src/cpu/aarch64/acl_convolution_utils.cpp -@@ -271,54 +271,6 @@ status_t init_conf_indirect_gemm(acl_conv_conf_t &acp, memory_desc_t &src_md, - return status::success; - } - --status_t init_conf_wino(acl_conv_conf_t &acp, memory_desc_t &src_md, -- memory_desc_t &weights_md, memory_desc_t &dst_md, -- memory_desc_t &bias_md, const convolution_desc_t &cd, -- const primitive_attr_t &attr) { -- -- // Under these conditions, fallback to faster GEMM-based convolution -- // unless the user explicitly specifies Winograd algorithm -- // clang-format off -- if (one_of(true, src_md.dims[2] > 112, // ih -- src_md.dims[3] > 112, // iw -- src_md.dims[1] < 64, // ic -- dst_md.dims[1] < 64, // oc -- dnnl_get_max_threads() > 28) -- && cd.alg_kind == alg_kind::convolution_auto) { -- return status::unimplemented; -- } -- // clang-format on -- -- // General Compute Library checks, memory tags are also set there -- CHECK(acl_init_conf(acp, src_md, weights_md, dst_md, bias_md, cd, attr)); -- -- const bool shape_ok -- // only unit strides allowed -- = (acp.padstride_info.stride() == std::pair {1, 1}) -- // Note: Compute Library supports arbitrary padding for wino kernels -- // but we only allow small padding to be consistent with oneDNN -- && (acp.padstride_info.pad().first <= 1) // padding left/right -- && (acp.padstride_info.pad().second <= 1) // padding top/bottom -- // only non-dilated convolutions allowed -- && (acp.dilation_info == arm_compute::Size2D(1, 1)); -- -- ACL_CHECK_SUPPORT(!shape_ok, "shape not supported by winograd kernels"); -- -- // clang-format off -- // Validate convolution manually to check for return status -- ACL_CHECK_VALID(arm_compute::NEWinogradConvolutionLayer::validate( -- &acp.src_info, -- 
&acp.wei_info, -- acp.with_bias ? &acp.bia_info : nullptr, -- &acp.dst_info, -- acp.padstride_info, -- acp.act_info, -- true)); // enable_fast_math flag in ACL Winograd -- // clang-format on -- -- return status::success; --} -- - } // namespace acl_convolution_utils - - } // namespace aarch64 -diff --git a/src/cpu/aarch64/acl_convolution_utils.hpp b/src/cpu/aarch64/acl_convolution_utils.hpp -index 3e56245faf..0398ab06b9 100644 ---- a/src/cpu/aarch64/acl_convolution_utils.hpp -+++ b/src/cpu/aarch64/acl_convolution_utils.hpp -@@ -66,11 +66,6 @@ status_t init_conf_indirect_gemm(acl_conv_conf_t &acp, memory_desc_t &src_md, - memory_desc_t &bias_md, const convolution_desc_t &cd, - const primitive_attr_t &attr); - --status_t init_conf_wino(acl_conv_conf_t &acp, memory_desc_t &src_md, -- memory_desc_t &weights_md, memory_desc_t &dst_md, -- memory_desc_t &bias_md, const convolution_desc_t &cd, -- const primitive_attr_t &attr); -- - } // namespace acl_convolution_utils - - template _lock {this->mtx}; -- // Retrieve primitive resource and configured Compute Library objects -- auto *acl_resource -- = ctx.get_resource_mapper()->get(this); -- acl_obj_t &acl_wino_obj -- = acl_resource->get_acl_obj(); -- -- return execute_forward_conv_acl< -- acl_obj_t, pd_t, data_t>( -- ctx, acl_wino_obj, pd()); --} -- --} // namespace aarch64 --} // namespace cpu --} // namespace impl --} // namespace dnnl -diff --git a/src/cpu/aarch64/acl_winograd_convolution.hpp b/src/cpu/aarch64/acl_winograd_convolution.hpp -deleted file mode 100644 -index 215635fe3f..0000000000 ---- a/src/cpu/aarch64/acl_winograd_convolution.hpp -+++ /dev/null -@@ -1,146 +0,0 @@ --/******************************************************************************* --* Copyright 2020-2022 Arm Ltd. and affiliates --* --* Licensed under the Apache License, Version 2.0 (the "License"); --* you may not use this file except in compliance with the License. 
--* You may obtain a copy of the License at --* --* http://www.apache.org/licenses/LICENSE-2.0 --* --* Unless required by applicable law or agreed to in writing, software --* distributed under the License is distributed on an "AS IS" BASIS, --* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --* See the License for the specific language governing permissions and --* limitations under the License. --*******************************************************************************/ -- --#ifndef CPU_AARCH64_ACL_WINOGRAD_CONVOLUTION_HPP --#define CPU_AARCH64_ACL_WINOGRAD_CONVOLUTION_HPP -- --#include "cpu/cpu_convolution_pd.hpp" -- --#include "cpu/aarch64/acl_convolution_utils.hpp" -- --namespace dnnl { --namespace impl { --namespace cpu { --namespace aarch64 { -- --struct acl_wino_resource_t : public resource_t { -- acl_wino_resource_t() -- : acl_wino_obj_(utils::make_unique< -- acl_obj_t>()) {} -- -- status_t configure(const acl_conv_conf_t &acp) { -- if (!acl_wino_obj_) return status::out_of_memory; -- -- // Init Compute Library tensors based on info from descriptor -- acl_wino_obj_->src_tensor.allocator()->init(acp.src_info); -- acl_wino_obj_->wei_tensor.allocator()->init(acp.wei_info); -- acl_wino_obj_->dst_tensor.allocator()->init(acp.dst_info); -- acl_wino_obj_->bia_tensor.allocator()->init(acp.bia_info); -- -- // clang-format off -- acl_wino_obj_->conv.configure( -- &acl_wino_obj_->src_tensor, -- &acl_wino_obj_->wei_tensor, -- acp.with_bias ? 
&acl_wino_obj_->bia_tensor : nullptr, -- &acl_wino_obj_->dst_tensor, -- acp.padstride_info, -- acp.act_info, -- true); // to support 5x5, 7x7 filter shapes in addition to 3x3 -- // clang-format on -- -- return status::success; -- } -- -- acl_obj_t &get_acl_obj() const { -- return *acl_wino_obj_; -- } -- -- DNNL_DISALLOW_COPY_AND_ASSIGN(acl_wino_resource_t); -- --private: -- std::unique_ptr> -- acl_wino_obj_; --}; // acl_wino_resource_t -- --struct acl_wino_convolution_fwd_t : public primitive_t { -- struct pd_t : public cpu_convolution_fwd_pd_t { -- pd_t(const convolution_desc_t *adesc, const primitive_attr_t *attr, -- const typename pd_t::base_class *hint_fwd_pd) -- : cpu_convolution_fwd_pd_t(adesc, attr, hint_fwd_pd) -- , acp_() -- , post_ops() {} -- -- DECLARE_COMMON_PD_T( -- "wino:acl", acl_wino_convolution_fwd_t, USE_GLOBAL_SCRATCHPAD); -- -- status_t init(engine_t *engine) { -- bool ok = is_fwd() -- && utils::one_of(desc()->alg_kind, -- alg_kind::convolution_auto, -- alg_kind::convolution_winograd) -- && expect_data_types(data_type::f32, data_type::f32, -- data_type::f32, data_type::f32, data_type::f32) -- && attr()->has_default_values( -- primitive_attr_t::skip_mask_t::post_ops, -- data_type::f32) -- && !has_zero_dim_memory(); -- if (!ok) return status::unimplemented; -- -- CHECK(acl_convolution_utils::init_conf_wino(acp_, src_md_, -- weights_md_, dst_md_, bias_md_, *desc(), *attr())); -- -- set_default_alg_kind(alg_kind::convolution_winograd); -- -- CHECK(post_ops.init( -- engine, attr_.post_ops_, dst_md_, acp_.act_info)); -- acp_.use_dst_acc = post_ops.has_sum(); -- -- return status::success; -- } -- -- acl_conv_conf_t acp_; -- acl_post_ops_t post_ops; -- }; -- -- acl_wino_convolution_fwd_t(const pd_t *apd) : primitive_t(apd) {} -- -- status_t create_resource( -- engine_t *engine, resource_mapper_t &mapper) const override { -- if (mapper.has_resource(this)) return status::success; -- -- auto r = utils::make_unique(); -- if (!r) return 
status::out_of_memory; -- -- // Configure the resource based on information from primitive descriptor -- CHECK(r->configure(pd()->acp_)); -- mapper.add(this, std::move(r)); -- -- CHECK(pd()->post_ops.create_resource(engine, mapper)); -- -- return status::success; -- } -- -- ~acl_wino_convolution_fwd_t() {} -- -- typedef typename prec_traits::type data_t; -- -- status_t execute(const exec_ctx_t &ctx) const override { -- return execute_forward(ctx); -- } -- --private: -- // To guard the const execute_forward(), the mutex must be 'mutable' -- mutable std::mutex mtx; -- status_t execute_forward(const exec_ctx_t &ctx) const; -- const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); } --}; // acl_wino_convolution_fwd_t -- --} // namespace aarch64 --} // namespace cpu --} // namespace impl --} // namespace dnnl -- --#endif // CPU_AARCH64_ACL_WINOGRAD_CONVOLUTION_HPP -diff --git a/src/cpu/cpu_convolution_list.cpp b/src/cpu/cpu_convolution_list.cpp -index 4142dbc7e7..094c73aa36 100644 ---- a/src/cpu/cpu_convolution_list.cpp -+++ b/src/cpu/cpu_convolution_list.cpp -@@ -65,7 +65,6 @@ using namespace dnnl::impl::cpu::x64; - #if DNNL_AARCH64 && DNNL_AARCH64_USE_ACL - #include "cpu/aarch64/acl_gemm_convolution.hpp" - #include "cpu/aarch64/acl_indirect_gemm_convolution.hpp" --#include "cpu/aarch64/acl_winograd_convolution.hpp" - #endif - using namespace dnnl::impl::cpu::aarch64; - #endif -@@ -100,7 +99,6 @@ const std::map> &impl_list_map() - CPU_INSTANCE_SSE41(jit_sse41_1x1_convolution_fwd_t) - CPU_INSTANCE_AVX2(jit_avx2_convolution_fwd_t) - CPU_INSTANCE_SSE41(jit_sse41_convolution_fwd_t) -- CPU_INSTANCE_AARCH64_ACL(acl_wino_convolution_fwd_t) - CPU_INSTANCE_AARCH64(jit_sve_512_dw_convolution_fwd_t) - CPU_INSTANCE_AARCH64(jit_sve_512_1x1_convolution_fwd_f32_t) - CPU_INSTANCE_AARCH64(jit_sve_512_convolution_fwd_t) -diff --git a/tests/gtests/test_iface_wino_convolution.cpp b/tests/gtests/test_iface_wino_convolution.cpp -index 03861b1de4..2235ceae36 100644 ---- 
a/tests/gtests/test_iface_wino_convolution.cpp -+++ b/tests/gtests/test_iface_wino_convolution.cpp -@@ -59,9 +59,6 @@ protected: - input_f16.wino_supported = is_gpu; - input_int8.wino_supported = is_cpu && has_avx512_core; - input_f32.backward_supported = is_cpu && impl::dnnl_thr_syncable(); --#elif DNNL_AARCH64 && DNNL_AARCH64_USE_ACL -- const bool is_cpu = get_test_engine_kind() == engine::kind::cpu; -- input_f32.wino_supported = is_cpu; - #endif - - #else diff --git a/third_party/mkl_dnn/onednn_acl_reorder.patch b/third_party/mkl_dnn/onednn_acl_reorder.patch index 7241aca4eefc88..5da6756c70a275 100644 --- a/third_party/mkl_dnn/onednn_acl_reorder.patch +++ b/third_party/mkl_dnn/onednn_acl_reorder.patch @@ -16,7 +16,7 @@ ******************************************************************************* diff --git a/src/cpu/aarch64/acl_reorder.cpp b/src/cpu/aarch64/acl_reorder.cpp new file mode 100644 -index 0000000000..061751b555 +index 000000000..061751b55 --- /dev/null +++ b/src/cpu/aarch64/acl_reorder.cpp @@ -0,0 +1,52 @@ @@ -341,10 +341,10 @@ index 0000000000..edbc38914d + +#endif // CPU_AARCH64_ACL_REORDER_HPP diff --git a/src/cpu/reorder/cpu_reorder_regular_f32_f32.cpp b/src/cpu/reorder/cpu_reorder_regular_f32_f32.cpp -index bccd2f75f4..5e5ea331ba 100644 +index a4150b619..f4d6b4de3 100644 --- a/src/cpu/reorder/cpu_reorder_regular_f32_f32.cpp +++ b/src/cpu/reorder/cpu_reorder_regular_f32_f32.cpp -@@ -15,6 +15,7 @@ +@@ -16,6 +16,7 @@ *******************************************************************************/ #include "cpu/reorder/cpu_reorder.hpp" @@ -352,19 +352,20 @@ index bccd2f75f4..5e5ea331ba 100644 namespace dnnl { namespace impl { -@@ -27,6 +28,7 @@ const impl_list_map_t ®ular_f32_f32_impl_list_map() { +@@ -28,6 +29,7 @@ const impl_list_map_t ®ular_f32_f32_impl_list_map() { // f32 -> f32 {{f32, f32, 0}, { REG_FAST_DIRECT_COPY_F32_F32 + DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::acl_reorder_fwd_t)) + 
DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::brgemm_matmul_matrix_B_reorder_t)) DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_blk_reorder_t)) - DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_uni_reorder_t)) -@@ -64,6 +66,7 @@ const impl_list_map_t ®ular_f32_f32_impl_list_map() { +@@ -69,6 +71,8 @@ const impl_list_map_t ®ular_f32_f32_impl_list_map() { nullptr, }}, {{f32, f32, 4}, { ++ + DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::acl_reorder_fwd_t)) - DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::wino_reorder_t)) - CPU_REORDER_INSTANCE(rnn_weights_reorder_t) + + REG_FAST_DIRECT_COPY_F32_F32 diff --git a/third_party/mkl_dnn/onednn_acl_reorder_padded.patch b/third_party/mkl_dnn/onednn_acl_reorder_padded.patch deleted file mode 100644 index f290f21ec87e9b..00000000000000 --- a/third_party/mkl_dnn/onednn_acl_reorder_padded.patch +++ /dev/null @@ -1,858 +0,0 @@ - ******************************************************************************* - Copyright 2022 Arm Limited and affiliates. - SPDX-License-Identifier: Apache-2.0 - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
- ******************************************************************************* - -diff --git a/src/cpu/aarch64/jit_uni_reorder.cpp b/src/cpu/aarch64/jit_uni_reorder.cpp -index 24d6220cf..a6cefaa20 100644 ---- a/src/cpu/aarch64/jit_uni_reorder.cpp -+++ b/src/cpu/aarch64/jit_uni_reorder.cpp -@@ -1,6 +1,7 @@ - /******************************************************************************* - * Copyright 2018-2021 Intel Corporation - * Copyright 2020-2021 FUJITSU LIMITED -+* Copyright 2022 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. -@@ -54,6 +55,35 @@ namespace aarch64 { - - namespace tr { - -+static bool prb_has_small_strides(const prb_t &prb) { -+ constexpr ptrdiff_t max_stride = (1LL << 31) - 1; -+ for (int d = 0; d < prb.ndims; ++d) { -+ const ptrdiff_t cms = max_stride / prb.nodes[d].n; -+ const bool small_strides = true -+ && prb.nodes[d].is < cms / (int)data_type_size(prb.itype) -+ && prb.nodes[d].os < cms / (int)data_type_size(prb.otype); -+ if (!small_strides) return false; -+ } -+ return true; -+} -+ -+static bool prb_tail_friendly(const prb_t &prb) { -+ /* find optimal ndims to makes it easier to -+ * identify the blk_chunk in the loop*/ -+ int ndims = prb.full_ndims - prb.ndims; -+ -+ int n = prb.nodes[0].is; -+ for (int d = 1; d < prb.ndims; ++d) { -+ if (d != prb.blk_chunk_idx) n *= prb.nodes[d].n; -+ } -+ if (prb.ip_tail > 0 -+ && ((ndims == 0 && n != 1) -+ || (ndims > 0 && prb.ndims > prb.blk_chunk_idx))) -+ return false; -+ -+ return true; -+} -+ - /** Minimal reasonable/desirable kernel size. - * The constant might be used to determine how a problem should be split - * between kernel and threading driver. */ -@@ -121,18 +151,10 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - && utils::one_of(p.otype, f32, s32, data_type::s8, u8) - && utils::everyone_is(0, p.ioff, p.ooff) /* do we need this? 
*/ - && utils::one_of(p.beta, 0.f, 1.f) /* anything else? */ -- && simple_impl_desc_init(p, nullptr); -+ && simple_impl_desc_init(p, nullptr) && prb_has_small_strides(p) -+ && prb_tail_friendly(p); - if (!ok) return false; - -- const ptrdiff_t max_stride = (1LL << 31) - 1; -- for (int d = 0; d < p.ndims; ++d) { -- const ptrdiff_t cms = max_stride / p.nodes[d].n; -- bool strides_ok = true -- && p.nodes[d].is < cms / (int)data_type_size(p.itype) -- && p.nodes[d].os < cms / (int)data_type_size(p.otype); -- if (!strides_ok) return false; -- } -- - return true; - } - -@@ -153,6 +175,13 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - return (int)prb_.nodes[d].ss; - } - -+ int blk_cnt() { -+ assert(prb_.blk_chunk_idx < prb_.full_ndims); -+ return (int)prb_.nodes[prb_.blk_chunk_idx].n - 1; -+ } -+ int op_padding() { return prb_.op_tail ? prb_.iblock - prb_.op_tail : 0; } -+ int ip_padding() { return prb_.ip_tail ? prb_.oblock - prb_.ip_tail : 0; } -+ - void step(int off, int prev_i_off, int prev_o_off, int prev_s_off, - int &i_off, int &o_off, int &s_off, int step_size = 1) { - i_off = prev_i_off; -@@ -385,6 +414,7 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - prb_.otype, u8, data_type::s8, s32, f32))) - && utils::everyone_is(8, n(0), n(1)) - && utils::everyone_is(1, os(0), is(1)) -+ && utils::everyone_is(0, prb_.ip_tail, prb_.op_tail) - && prb_.scale_type == scale_type_t::NONE && prb_.beta == 0.f; - } - -@@ -405,17 +435,14 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - bool process_direct_copy(int len) { - using namespace data_type; - -- const int simd_w = cpu_isa_traits::vlen == 16 -- ? 
cpu_isa_traits::vlen / itype_sz /* use 128-bit VReg */ -- : cpu_isa_traits::vlen / itype_sz -- / 2; /* use lower half of 512-bit ZReg */ -- -+ const int simd_w = cpu_isa_traits::vlen / itype_sz; - bool can_do = true && mayiuse(isa) - && utils::everyone_is(1, os(0), is(0)) - && (false || prb_.itype == prb_.otype - || (prb_.itype == s32 && prb_.otype == f32) - || (prb_.itype == f32 && prb_.otype == s32)) - && len % simd_w == 0 && n(0) % len == 0 -+ && prb_.ip_tail % simd_w == 0 && prb_.op_tail % simd_w == 0 - && prb_.scale_type == scale_type_t::NONE && prb_.beta == 0.f; - if (!can_do) return false; - -@@ -511,7 +538,8 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - } - - void process_unroll_generic_step(int reg_unroll, const int *i_off, -- const int *o_off, const int *s_off) { -+ const int *o_off, const int *s_off, const int *ip_padding, -+ const bool h_padded) { - using namespace data_type; - - auto cvt2ps -@@ -571,6 +599,8 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - for (int ur = 1; ur < reg_unroll; ++ur) - if (o_off[ur] != o_off[ur - 1] + 1) can_store_xmm = false; - const int ur_step = can_store_xmm ? 4 : 1; -+ const int load_tail_step -+ = !can_load_xmm && can_store_xmm ? ur_step : load_step; - - const bool interim_f32 = false - || utils::one_of(f32, prb_.itype, prb_.otype) -@@ -579,55 +609,85 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - const bool need_saturation - = (utils::one_of(prb_.otype, u8, data_type::s8, s32) - && interim_f32); -- -- if (!can_load_xmm && can_store_xmm) { -- assert(ur_step == 4); -- /* load with stride */ -- for (int ur = 0; ur < reg_unroll; ur += ur_step) { -- -+ if (h_padded) { -+ for (int ur = 0; ur < reg_unroll; ur += load_tail_step) { -+ if (itype_sz == 4) -+ movi(VReg4S(ur), 0); -+ else if (itype_sz == 2) -+ movi(VReg8H(ur), 0); -+ else -+ movi(VReg16B(ur), 0); - /* x_tmp_vec = X_TMP_0 - X_TMP_4 - Do not use X_TMP_? 
as the last arg. */ -- for (int r = 0; r < ur_step; ++r) { -- add_imm(x_tmp_vec[r], x_ptr_in_off, -- i_off[ur + r] * itype_sz, X_DEFAULT_ADDR); -+ for (int r = 0; r < load_tail_step; ++r) { -+ if (ip_padding[ur + r] == 0) { -+ add_imm(x_tmp_vec[r], x_ptr_in_off, -+ i_off[ur + r] * itype_sz, X_DEFAULT_ADDR); -+ } - } - -- for (int r = 0; r < ur_step; ++r) { -- if (itype_sz == 4) -- ld1(VReg4S(ur)[r], ptr(x_tmp_vec[r])); -- else if (itype_sz == 2) -- ld1(VReg8H(ur)[r], ptr(x_tmp_vec[r])); -- else -- ld1(VReg16B(ur)[r], ptr(x_tmp_vec[r])); -+ for (int r = 0; r < load_tail_step; ++r) { -+ if (ip_padding[ur + r] == 0) { -+ if (itype_sz == 4) -+ ld1(VReg4S(ur)[r], ptr(x_tmp_vec[r])); -+ else if (itype_sz == 2) -+ ld1(VReg8H(ur)[r], ptr(x_tmp_vec[r])); -+ else -+ ld1(VReg16B(ur)[r], ptr(x_tmp_vec[r])); -+ } - } - } - } else { -- int ur = 0; -- int tmp_ur = 0; -- while (ur < reg_unroll) { -- int count = 0; -+ if (!can_load_xmm && can_store_xmm) { -+ assert(ur_step == 4); -+ /* load with stride */ -+ for (int ur = 0; ur < reg_unroll; ur += ur_step) { - -- do { -- add_imm(x_tmp_vec[count++], x_ptr_in_off, -- i_off[ur] * itype_sz, X_DEFAULT_ADDR); -- ur += load_step; -- } while (ur < reg_unroll && count < x_tmp_vec_size); -+ /* x_tmp_vec = X_TMP_0 - X_TMP_4 -+ Do not use X_TMP_? as the last arg. 
*/ -+ for (int r = 0; r < ur_step; ++r) { -+ add_imm(x_tmp_vec[r], x_ptr_in_off, -+ i_off[ur + r] * itype_sz, X_DEFAULT_ADDR); -+ } - -- for (int i = 0; i < count; i++) { -+ for (int r = 0; r < ur_step; ++r) { -+ if (itype_sz == 4) -+ ld1(VReg4S(ur)[r], ptr(x_tmp_vec[r])); -+ else if (itype_sz == 2) -+ ld1(VReg8H(ur)[r], ptr(x_tmp_vec[r])); -+ else -+ ld1(VReg16B(ur)[r], ptr(x_tmp_vec[r])); -+ } -+ } -+ } else { -+ int ur = 0; -+ int tmp_ur = 0; -+ while (ur < reg_unroll) { -+ int count = 0; -+ -+ do { -+ add_imm(x_tmp_vec[count++], x_ptr_in_off, -+ i_off[ur] * itype_sz, X_DEFAULT_ADDR); -+ ur += load_step; -+ } while (ur < reg_unroll && count < x_tmp_vec_size); -+ -+ for (int i = 0; i < count; i++) { - -- switch (load_step * itype_sz) { -- case 16: ldr(QReg(tmp_ur), ptr(x_tmp_vec[i])); break; -- case 8: ldr(DReg(tmp_ur), ptr(x_tmp_vec[i])); break; -- case 4: ldr(SReg(tmp_ur), ptr(x_tmp_vec[i])); break; -- case 2: ldr(HReg(tmp_ur), ptr(x_tmp_vec[i])); break; -- case 1: ldr(BReg(tmp_ur), ptr(x_tmp_vec[i])); break; -- default: assert(!"unreachable"); -+ switch (load_step * itype_sz) { -+ case 16: -+ ldr(QReg(tmp_ur), ptr(x_tmp_vec[i])); -+ break; -+ case 8: ldr(DReg(tmp_ur), ptr(x_tmp_vec[i])); break; -+ case 4: ldr(SReg(tmp_ur), ptr(x_tmp_vec[i])); break; -+ case 2: ldr(HReg(tmp_ur), ptr(x_tmp_vec[i])); break; -+ case 1: ldr(BReg(tmp_ur), ptr(x_tmp_vec[i])); break; -+ default: assert(!"unreachable"); -+ } -+ tmp_ur += load_step; - } -- tmp_ur += load_step; - } - } - } -- - /* xmm[:] <-- (f32)xmm[:] */ - if (interim_f32) { - const int cvt_step = nstl::max(load_step, ur_step); -@@ -708,7 +768,8 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - if (s_off[r] != s_off[r - 1] + 0) - scale_load_type = scale_load_type_t::load; - -- if (scale_load_type == scale_load_type_t::bcast) { -+ if (scale_load_type == scale_load_type_t::bcast -+ && !h_padded) { - VReg4S v(xmm_scale.getIdx()); - VReg4S v_dst(ur); - add_imm(X_TMP_0, x_ptr_scale_off, 
s_off[ur] * stype_sz, -@@ -724,7 +785,8 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - if (s_off[r] != s_off[r - 1] + 1) - scale_load_type = scale_load_type_t::gather; - -- if (scale_load_type == scale_load_type_t::load) { -+ if (scale_load_type == scale_load_type_t::load -+ && !h_padded) { - uint32_t idx = xmm_scale.getIdx(); - VReg4S v_dst(ur); - add_imm(X_TMP_0, x_ptr_scale_off, s_off[ur] * stype_sz, -@@ -739,14 +801,18 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - // so gather the scale factors one by one - /*ur_step is 1 or 4. */ - for (int r = ur; r < ur + ur_step; ++r) { -- /* x_tmp_vec = X_TMP_0 - X_TMP_4 -+ if (ip_padding[r] == 0 || !h_padded) { -+ /* x_tmp_vec = X_TMP_0 - X_TMP_4 - Do not use X_TMP_? as the last arg. */ -- add_imm(x_tmp_vec[r - ur], x_ptr_scale_off, -- s_off[r] * stype_sz, X_DEFAULT_ADDR); -+ add_imm(x_tmp_vec[r - ur], x_ptr_scale_off, -+ s_off[r] * stype_sz, X_DEFAULT_ADDR); -+ } - } - for (int r = ur; r < ur + ur_step; ++r) { -- VReg4S v(xmm_scale.getIdx()); -- ld1(v[r - ur], ptr(x_tmp_vec[r - ur])); -+ if (ip_padding[r] == 0 || !h_padded) { -+ VReg4S v(xmm_scale.getIdx()); -+ ld1(v[r - ur], ptr(x_tmp_vec[r - ur])); -+ } - } - fmul(VReg4S(ur), VReg4S(ur), xmm_scale); - } -@@ -925,7 +991,15 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - } - } - -- void process_unroll_generic(int len) { -+ void comp_padding_flag(int ndims, int off, int len, int &i_tail) { -+ const int ip_without_padding -+ = ndims == 0 ? 
len - ip_padding() : prb_.ip_tail; -+ if ((ndims == 0 && off >= ip_without_padding) -+ || (ndims > 0 && (off % prb_.oblock) >= ip_without_padding)) -+ i_tail = 1; -+ } -+ -+ void process_unroll_generic(const int ndims, int len, const bool h_padded) { - const int blk = 8; - - int i_off[2 * blk] = {0}; -@@ -936,22 +1010,37 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - - for (int off = 0; off < len; off += blk) { - const int reg_unroll = nstl::min(off + blk, len) - off; -+ int ip_padding[blk] = {0}; - -- /* compute offsets */ -+ /* compute offsets and tail*/ - for (int ur = off != 0 ? 0 : 1; ur < reg_unroll; ++ur) { - const int ur_c = curr * blk + ur; - const int ur_p = (ur_c - 1 + 2 * blk) % (2 * blk); // prev ur - step(off + ur, i_off[ur_p], o_off[ur_p], s_off[ur_p], - i_off[ur_c], o_off[ur_c], s_off[ur_c]); -+ if (h_padded) -+ comp_padding_flag(ndims, off + ur, len, ip_padding[ur]); - } -- - process_unroll_generic_step(reg_unroll, i_off + curr * blk, -- o_off + curr * blk, s_off + curr * blk); -+ o_off + curr * blk, s_off + curr * blk, ip_padding, -+ h_padded); - - curr = 1 - curr; - } - } - -+ void compute_ker( -+ const int ndims, const int len_unroll, const bool h_padded) { -+ bool optimized = false; -+ optimized = optimized -+ || (process_direct_copy(len_unroll) && !h_padded); -+ optimized = optimized -+ || (process_direct_copy(len_unroll) && !h_padded); -+ optimized -+ = optimized || (process_unroll_tr8x8(len_unroll) && !h_padded); -+ if (!optimized) process_unroll_generic(ndims, len_unroll, h_padded); -+ } -+ - void loop_begin(Label &l, XReg reg_cnt, int len) { - mov(reg_cnt, len); - L(l); -@@ -985,6 +1074,28 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - } - } - -+ void compute_blk_ker(const int len_unroll) { -+ int omp_ndims = prb_.full_ndims - prb_.ndims; -+ Label no_last_blk, end_label; -+ -+ if (prb_.ip_tail > 0 && prb_.op_tail == 0) { -+ if (omp_ndims == 0) { -+ 
cmp(reg_last_loop_cnt, 1); -+ bne(no_last_blk); -+ compute_ker(omp_ndims, len_unroll, true); -+ } else { -+ cmp(reg_blk_chunks, blk_cnt()); -+ bne(no_last_blk); -+ compute_ker(omp_ndims, len_unroll, true); -+ } -+ b(end_label); -+ } -+ -+ L(no_last_blk); -+ compute_ker(omp_ndims, len_unroll, false); -+ L(end_label); -+ } -+ - bool simple_impl() { - simple_impl_desc_t d; - if (!simple_impl_desc_init(prb_, &d)) return false; -@@ -1013,11 +1124,7 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - if (n_jit_loops > 0) - loop_begin(l_loop[0], reg_cnt[0], n(nfu + 0) / ldu); - -- bool optimized = false; -- optimized = optimized || process_direct_copy(d.len_unroll); -- optimized = optimized || process_direct_copy(d.len_unroll); -- optimized = optimized || process_unroll_tr8x8(d.len_unroll); -- if (!optimized) process_unroll_generic(d.len_unroll); -+ compute_blk_ker(d.len_unroll); - - if (n_jit_loops > 0) - loop_end(l_loop[0], reg_cnt[0], n(nfu + 0) / ldu, is(nfu + 0) * ldu, -@@ -1236,9 +1343,13 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - } - add_imm(X_TMP_0, abi_param1, PARAM(in), X_TMP_2); - add_imm(X_TMP_1, abi_param1, PARAM(out), X_TMP_2); -+ add_imm(reg_blk, abi_param1, PARAM(blk_chunks), reg_blk); - ldr(reg_ptr_in, ptr(X_TMP_0)); - ldr(reg_ptr_out, ptr(X_TMP_1)); -+ ldr(reg_blk_chunks, ptr(reg_blk)); -+ - #undef PARAM -+ mov_imm(reg_last_loop_cnt, 1); - - mov(x_ptr_in_off, XReg(reg_ptr_in.getIdx())); - mov(x_ptr_out_off, XReg(reg_ptr_out.getIdx())); -@@ -1282,6 +1393,10 @@ private: - XReg reg_off_out = x9; - XReg reg_off_scale = x10; - -+ XReg reg_blk = x11; -+ XReg reg_blk_chunks = x12; -+ XReg reg_last_loop_cnt = x11; -+ - XReg reg_tmp = x0; - - VReg4S xmm_scale = v15.s; -@@ -1416,10 +1531,16 @@ static void prb_thread_kernel_balance( - for (int d = 0; d < prb.ndims; ++d) - sz_total *= prb.nodes[d].n; - -+ /* The general expression for sz_drv_thr can be written as -+ * sz_drv_min = C0 + FC * (nthr 
> 1 ? 1 : 0) + VC * (nthr - 1) -+ * where FC and VC are fixed and variable costs respectively. -+ * Though for now, the below heuristic seems to be good enough */ -+ const size_t sz_drv_thr = (nthr > 1) ? 16 * nthr : 1; -+ - /* sz_drv_min is the minimal size for the parallel - * driver required for good parallelization */ - const size_t sz_drv_min -- = nstl::min(16 * nthr, utils::div_up(sz_total, 1024)); -+ = nstl::min(sz_drv_thr, utils::div_up(sz_total, 1024)); - - /* kdims -- # of dimensions processed by a kernel - * sz_ker_cur -- product of the dimension processed by a kernel -@@ -1440,7 +1561,8 @@ static void prb_thread_kernel_balance( - * (less than tr::ker_prb_size_min). In that case try to split the - * innermost driver dimension into two, to increase sz_ker_cur. */ - bool want_borrow_ker_from_drv = true && kdims < prb.ndims -- && sz_ker_cur < tr::ker_prb_size_min && sz_drv_cur > sz_drv_min; -+ && sz_ker_cur < tr::ker_prb_size_min && sz_drv_cur > sz_drv_min -+ && kdims != prb.blk_chunk_idx; - if (want_borrow_ker_from_drv) { - /* sz_want_borrow is the minimal sz, so that: - * o) sz_ker_cur * sz_want_borrow >= tr::ker_prb_size_min -@@ -1464,7 +1586,7 @@ static void prb_thread_kernel_balance( - * try to split the outermost kernel dimension into two, to increase - * sz_drv_cur. 
*/ - bool want_borrow_drv_from_ker = true && sz_ker_cur > tr::ker_prb_size_min -- && sz_drv_cur < sz_drv_min; -+ && sz_drv_cur < sz_drv_min && kdims != prb.blk_chunk_idx; - if (want_borrow_drv_from_ker) { - size_t sz_want_borrow = utils::div_up(sz_drv_min, sz_drv_cur); - for (; prb.nodes[kdims - 1].n % sz_want_borrow; ++sz_want_borrow) -@@ -1518,6 +1640,8 @@ status_t jit_uni_reorder_t::pd_t::create(reorder_pd_t **reorder_pd, - prb_dump(prb); - }); - -+ CHECK(prb_check_blk(prb, *dst_md)); -+ - int ndims_ker_max; - int nthr = dnnl_get_max_threads(); - prb_thread_kernel_balance(prb, ndims_ker_max, nthr); -@@ -1552,7 +1676,7 @@ status_t jit_uni_reorder_t::pd_t::create(reorder_pd_t **reorder_pd, - - void jit_uni_reorder_t::omp_driver_0d( - int off, const char *in, char *out, const float *scale) const { -- tr::call_param_t c {in, out, scale}; -+ tr::call_param_t c {in, out, scale, 0}; - (*kernel_)(&c); - } - -@@ -1564,6 +1688,7 @@ void jit_uni_reorder_t::omp_driver_1d(int ithr, int nthr, int off, - c.in = in + d0 * ns[0].is * data_type_size(pd()->prb_.itype); - c.out = out + d0 * ns[0].os * data_type_size(pd()->prb_.otype); - c.scale = scale + d0 * ns[0].ss; -+ c.blk_chunks = d0; - (*kernel_)(&c); - }); - } -@@ -1571,6 +1696,7 @@ void jit_uni_reorder_t::omp_driver_1d(int ithr, int nthr, int off, - void jit_uni_reorder_t::omp_driver_2d(int ithr, int nthr, int off, - const char *in, char *out, const float *scale) const { - const tr::node_t *ns = pd()->prb_.nodes + off; -+ const int blk_idx_off = pd()->prb_.blk_chunk_idx - off; - for_nd(ithr, nthr, (ptrdiff_t)ns[1].n, (ptrdiff_t)ns[0].n, - [&](ptrdiff_t d1, ptrdiff_t d0) { - auto c = tr::call_param_t(); -@@ -1581,6 +1707,7 @@ void jit_uni_reorder_t::omp_driver_2d(int ithr, int nthr, int off, - + (d0 * ns[0].os + d1 * ns[1].os) - * data_type_size(pd()->prb_.otype); - c.scale = scale + d0 * ns[0].ss + d1 * ns[1].ss; -+ c.blk_chunks = utils::pick(blk_idx_off, d0, d1); - (*kernel_)(&c); - }); - } -@@ -1588,6 +1715,7 @@ void 
jit_uni_reorder_t::omp_driver_2d(int ithr, int nthr, int off, - void jit_uni_reorder_t::omp_driver_3d(int ithr, int nthr, int off, - const char *in, char *out, const float *scale) const { - const tr::node_t *ns = pd()->prb_.nodes + off; -+ const int blk_idx_off = pd()->prb_.blk_chunk_idx - off; - for_nd(ithr, nthr, (ptrdiff_t)ns[2].n, (ptrdiff_t)ns[1].n, - (ptrdiff_t)ns[0].n, [&](ptrdiff_t d2, ptrdiff_t d1, ptrdiff_t d0) { - auto c = tr::call_param_t(); -@@ -1598,6 +1726,7 @@ void jit_uni_reorder_t::omp_driver_3d(int ithr, int nthr, int off, - + (d0 * ns[0].os + d1 * ns[1].os + d2 * ns[2].os) - * data_type_size(pd()->prb_.otype); - c.scale = scale + d0 * ns[0].ss + d1 * ns[1].ss + d2 * ns[2].ss; -+ c.blk_chunks = utils::pick(blk_idx_off, d0, d1, d2); - (*kernel_)(&c); - }); - } -@@ -1605,6 +1734,7 @@ void jit_uni_reorder_t::omp_driver_3d(int ithr, int nthr, int off, - void jit_uni_reorder_t::omp_driver_4d(int ithr, int nthr, int off, - const char *in, char *out, const float *scale) const { - const tr::node_t *ns = pd()->prb_.nodes + off; -+ const int blk_idx_off = pd()->prb_.blk_chunk_idx - off; - for_nd(ithr, nthr, (ptrdiff_t)ns[3].n, (ptrdiff_t)ns[2].n, - (ptrdiff_t)ns[1].n, (ptrdiff_t)ns[0].n, - [&](ptrdiff_t d3, ptrdiff_t d2, ptrdiff_t d1, ptrdiff_t d0) { -@@ -1619,6 +1749,7 @@ void jit_uni_reorder_t::omp_driver_4d(int ithr, int nthr, int off, - * data_type_size(pd()->prb_.otype); - c.scale = scale + d0 * ns[0].ss + d1 * ns[1].ss + d2 * ns[2].ss - + d3 * ns[3].ss; -+ c.blk_chunks = utils::pick(blk_idx_off, d0, d1, d2, d3); - (*kernel_)(&c); - }); - } -diff --git a/src/cpu/aarch64/jit_uni_reorder.hpp b/src/cpu/aarch64/jit_uni_reorder.hpp -index 88762756c..2fb6f0f89 100644 ---- a/src/cpu/aarch64/jit_uni_reorder.hpp -+++ b/src/cpu/aarch64/jit_uni_reorder.hpp -@@ -1,6 +1,7 @@ - /******************************************************************************* - * Copyright 2018-2020 Intel Corporation - * Copyright 2020 FUJITSU LIMITED -+* Copyright 2022 Arm Ltd. 
and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. -@@ -52,11 +53,19 @@ struct prb_t { - ptrdiff_t ooff; - scale_type_t scale_type; - float beta; -+ int full_ndims; -+ int ip_tail; -+ int op_tail; -+ int iblock; -+ int oblock; -+ int blk_chunk_idx; - }; - - status_t prb_init(prb_t &prb, const memory_desc_t &imd, - const memory_desc_t &omd, const primitive_attr_t *attr); - -+status_t prb_check_blk(prb_t &prb, const memory_desc_t &imd); -+ - /** sorts the problem nodes so that output strides come in ascending order */ - void prb_normalize(prb_t &p); - -@@ -82,6 +91,7 @@ struct call_param_t { - const void *in; - void *out; - const float *scale; -+ size_t blk_chunks; - }; - - struct kernel_t { -diff --git a/src/cpu/aarch64/jit_uni_reorder_utils.cpp b/src/cpu/aarch64/jit_uni_reorder_utils.cpp -index 3d6e424e3..7123811f8 100644 ---- a/src/cpu/aarch64/jit_uni_reorder_utils.cpp -+++ b/src/cpu/aarch64/jit_uni_reorder_utils.cpp -@@ -1,6 +1,7 @@ - /******************************************************************************* - * Copyright 2018-2021 Intel Corporation - * Copyright 2020 FUJITSU LIMITED -+* Copyright 2022 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. -@@ -15,7 +16,8 @@ - * limitations under the License. 
- *******************************************************************************/ - --#include -+#include -+#include - - #include "common/c_types_map.hpp" - #include "common/dnnl_thread.hpp" -@@ -46,8 +48,65 @@ struct layout_desc_t { - strides_t strides; - }; - --status_t cvt_mem_desc_to_layout_desc( -- const memory_desc_t &md_, layout_desc_t &ld, const dims_t &blocks) { -+static status_t compute_blk_and_tail( -+ const memory_desc_t &md_, const int idx, int &blk, int &tail) { -+ const auto md = memory_desc_wrapper(md_); -+ const auto &bd = md.blocking_desc(); -+ if (tail == 0) return status::success; -+ -+ const std::set unique_inner_idxs( -+ bd.inner_idxs, bd.inner_idxs + bd.inner_nblks); -+ std::set dims_with_multiple_blks; -+ for (dim_t dim : unique_inner_idxs) { -+ if (std::count(bd.inner_idxs, bd.inner_idxs + bd.inner_nblks, dim) > 1) -+ dims_with_multiple_blks.insert(dim); -+ } -+ -+ // Dims that have a tail and have multiple blocks are not supported by the jit kernel yet. -+ // For example: -+ // src_tag = abcd -+ // dst_tag = ABcd16b16a4b -+ // 16x15x3x3 -+ // In this case, 'b' dim has two blocks and has a tail. It is not a supported case. 
-+ if (dims_with_multiple_blks.find(idx) != dims_with_multiple_blks.end()) -+ return status::unimplemented; -+ -+ // Only supports inconsistent padding in single and double blocks -+ // and the total block size <= 256 -+ for (int iblk = bd.inner_nblks - 1; iblk > 0; --iblk) { -+ if (bd.inner_idxs[iblk] == idx) break; -+ blk *= bd.inner_blks[iblk]; -+ tail *= bd.inner_blks[iblk]; -+ } -+ if (unique_inner_idxs.size() > 2 || blk > 256) return status::unimplemented; -+ -+ return status::success; -+} -+ -+static status_t compute_chunk_idx(const prb_t &p, const memory_desc_t &imd_, -+ const memory_desc_t &omd_, const int blk_idx, int &chunk_idx) { -+ const auto imd = memory_desc_wrapper(imd_); -+ const auto omd = memory_desc_wrapper(omd_); -+ const auto &ibd = imd.blocking_desc(); -+ const auto &obd = omd.blocking_desc(); -+ if (p.ip_tail == 0 && p.op_tail == 0) return status::success; -+ -+ const ptrdiff_t is -+ = ibd.strides[blk_idx] * obd.inner_blks[obd.inner_idxs[blk_idx]]; -+ const ptrdiff_t os = obd.strides[blk_idx]; -+ -+ for (int i = blk_idx; i < omd.ndims(); ++i) { -+ if (p.nodes[i].os == os && p.nodes[i].is == is) { -+ chunk_idx = i; -+ return status::success; -+ } -+ } -+ -+ return status::invalid_arguments; -+} -+ -+status_t cvt_mem_desc_to_layout_desc(const memory_desc_t &md_, -+ layout_desc_t &ld, const dims_t &blocks, const dims_t &ext_padding) { - const auto md = memory_desc_wrapper(md_); - - bool ok = true && md.is_blocking_desc() && md.extra().flags == 0; -@@ -75,7 +134,7 @@ status_t cvt_mem_desc_to_layout_desc( - stride *= bd.inner_blks[iblk]; - } - } -- P(d, md.padded_dims()[d] / blocks[d], bd.strides[d]); -+ P(d, (md.padded_dims()[d] + ext_padding[d]) / blocks[d], bd.strides[d]); - - // TODO: NOW: revisit, do we need a reverse? 
- // TODO: NOW: consider using strides instead of block sizes in md -@@ -98,7 +157,8 @@ status_t prb_init(prb_t &p, const memory_desc_t &imd, const memory_desc_t &omd, - - auto check_post_ops = [](const primitive_attr_t *attr) { - const auto &po = attr->post_ops_; -- return po.len() == 0 || (po.len() == 1 && po.entry_[0].is_sum(false)); -+ return po.len() == 0 -+ || (po.len() == 1 && po.contain(primitive_kind::sum, 0)); - }; - - bool ok = im_d.is_blocking_desc() && om_d.is_blocking_desc() -@@ -110,26 +170,58 @@ status_t prb_init(prb_t &p, const memory_desc_t &imd, const memory_desc_t &omd, - && check_post_ops(attr); - if (!ok) return unimplemented; - -- dims_t iblocks, oblocks; -+ dims_t iblocks, oblocks, ip_padding, op_padding; - im_d.compute_blocks(iblocks); - om_d.compute_blocks(oblocks); -+ utils::array_set(ip_padding, 0, im_d.ndims()); -+ utils::array_set(op_padding, 0, om_d.ndims()); -+ -+ /* padding_dim consistency check -+ * only supports inconsitent padding for src -+ * TODO: Add inconsistent padding support for dst */ -+ int ip_tail = 0; -+ int op_tail = 0; -+ int iblk_w_tail = 1; -+ int oblk_w_tail = 1; -+ int blk_idx = 0; - -- /* padding_dim consistency check */ - for (int d = 0; d < im_d.ndims(); ++d) { -- const auto pdim = im_d.padded_dims()[d]; -- bool ok = true && pdim == om_d.padded_dims()[d] -- && pdim % iblocks[d] == 0 && pdim % oblocks[d] == 0; -- if (!ok) return unimplemented; -+ const int ip_tmp_dim = im_d.padded_dims()[d]; -+ const int op_tmp_dim = om_d.padded_dims()[d]; -+ const int ip_tmp_tail = ip_tmp_dim % oblocks[d]; -+ const int op_tmp_tail = op_tmp_dim % iblocks[d]; -+ -+ const bool pdim_consistent = ip_tmp_dim == op_tmp_dim -+ && ip_tmp_tail == 0 && op_tmp_tail == 0; -+ const bool pdim_tail = ip_tmp_tail > 0 -+ && (ip_tmp_dim + oblocks[d] - ip_tmp_tail) == op_tmp_dim -+ && op_tmp_tail == 0 && ip_tail == 0; -+ if (!pdim_consistent && !pdim_tail) return status::unimplemented; -+ if (pdim_tail) { -+ blk_idx = d; -+ ip_tail = ip_tmp_tail; 
-+ op_tail = op_tmp_tail; -+ iblk_w_tail = iblocks[d]; -+ oblk_w_tail = oblocks[d]; -+ ip_padding[d] = oblocks[d] - ip_tmp_tail; -+ op_padding[d] = iblocks[d] - op_tmp_tail; -+ } - } -+ CHECK(compute_blk_and_tail(omd, blk_idx, oblk_w_tail, ip_tail)); - - layout_desc_t ild, old; -- status_t status = cvt_mem_desc_to_layout_desc(imd, ild, iblocks); -+ status_t status -+ = cvt_mem_desc_to_layout_desc(imd, ild, iblocks, ip_padding); - if (status != success) return status; -- status = cvt_mem_desc_to_layout_desc(omd, old, oblocks); -+ status = cvt_mem_desc_to_layout_desc(omd, old, oblocks, op_padding); - if (status != success) return status; - - p.itype = ild.dt; - p.otype = old.dt; -+ p.ip_tail = ip_tail; -+ p.op_tail = op_tail; -+ p.iblock = iblk_w_tail; -+ p.oblock = oblk_w_tail; - - p.scale_type = attr->output_scales_.has_default_values() - ? scale_type_t::NONE -@@ -156,7 +248,6 @@ status_t prb_init(prb_t &p, const memory_desc_t &imd, const memory_desc_t &omd, - - while (i_pos < ild.ndims && o_pos < old.ndims) { - assert(ild.id[i_pos] == old.id[o_pos]); -- if (ild.id[i_pos] != old.id[o_pos]) return runtime_error; - - assert(ndims < max_ndims); - if (ndims == max_ndims) return runtime_error; -@@ -191,7 +282,12 @@ status_t prb_init(prb_t &p, const memory_desc_t &imd, const memory_desc_t &omd, - ild.dims[i_pos] = factor; - } - } -+ int blk_chunk_idx = ndims; -+ CHECK(compute_chunk_idx(p, imd, omd, blk_idx, blk_chunk_idx)); -+ - p.ndims = ndims; -+ p.full_ndims = ndims; -+ p.blk_chunk_idx = blk_chunk_idx; - - p.ioff = memory_desc_wrapper(imd).offset0(); - p.ooff = memory_desc_wrapper(omd).offset0(); -@@ -211,8 +307,28 @@ void prb_normalize(prb_t &p) { - && p.nodes[j].n < p.nodes[min_pos].n); - if (new_min) min_pos = j; - } -- if (min_pos != d) nstl::swap(p.nodes[d], p.nodes[min_pos]); -+ if (min_pos != d) { -+ nstl::swap(p.nodes[d], p.nodes[min_pos]); -+ if (p.blk_chunk_idx == min_pos || p.blk_chunk_idx == d) -+ p.blk_chunk_idx = p.blk_chunk_idx == min_pos ? 
d : min_pos; -+ } -+ } -+} -+ -+status_t prb_check_blk(prb_t &p, const memory_desc_t &md_) { -+ const auto md = memory_desc_wrapper(md_); -+ const auto &bd = md.blocking_desc(); -+ if (p.ip_tail == 0) return status::success; -+ -+ // Check if the inner blocks and p.nodes[blk].n in the firsti nblks -+ // is equivalent in reverse order when has tail in block layout. -+ const int nblk = bd.inner_nblks; -+ for (int iblk = 0; iblk < nblk; ++iblk) { -+ if (bd.inner_blks[nblk - iblk - 1] -+ != static_cast(p.nodes[iblk].n)) -+ return status::unimplemented; - } -+ return status::success; - } - - void prb_simplify(prb_t &p) { -@@ -225,18 +341,29 @@ void prb_simplify(prb_t &p) { - for (int d = 0; d < p.ndims - 1; ++d) { - auto &this_node = p.nodes[d + 0]; - auto &next_node = p.nodes[d + 1]; -+ const bool skip_blk_idx = (p.ip_tail > 0 || p.op_tail > 0) -+ && (p.blk_chunk_idx == d || p.blk_chunk_idx == d + 1); - const bool fold = false -- || next_node.n == (size_t)1 // trivial case, just drop next node -+ || (next_node.n == static_cast(1) -+ && !skip_blk_idx) // trivial case, just drop next node - || (true // or real folding if possible -- && next_node.is == (ptrdiff_t)this_node.n * this_node.is -- && next_node.os == (ptrdiff_t)this_node.n * this_node.os -+ && !skip_blk_idx -+ && next_node.is -+ == static_cast( -+ this_node.n * this_node.is) -+ && next_node.os -+ == static_cast( -+ this_node.n * this_node.os) - && next_node.ss -- == (ptrdiff_t)this_node.n * this_node.ss); -+ == static_cast( -+ this_node.n * this_node.ss)); - if (fold) { - this_node.n *= next_node.n; - for (int j = d + 2; j < p.ndims; ++j) - p.nodes[j - 1] = p.nodes[j]; -+ if (d < p.blk_chunk_idx) --p.blk_chunk_idx; - --p.ndims; -+ --p.full_ndims; - --d; // make another try - } - } -@@ -251,6 +378,8 @@ void prb_node_split(prb_t &p, int dim, size_t n1) { - assert(p.nodes[dim].n % n1 == 0); - - p.ndims += 1; -+ p.full_ndims += 1; -+ if (dim < p.blk_chunk_idx) p.blk_chunk_idx += 1; - - for (int d = p.ndims; d > dim 
+ 1; --d) - p.nodes[d] = p.nodes[d - 1]; diff --git a/third_party/mkl_dnn/onednn_acl_reorder_update.patch b/third_party/mkl_dnn/onednn_acl_reorder_update.patch deleted file mode 100644 index 3ac5a62906ff4c..00000000000000 --- a/third_party/mkl_dnn/onednn_acl_reorder_update.patch +++ /dev/null @@ -1,4193 +0,0 @@ -From b84c533dad4db495a92fc6d390a7db5ebd938a88 Mon Sep 17 00:00:00 2001 -From: Kentaro Kawakami -Date: Tue, 1 Nov 2022 09:33:41 +0900 -Subject: [PATCH] cpu: aarch64: reorder: support jit-ed blk_reorder - ---- - src/cpu/aarch64/jit_generator.hpp | 20 + - src/cpu/aarch64/jit_uni_reorder.cpp | 2315 +++++++++++++---- - src/cpu/aarch64/jit_uni_reorder.hpp | 183 +- - src/cpu/aarch64/jit_uni_reorder_utils.cpp | 482 ++-- - .../reorder/cpu_reorder_regular_f32_f32.cpp | 6 + - .../reorder/cpu_reorder_regular_f32_s32.cpp | 2 + - .../reorder/cpu_reorder_regular_f32_s8.cpp | 2 + - .../reorder/cpu_reorder_regular_f32_u8.cpp | 2 + - src/cpu/reorder/cpu_reorder_regular_s32.cpp | 2 + - src/cpu/reorder/cpu_reorder_regular_s8.cpp | 2 + - src/cpu/reorder/cpu_reorder_regular_u8.cpp | 2 + - 11 files changed, 2272 insertions(+), 746 deletions(-) - -diff --git a/src/cpu/aarch64/jit_generator.hpp b/src/cpu/aarch64/jit_generator.hpp -index dd781a622e1..12de9fa8c01 100644 ---- a/src/cpu/aarch64/jit_generator.hpp -+++ b/src/cpu/aarch64/jit_generator.hpp -@@ -435,6 +435,26 @@ class jit_generator : public Xbyak_aarch64::CodeGenerator, public c_compatible { - Xbyak_aarch64::ZRegD(z3.getIdx())); - } - -+ void uni_ld1rw(const Xbyak_aarch64::VReg4S &dst, -+ const Xbyak_aarch64::XReg &base, const int64_t off) { -+ if (off == 0) { -+ ld1r(dst, ptr(base)); -+ } else { -+ add_imm(X_DEFAULT_ADDR, base, off, X_TMP_0); -+ ld1r(dst, ptr(X_DEFAULT_ADDR)); -+ } -+ } -+ -+ void uni_ld1rw(const Xbyak_aarch64::ZRegS &dst, -+ const Xbyak_aarch64::XReg &base, const int64_t off) { -+ if (-32 <= off && off < 32) { -+ ld1rw(dst, P_ALL_ONE / Xbyak_aarch64::T_z, ptr(base, (int)off)); -+ } else { -+ 
add_imm(X_DEFAULT_ADDR, base, off, X_TMP_0); -+ ld1rw(dst, P_ALL_ONE / Xbyak_aarch64::T_z, ptr(X_DEFAULT_ADDR)); -+ } -+ } -+ - void uni_ldr( - const Xbyak_aarch64::VReg &dst, const Xbyak_aarch64::XReg &addr) { - ldr(Xbyak_aarch64::QReg(dst.getIdx()), ptr(addr)); -diff --git a/src/cpu/aarch64/jit_uni_reorder.cpp b/src/cpu/aarch64/jit_uni_reorder.cpp -index a6cefaa20e8..a708da808c0 100644 ---- a/src/cpu/aarch64/jit_uni_reorder.cpp -+++ b/src/cpu/aarch64/jit_uni_reorder.cpp -@@ -1,6 +1,6 @@ - /******************************************************************************* --* Copyright 2018-2021 Intel Corporation --* Copyright 2020-2021 FUJITSU LIMITED -+* Copyright 2018-2022 Intel Corporation -+* Copyright 2020-2022 FUJITSU LIMITED - * Copyright 2022 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); -@@ -19,19 +19,21 @@ - #include - #include - --#include "dnnl_debug.h" -+#include "oneapi/dnnl/dnnl_debug.h" - - #include "common/c_types_map.hpp" -+#include "common/dnnl_thread.hpp" - #include "common/memory_desc_wrapper.hpp" - #include "common/nstl.hpp" - #include "common/primitive.hpp" - #include "common/type_helpers.hpp" - #include "common/utils.hpp" - --#include "cpu/aarch64/jit_uni_reorder.hpp" - #include "cpu/cpu_primitive.hpp" - #include "cpu/reorder/cpu_reorder_pd.hpp" - -+#include "cpu/aarch64/jit_uni_reorder.hpp" -+ - #include "cpu/aarch64/jit_generator.hpp" - - // #define TR_DEBUG -@@ -67,23 +69,6 @@ static bool prb_has_small_strides(const prb_t &prb) { - return true; - } - --static bool prb_tail_friendly(const prb_t &prb) { -- /* find optimal ndims to makes it easier to -- * identify the blk_chunk in the loop*/ -- int ndims = prb.full_ndims - prb.ndims; -- -- int n = prb.nodes[0].is; -- for (int d = 1; d < prb.ndims; ++d) { -- if (d != prb.blk_chunk_idx) n *= prb.nodes[d].n; -- } -- if (prb.ip_tail > 0 -- && ((ndims == 0 && n != 1) -- || (ndims > 0 && prb.ndims > prb.blk_chunk_idx))) -- return false; -- -- return 
true; --} -- - /** Minimal reasonable/desirable kernel size. - * The constant might be used to determine how a problem should be split - * between kernel and threading driver. */ -@@ -96,6 +81,9 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - void operator()(const call_param_t *c) const override { - jit_generator::operator()(c); - } -+ void operator()(const tail_call_param_t *c) const override { -+ jit_generator::operator()(c); -+ } - - status_t create_kernel() override { return jit_generator::create_kernel(); } - -@@ -105,30 +93,53 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - }; - - struct simple_impl_desc_t { -- int ndims_full_unroll; -- int len_last_dim_unroll; -- int len_unroll; -+ int ndims_full_unroll = 0; -+ int len_last_dim_unroll = 0; -+ int tail_len_unroll = 0; -+ int len_unroll = 0; - }; - -+#define PARAM(x) \ -+ abi_param1, \ -+ prb_.is_tail_present ? offsetof(tail_call_param_t, base_params) \ -+ + offsetof(call_param_t, x) \ -+ : offsetof(call_param_t, x) -+#define TAIL_PARAM(x) abi_param1, offsetof(tail_call_param_t, x) -+ - static bool simple_impl_desc_init( - const prb_t &prb, simple_impl_desc_t *desc) { - const int ndims = prb.ndims; - - int ndims_full_unroll = 0; - int len_last_dim_unroll = 1; -+ int tail_len_unroll = 0; - int len_unroll = 1; - -- for (int d = 0; d < ndims; ++d) { -- auto &node = prb.nodes[d]; -- if (len_unroll * node.n <= len_unroll_max) { -- ndims_full_unroll++; -- len_unroll *= node.n; -- } else { -- len_last_dim_unroll = len_unroll_max / len_unroll; -- while (node.n % len_last_dim_unroll) -- --len_last_dim_unroll; -- len_unroll *= len_last_dim_unroll; -- break; -+ // It is responsible for finding as many values -+ // as kernel can unroll. If tail is present then -+ // kernel will unroll only last node (possible improvement). -+ // If there is no tail kernel can unroll a few nodes without any loops etc. 
-+ // ndims_full_unroll - how many nodes will be unrolled -+ // len_last_dim_unroll - what piece of last unrolled node will be unrolled -+ if (prb.is_tail_present) { -+ ndims_full_unroll = 1; -+ len_unroll = prb.nodes[0].n; -+ tail_len_unroll = prb.nodes[0].is_zero_pad_needed -+ ? 0 -+ : static_cast(prb.nodes[0].tail_size); -+ } else { -+ for (int d = 0; d < ndims; ++d) { -+ const auto &node = prb.nodes[d]; -+ if (len_unroll * node.n <= len_unroll_max) { -+ ndims_full_unroll++; -+ len_unroll *= node.n; -+ } else { -+ len_last_dim_unroll = len_unroll_max / len_unroll; -+ while (node.n % len_last_dim_unroll) -+ --len_last_dim_unroll; -+ len_unroll *= len_last_dim_unroll; -+ break; -+ } - } - } - -@@ -137,6 +148,7 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - if (desc) { - desc->ndims_full_unroll = ndims_full_unroll; - desc->len_last_dim_unroll = len_last_dim_unroll; -+ desc->tail_len_unroll = tail_len_unroll; - desc->len_unroll = len_unroll; - } - -@@ -151,62 +163,69 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - && utils::one_of(p.otype, f32, s32, data_type::s8, u8) - && utils::everyone_is(0, p.ioff, p.ooff) /* do we need this? */ - && utils::one_of(p.beta, 0.f, 1.f) /* anything else? */ -- && simple_impl_desc_init(p, nullptr) && prb_has_small_strides(p) -- && prb_tail_friendly(p); -- if (!ok) return false; -+ && simple_impl_desc_init(p, nullptr) -+ && prb_has_small_strides(p); - -- return true; -+ return ok; - } - -- int n(int d) { -- assert(d < prb_.ndims); -- return (int)prb_.nodes[d].n; -- } -- int is(int d) { -- assert(d < prb_.ndims); -- return (int)prb_.nodes[d].is; -- } -- int os(int d) { -- assert(d < prb_.ndims); -- return (int)prb_.nodes[d].os; -+ XReg o_addr(int o_off, bool with_type_multiplier = true) { -+ if (o_off) { -+ add_imm(X_DEFAULT_ADDR, x_ptr_out_off, -+ o_off * (with_type_multiplier ? 
otype_sz_ : 1), X_TMP_0); -+ return X_DEFAULT_ADDR; -+ } -+ -+ return x_ptr_out_off; - } -- int ss(int d) { -- assert(d < prb_.ndims); -- return (int)prb_.nodes[d].ss; -+ -+ XReg c_addr(int c_off) { -+ if (c_off) { -+ add_imm(X_DEFAULT_ADDR, x_ptr_comp_off, c_off, X_TMP_0); -+ return X_DEFAULT_ADDR; -+ } -+ -+ return x_ptr_comp_off; - } - -- int blk_cnt() { -- assert(prb_.blk_chunk_idx < prb_.full_ndims); -- return (int)prb_.nodes[prb_.blk_chunk_idx].n - 1; -+ XReg data_chunk_addr(int node_id) { -+ add_imm(X_DEFAULT_ADDR, abi_param1, -+ offsetof(tail_call_param_t, curr_data_chunks) -+ + sizeof(int64_t) * (node_id), -+ X_TMP_0); -+ return X_DEFAULT_ADDR; - } -- int op_padding() { return prb_.op_tail ? prb_.iblock - prb_.op_tail : 0; } -- int ip_padding() { return prb_.ip_tail ? prb_.oblock - prb_.ip_tail : 0; } - - void step(int off, int prev_i_off, int prev_o_off, int prev_s_off, -- int &i_off, int &o_off, int &s_off, int step_size = 1) { -+ int prev_c_off, int &i_off, int &o_off, int &s_off, int &c_off, -+ int step_size = 1) { - i_off = prev_i_off; - o_off = prev_o_off; - s_off = prev_s_off; -+ c_off = prev_c_off; - - if (off == 0) return; - - int start_dim = 0, dims_prod = 1; - for (; start_dim < prb_.ndims && dims_prod != step_size; ++start_dim) -- dims_prod *= n(start_dim); -+ dims_prod *= prb_.n(start_dim); - assert(start_dim < prb_.ndims); - off /= step_size; - -- for (int d = start_dim; d < prb_.ndims; ++d) { -- i_off += is(d); -- o_off += os(d); -- s_off += ss(d); -+ for (int dim_id = start_dim; dim_id < prb_.ndims; ++dim_id) { -+ i_off += prb_.is(dim_id); -+ o_off += prb_.os(dim_id); -+ s_off += prb_.ss(dim_id); -+ c_off += prb_.cs(dim_id); -+ -+ if (off % prb_.n(dim_id)) break; - -- if (off % n(d)) break; -+ i_off += -prb_.n(dim_id) * prb_.is(dim_id); -+ o_off += -prb_.n(dim_id) * prb_.os(dim_id); -+ s_off += -prb_.n(dim_id) * prb_.ss(dim_id); -+ c_off += -prb_.n(dim_id) * prb_.cs(dim_id); - -- i_off += -n(d) * is(d); -- o_off += -n(d) * os(d); -- s_off 
+= -n(d) * ss(d); -- off /= n(d); -+ off /= prb_.n(dim_id); - - if (off == 0) break; /* FIXME: is it really required? */ - } -@@ -215,8 +234,8 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - void step(int off, int prev_i_off, int prev_o_off, int &i_off, int &o_off, - int step_size = 1) { - int dummy = 0; -- step(off, prev_i_off, prev_o_off, dummy, i_off, o_off, dummy, -- step_size); -+ step(off, prev_i_off, prev_o_off, dummy, dummy, i_off, o_off, dummy, -+ dummy, step_size); - } - - void tr8x8_sve256(int i_off, int o_off) { -@@ -278,40 +297,36 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - && interim_f32); - const uint64_t sveLen = get_sve_length(); - -- add_imm(X_TMP_0, XReg(x_ptr_in_off), i_off * itype_sz, X_DEFAULT_ADDR); -- add_imm(X_TMP_1, X_TMP_0, is(0) * itype_sz, X_DEFAULT_ADDR); -- add_imm(X_TMP_2, X_TMP_1, is(0) * itype_sz, X_DEFAULT_ADDR); -- add_imm(X_TMP_3, X_TMP_2, is(0) * itype_sz, X_DEFAULT_ADDR); -- -- if (unroll * itype_sz == 32) -- for (uint32_t i = 0; i < 4; i++) -- ld1w(ZRegS {i}, p_lsb_256 / T_z, ptr(x_tmp_vec[i])); -- else if (unroll * itype_sz == 16) -- for (uint32_t i = 0; i < 4; i++) -- ldr(QReg {i}, ptr(x_tmp_vec[i])); -- else if (unroll * itype_sz == 8) -- for (uint32_t i = 0; i < 4; i++) -- ldr(DReg {i}, ptr(x_tmp_vec[i])); -- -- add_imm(X_TMP_0, X_TMP_3, is(0) * itype_sz, X_DEFAULT_ADDR); -- add_imm(X_TMP_1, X_TMP_0, is(0) * itype_sz, X_DEFAULT_ADDR); -- add_imm(X_TMP_2, X_TMP_1, is(0) * itype_sz, X_DEFAULT_ADDR); -- add_imm(X_TMP_3, X_TMP_2, is(0) * itype_sz, X_DEFAULT_ADDR); -- -- if (unroll * itype_sz == 32) -- for (uint32_t i = 0; i < 4; i++) -- ld1w(ZRegS {4 + i}, p_lsb_256 / T_z, ptr(x_tmp_vec[i])); -- else if (unroll * itype_sz == 16) -- for (uint32_t i = 0; i < 4; i++) -- ldr(QReg {4 + i}, ptr(x_tmp_vec[i])); -- else if (unroll * itype_sz == 8) -- for (uint32_t i = 0; i < 4; i++) -- ldr(DReg {4 + i}, ptr(x_tmp_vec[i])); -+ PReg p_size(DUMMY_IDX); -+ switch 
(unroll * itype_sz_) { -+ case 32: p_size = p_lsb_256; break; -+ case 16: p_size = p_lsb_128; break; -+ case 8: p_size = p_lsb_64; break; -+ default: assert(!"unreachable"); -+ } -+ -+ const int node_0_input_stride = prb_.is(0); -+ add_imm(X_TMP_0, XReg(x_ptr_in_off), itype_sz_ * i_off, X_DEFAULT_ADDR); -+ for (int i = 1; i < unroll / 2; i++) { -+ add_imm(x_tmp_vec[i], x_tmp_vec[i - 1], -+ itype_sz_ * node_0_input_stride, X_DEFAULT_ADDR); -+ } -+ -+ for (uint32_t i = 0; i < unroll / 2; i++) -+ ld1w(ZRegS {i}, p_size / T_z, ptr(x_tmp_vec[i])); -+ -+ for (int i = 0; i < unroll / 2; i++) { -+ add_imm(x_tmp_vec[i], x_tmp_vec[(i + 3) % 4], -+ itype_sz_ * node_0_input_stride, X_DEFAULT_ADDR); -+ } -+ -+ for (uint32_t i = 0; i < unroll / 2; i++) -+ ld1w(ZRegS {4 + i}, p_size / T_z, ptr(x_tmp_vec[i])); - - if (interim_f32) cvt2ps(0, unroll, prb_.itype); - - #if 0 -- /* Deubg code */ -+ /* Debug code */ - index(z0.s, 0, 1); - mov(z0.s, P_NOT_256/T_m, 0); - mov(z_tmp_vec[0].s, 16); -@@ -348,9 +363,9 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - for (uint32_t i = 0; i < unroll / 2; i++) { - ZRegB z {unroll / 2 + i}; - ZRegB z_tmp = z_tmp_vec[unroll / 2 + i].b; -- /* Move bit 128-255 to 0-127. */ -- ext(z, z, 16); - /* Move bit 0-127 to 128-255. */ -+ ext(z, z, 16); -+ /* Move bit 128-255 to 0-127. */ - ext(z_tmp, z_tmp, sveLen - 16); - } - -@@ -363,65 +378,64 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - } - - if (need_saturation) { -- init_saturate_f32(ymm_zero, ymm_saturation_ubound, reg_tmp, -+ init_saturate_f32(ymm_zero_, ymm_saturation_ubound_, reg_tmp_, - interim_f32 ? f32 : prb_.itype, prb_.otype); - for (int i = 0; i < unroll; i++) -- saturate_f32(ZRegS(i), ymm_zero, ymm_saturation_ubound, -- prb_.otype, p_all); -+ saturate_f32(ZRegS(i), ymm_zero_, ymm_saturation_ubound_, -+ prb_.otype, P_ALL_ONE); - } - - if (prb_.otype != f32) - cvt2odt(0, unroll, prb_.otype, interim_f32 ? 
f32 : prb_.itype); - -- add_imm(X_TMP_0, XReg(x_ptr_out_off), o_off * otype_sz, X_DEFAULT_ADDR); -- add_imm(X_TMP_1, X_TMP_0, os(1) * otype_sz, X_DEFAULT_ADDR); -- add_imm(X_TMP_2, X_TMP_1, os(1) * otype_sz, X_DEFAULT_ADDR); -- add_imm(X_TMP_3, X_TMP_2, os(1) * otype_sz, X_DEFAULT_ADDR); -- -- if (unroll * otype_sz == 32) -- for (uint32_t i = 0; i < 4; i++) -- st1w(ZRegS {i}, p_lsb_256 / T_z, ptr(x_tmp_vec[i])); -- else if (unroll * otype_sz == 16) -- for (uint32_t i = 0; i < 4; i++) -- str(QReg {i}, ptr(x_tmp_vec[i])); -- else if (unroll * otype_sz == 8) -- for (uint32_t i = 0; i < 4; i++) -- str(DReg {i}, ptr(x_tmp_vec[i])); -- -- add_imm(X_TMP_0, X_TMP_3, os(1) * otype_sz, X_DEFAULT_ADDR); -- add_imm(X_TMP_1, X_TMP_0, os(1) * otype_sz, X_DEFAULT_ADDR); -- add_imm(X_TMP_2, X_TMP_1, os(1) * otype_sz, X_DEFAULT_ADDR); -- add_imm(X_TMP_3, X_TMP_2, os(1) * otype_sz, X_DEFAULT_ADDR); -- -- if (unroll * otype_sz == 32) -- for (uint32_t i = 0; i < 4; i++) -- st1w(ZRegS {4 + i}, p_lsb_256 / T_z, ptr(x_tmp_vec[i])); -- else if (unroll * otype_sz == 16) -- for (uint32_t i = 0; i < 4; i++) -- str(QReg {4 + i}, ptr(x_tmp_vec[i])); -- else if (unroll * otype_sz == 8) -- for (uint32_t i = 0; i < 4; i++) -- str(DReg {4 + i}, ptr(x_tmp_vec[i])); -+ const int node_1_output_stride = prb_.os(1); -+ -+ switch (unroll * otype_sz_) { -+ case 32: p_size = p_lsb_256; break; -+ case 16: p_size = p_lsb_128; break; -+ case 8: p_size = p_lsb_64; break; -+ default: assert(!"unreachable"); -+ } -+ -+ add_imm(X_TMP_0, XReg(x_ptr_out_off), otype_sz_ * o_off, -+ X_DEFAULT_ADDR); -+ for (int i = 1; i < unroll / 2; i++) { -+ add_imm(x_tmp_vec[i], x_tmp_vec[i - 1], -+ otype_sz_ * node_1_output_stride, X_DEFAULT_ADDR); -+ } -+ -+ for (uint32_t i = 0; i < 4; i++) -+ st1w(ZRegS {i}, p_size / T_z, ptr(x_tmp_vec[i])); -+ -+ for (int i = 0; i < unroll / 2; i++) { -+ add_imm(x_tmp_vec[i], x_tmp_vec[(i + 3) % 4], -+ otype_sz_ * node_1_output_stride, X_DEFAULT_ADDR); -+ } -+ -+ for (uint32_t i = 0; i < 
unroll / 2; i++) -+ st1w(ZRegS {4 + i}, p_size / T_z, ptr(x_tmp_vec[i])); - } - - bool can_do_tr8x8() { - using namespace data_type; - -- return get_sve_length() >= Xbyak_aarch64::util::SVE_256 -- && prb_.ndims >= 2 -+ static constexpr int desirable_node_size = 8; -+ static constexpr int desirable_stride = 1; -+ -+ return mayiuse(sve_256) && prb_.ndims >= 2 - && ((utils::one_of(prb_.itype, u8, data_type::s8, s32, f32) - && utils::one_of( - prb_.otype, u8, data_type::s8, s32, f32))) -- && utils::everyone_is(8, n(0), n(1)) -- && utils::everyone_is(1, os(0), is(1)) -- && utils::everyone_is(0, prb_.ip_tail, prb_.op_tail) -+ && utils::everyone_is(desirable_node_size, prb_.n(0), prb_.n(1)) -+ && utils::everyone_is(desirable_stride, prb_.os(0), prb_.is(1)) -+ && !prb_.is_tail_present - && prb_.scale_type == scale_type_t::NONE && prb_.beta == 0.f; - } - -- bool process_unroll_tr8x8(int len) { -+ bool process_unroll_tr8x8(const int ndims, const int len) { - if (!can_do_tr8x8()) return false; - -- const int step_size = n(0) * n(1); -+ const int step_size = prb_.n(0) * prb_.n(1); - int i_off = 0, o_off = 0; - for (int off = 0; off < len; off += step_size) { - step(off, i_off, o_off, i_off, o_off, step_size); -@@ -432,23 +446,56 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - } - - template -- bool process_direct_copy(int len) { -+ bool process_direct_copy(const int ndims, const int len) { - using namespace data_type; - -- const int simd_w = cpu_isa_traits::vlen / itype_sz; -- bool can_do = true && mayiuse(isa) -- && utils::everyone_is(1, os(0), is(0)) -- && (false || prb_.itype == prb_.otype -+ static constexpr int desirable_stride = 1; -+ using TRegS = -+ typename utils::conditional::type; -+ const int simd_w = cpu_isa_traits::vlen / itype_sz_; -+ -+ // TODO: support tail_processing for direct copy -+ -+ const bool do_src_zp = prb_.req_src_zp; -+ const bool do_dst_zp = prb_.req_dst_zp; -+ const bool zp_applicable = IMPLICATION( -+ 
(do_src_zp || do_dst_zp), utils::one_of(prb_.itype, s32, f32)); -+ const bool can_do = true && mayiuse(isa) -+ && compensation_needed_ == false -+ && utils::everyone_is(desirable_stride, prb_.os(0), prb_.is(0)) -+ && (false || (prb_.itype == prb_.otype ? zp_applicable : false) - || (prb_.itype == s32 && prb_.otype == f32) - || (prb_.itype == f32 && prb_.otype == s32)) -- && len % simd_w == 0 && n(0) % len == 0 -- && prb_.ip_tail % simd_w == 0 && prb_.op_tail % simd_w == 0 -+ && len % simd_w == 0 && prb_.n(0) % len == 0 -+ && !prb_.is_tail_present - && prb_.scale_type == scale_type_t::NONE && prb_.beta == 0.f; - if (!can_do) return false; - -+ static constexpr int vmm_zp_last_idx = 15; -+ const auto vmm_src_zp -+ = TRegS(do_dst_zp ? vmm_zp_last_idx - 1 : vmm_zp_last_idx); -+ if (do_src_zp) { -+ uni_ld1rw(vmm_src_zp, PARAM(src_zp)); -+ uni_scvtf(vmm_src_zp, vmm_src_zp); -+ } -+ const auto vmm_dst_zp = TRegS(vmm_zp_last_idx); -+ if (do_dst_zp) { -+ uni_ld1rw(vmm_dst_zp, PARAM(dst_zp)); -+ uni_scvtf(vmm_dst_zp, vmm_dst_zp); -+ } -+ -+ const auto apply_zp_ps = [&](const TRegS vmm) { -+ if (do_src_zp) fsub(vmm, vmm, vmm_src_zp); -+ if (do_dst_zp) fadd(vmm, vmm, vmm_dst_zp); -+ }; -+ - for (int off = 0; off < len;) { -- const int unroll -+ // TODO: we need extra reg for proper saturation if otype == s32 -+ int unroll - = nstl::min(16 - (prb_.otype == s32), (len - off) / simd_w); -+ unroll = (do_src_zp || do_dst_zp) -+ ? 
nstl::min(unroll, 16 - do_src_zp - do_dst_zp) -+ : unroll; - - int ur = 0; - int tmp_ur = 0; -@@ -458,14 +505,11 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - - do { - add_imm(x_tmp_vec[count++], x_ptr_in_off, -- (off + ur * simd_w) * itype_sz, X_DEFAULT_ADDR); -+ (off + ur * simd_w) * itype_sz_, X_DEFAULT_ADDR); - ur++; - } while (ur < unroll && count < x_tmp_vec_size); - - for (int i = 0; i < count; i++) { -- /* if (vlen == 64) -- ldr(ZReg(tmp_ur + i), ptr(x_tmp_vec[i])); -- else */ - if (vlen == 64 || vlen == 32) - ld1w(ZRegS(tmp_ur + i), p_lsb_256 / T_z, - ptr(x_tmp_vec[i])); -@@ -478,33 +522,28 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - } - - if (prb_.itype != prb_.otype) { -- const int vlen = cpu_isa_traits::vlen; - for (int ur = 0; ur < unroll; ++ur) { -+ TRegS r(ur); - if (prb_.itype == s32 && prb_.otype == f32) { -- if (vlen == 64 || vlen == 32) { -- ZRegS r(ur); -- /* MSB side 256 bits are ignored. */ -- scvtf(r, p_all / T_m, r); -- } else if (vlen == 16) { -- VReg4S r(ur); -- scvtf(r, r); -- } else -- assert(!"unreachable"); -+ uni_scvtf(r, r); - } else if (prb_.itype == f32 && prb_.otype == s32) { -- /* Out of order can be expected. 
*/ -- if (vlen == 64 || vlen == 32) { -- ZRegS r(ur); -- frinti(r, p_all / T_m, r); -- fcvtzs(r, p_all / T_m, r); -- } else if (vlen == 16) { -- VReg4S r(ur); -- frinti(r, r); -- fcvtzs(r, r); -- } else -- assert(!"unreachable"); -+ uni_frinti(r, r); -+ uni_fcvtzs(r, r); - } else - assert(!"unreachable"); - } -+ } else if (do_src_zp || do_dst_zp) { -+ for (int ur = 0; ur < unroll; ++ur) { -+ const auto vmm = TRegS(ur); -+ if (prb_.otype == f32) { -+ apply_zp_ps(vmm); -+ } else if (prb_.otype == s32) { -+ uni_scvtf(vmm, vmm); -+ apply_zp_ps(vmm); -+ uni_frinti(vmm, vmm); -+ uni_fcvtzs(vmm, vmm); -+ } -+ } - } - - ur = 0; -@@ -515,7 +554,7 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - - do { - add_imm(x_tmp_vec[count++], x_ptr_out_off, -- (off + ur * simd_w) * otype_sz, X_DEFAULT_ADDR); -+ (off + ur * simd_w) * otype_sz_, X_DEFAULT_ADDR); - ur++; - } while (ur < unroll && count < x_tmp_vec_size); - -@@ -538,8 +577,8 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - } - - void process_unroll_generic_step(int reg_unroll, const int *i_off, -- const int *o_off, const int *s_off, const int *ip_padding, -- const bool h_padded) { -+ const int *o_off, const int *s_off, const int *c_off, -+ const int *zero_padding, const bool tail_processing) { - using namespace data_type; - - auto cvt2ps -@@ -588,76 +627,84 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - } - }; - -+ auto load_bytes_addr = [=](const int ur, const int r) { -+ add_imm(x_tmp_vec[r], x_ptr_in_off, i_off[ur + r] * itype_sz_, -+ X_DEFAULT_ADDR); -+ }; -+ auto load_bytes = [=](const int ur, int size, int r) { -+ switch (size) { -+ case 4: ld1(VReg4S(ur)[r], ptr(x_tmp_vec[r])); break; -+ case 2: ld1(VReg8H(ur)[r], ptr(x_tmp_vec[r])); break; -+ case 1: ld1(VReg16B(ur)[r], ptr(x_tmp_vec[r])); break; -+ default: assert(!"unreachable"); -+ } -+ }; -+ -+ auto store = [=](const XReg &addr, const VReg ymm, int size) { -+ 
const uint32_t xmm = ymm.getIdx(); -+ switch (size) { -+ case 16: str(QReg(xmm), ptr(addr)); break; -+ case 8: str(DReg(xmm), ptr(addr)); break; -+ case 4: str(SReg(xmm), ptr(addr)); break; -+ case 2: str(HReg(xmm), ptr(addr)); break; -+ case 1: str(BReg(xmm), ptr(addr)); break; -+ default: assert(!"unreachable"); -+ } -+ }; -+ - /* check whether loading 4 values at once is possible */ -- bool can_load_xmm = reg_unroll % 4 == 0; -+ static constexpr int xmm_vlen = 4; -+ bool can_load_xmm = reg_unroll % xmm_vlen == 0; - for (int ur = 1; ur < reg_unroll; ++ur) -- if (i_off[ur] != i_off[ur - 1] + 1) can_load_xmm = false; -- const int load_step = can_load_xmm ? 4 : 1; -+ if (i_off[ur] != i_off[ur - 1] + 1) { -+ can_load_xmm = false; -+ break; -+ } -+ const int load_step = can_load_xmm ? xmm_vlen : 1; - - /* check whether storing 4 values at once is possible */ -- bool can_store_xmm = reg_unroll % 4 == 0; -+ bool can_store_xmm = reg_unroll % xmm_vlen == 0; - for (int ur = 1; ur < reg_unroll; ++ur) -- if (o_off[ur] != o_off[ur - 1] + 1) can_store_xmm = false; -+ if (o_off[ur] != o_off[ur - 1] + 1) { -+ can_store_xmm = false; -+ break; -+ } - const int ur_step = can_store_xmm ? 4 : 1; - const int load_tail_step - = !can_load_xmm && can_store_xmm ? ur_step : load_step; - -- const bool interim_f32 = false -- || utils::one_of(f32, prb_.itype, prb_.otype) -- || prb_.scale_type != scale_type_t::NONE || prb_.beta != 0.f; -+ const bool interim_f32 = interim_f32_needed(); - - const bool need_saturation - = (utils::one_of(prb_.otype, u8, data_type::s8, s32) - && interim_f32); -- if (h_padded) { -+ -+ std::vector store_masks; -+ if (tail_processing) { - for (int ur = 0; ur < reg_unroll; ur += load_tail_step) { -- if (itype_sz == 4) -- movi(VReg4S(ur), 0); -- else if (itype_sz == 2) -- movi(VReg8H(ur), 0); -- else -- movi(VReg16B(ur), 0); -- /* x_tmp_vec = X_TMP_0 - X_TMP_4 -- Do not use X_TMP_? as the last arg. 
*/ -- for (int r = 0; r < load_tail_step; ++r) { -- if (ip_padding[ur + r] == 0) { -- add_imm(x_tmp_vec[r], x_ptr_in_off, -- i_off[ur + r] * itype_sz, X_DEFAULT_ADDR); -- } -- } -+ uni_clear(VReg(ur)); -+ store_masks.push_back(0); - - for (int r = 0; r < load_tail_step; ++r) { -- if (ip_padding[ur + r] == 0) { -- if (itype_sz == 4) -- ld1(VReg4S(ur)[r], ptr(x_tmp_vec[r])); -- else if (itype_sz == 2) -- ld1(VReg8H(ur)[r], ptr(x_tmp_vec[r])); -- else -- ld1(VReg16B(ur)[r], ptr(x_tmp_vec[r])); -+ if (zero_padding[ur + r] == 0) { -+ store_masks.back() += 1 << r; -+ load_bytes_addr(ur, r); - } - } -+ -+ for (int r = 0; r < load_tail_step; ++r) -+ if (zero_padding[ur + r] == 0) load_bytes(ur, itype_sz_, r); - } - } else { - if (!can_load_xmm && can_store_xmm) { -- assert(ur_step == 4); -+ assert(ur_step == xmm_vlen); - /* load with stride */ - for (int ur = 0; ur < reg_unroll; ur += ur_step) { -- -- /* x_tmp_vec = X_TMP_0 - X_TMP_4 -- Do not use X_TMP_? as the last arg. */ - for (int r = 0; r < ur_step; ++r) { -- add_imm(x_tmp_vec[r], x_ptr_in_off, -- i_off[ur + r] * itype_sz, X_DEFAULT_ADDR); -- } -- -- for (int r = 0; r < ur_step; ++r) { -- if (itype_sz == 4) -- ld1(VReg4S(ur)[r], ptr(x_tmp_vec[r])); -- else if (itype_sz == 2) -- ld1(VReg8H(ur)[r], ptr(x_tmp_vec[r])); -- else -- ld1(VReg16B(ur)[r], ptr(x_tmp_vec[r])); -+ load_bytes_addr(ur, r); - } -+ for (int r = 0; r < ur_step; ++r) -+ load_bytes(ur, itype_sz_, r); - } - } else { - int ur = 0; -@@ -667,13 +714,13 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - - do { - add_imm(x_tmp_vec[count++], x_ptr_in_off, -- i_off[ur] * itype_sz, X_DEFAULT_ADDR); -+ i_off[ur] * itype_sz_, X_DEFAULT_ADDR); - ur += load_step; - } while (ur < reg_unroll && count < x_tmp_vec_size); - - for (int i = 0; i < count; i++) { - -- switch (load_step * itype_sz) { -+ switch (load_step * itype_sz_) { - case 16: - ldr(QReg(tmp_ur), ptr(x_tmp_vec[i])); - break; -@@ -688,6 +735,7 @@ struct 
jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - } - } - } -+ - /* xmm[:] <-- (f32)xmm[:] */ - if (interim_f32) { - const int cvt_step = nstl::max(load_step, ur_step); -@@ -702,30 +750,32 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - if (fast_return) { - if (prb_.scale_type == scale_type_t::COMMON) - for (int ur = 0; ur < reg_unroll; ur += load_step) -- fmul(VReg4S(ur), VReg4S(ur), xmm_scale); -+ fmul(VReg4S(ur), VReg4S(ur), xmm_scale_); - if (prb_.otype != f32) { -- init_saturate_f32(xmm_zero, xmm_saturation_ubound, reg_tmp, -- interim_f32 ? f32 : prb_.itype, prb_.otype); -- for (int ur = 0; ur < reg_unroll; ur += load_step) -+ init_saturate_f32(xmm_zero_, xmm_saturation_ubound_, -+ reg_tmp_, interim_f32 ? f32 : prb_.itype, -+ prb_.otype); -+ for (int ur = 0; ur < reg_unroll; ur += load_step) { - if (need_saturation) -- saturate_f32(VReg4S(ur), xmm_zero, -- xmm_saturation_ubound, prb_.otype, p_all); -+ saturate_f32(VReg4S(ur), xmm_zero_, -+ xmm_saturation_ubound_, prb_.otype, -+ P_ALL_ONE); -+ } - - for (int ur = 0; ur < reg_unroll; ur += load_step) - cvt2odt(ur, 1, prb_.otype, - interim_f32 ? f32 : prb_.itype); - } -- /* load_step is 1 or 4. 
*/ - for (int ur = 0; ur < reg_unroll; ur += load_step) { - for (int r = 0; r < load_step; ++r) { - add_imm(x_tmp_vec[r], x_ptr_out_off, -- o_off[ur + r] * otype_sz, X_DEFAULT_ADDR); -+ o_off[ur + r] * otype_sz_, X_DEFAULT_ADDR); - } - - for (int r = 0; r < load_step; ++r) { -- if (otype_sz == 4) -+ if (otype_sz_ == 4) - st1(VReg4S(ur)[r], ptr(x_tmp_vec[r])); -- else if (otype_sz == 2) -+ else if (otype_sz_ == 2) - st1(VReg8H(ur)[r], ptr(x_tmp_vec[r])); - else - st1(VReg16B(ur)[r], ptr(x_tmp_vec[r])); -@@ -735,7 +785,7 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - } - - /* scatter elements of xmm into 4 xmms */ -- if (itype_sz == 4 || interim_f32) { -+ if (itype_sz_ == 4 || interim_f32) { - for (int ur = 0; ur < reg_unroll; ur += load_step) - for (int r = 1; r < load_step; ++r) { - VReg4S v(ur); -@@ -747,7 +797,18 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - for (int ur = 0; ur < reg_unroll; ur += load_step) - for (int r = 1; r < load_step; ++r) - ext(VReg16B(ur + r), VReg16B(ur), VReg16B(ur), -- itype_sz * r); -+ itype_sz_ * r); -+ } -+ } -+ -+ /* src zero point application */ -+ if (prb_.req_src_zp) { -+ for (int ur = 0; ur < reg_unroll; ur += ur_step) { -+ const auto xmm = VReg4S(ur); -+ if (interim_f32) -+ fsub(xmm, xmm, xmm_src_zp_); -+ else -+ sub(xmm, xmm, xmm_src_zp_); - } - } - -@@ -756,7 +817,7 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - /* xmm <-- scale * xmm[:] */ - if (prb_.scale_type == scale_type_t::COMMON) { - for (int ur = 0; ur < reg_unroll; ur += ur_step) -- fmul(VReg4S(ur), VReg4S(ur), xmm_scale); -+ fmul(VReg4S(ur), VReg4S(ur), xmm_scale_); - } else if (prb_.scale_type == scale_type_t::MANY) { - enum class scale_load_type_t { bcast, load, gather }; - -@@ -769,13 +830,12 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - scale_load_type = scale_load_type_t::load; - - if (scale_load_type == 
scale_load_type_t::bcast -- && !h_padded) { -- VReg4S v(xmm_scale.getIdx()); -+ && !tail_processing) { -+ VReg4S v(xmm_scale_.getIdx()); - VReg4S v_dst(ur); -- add_imm(X_TMP_0, x_ptr_scale_off, s_off[ur] * stype_sz, -+ add_imm(X_TMP_0, x_ptr_scale_off, s_off[ur] * stype_sz_, - X_DEFAULT_ADDR); -- ldr(W_TMP_0, ptr(X_TMP_0)); -- dup(v, W_TMP_0); -+ ld1r(v, ptr(X_TMP_0)); - fmul(v_dst, v_dst, v); - continue; - } -@@ -786,10 +846,10 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - scale_load_type = scale_load_type_t::gather; - - if (scale_load_type == scale_load_type_t::load -- && !h_padded) { -- uint32_t idx = xmm_scale.getIdx(); -+ && !tail_processing) { -+ uint32_t idx = xmm_scale_.getIdx(); - VReg4S v_dst(ur); -- add_imm(X_TMP_0, x_ptr_scale_off, s_off[ur] * stype_sz, -+ add_imm(X_TMP_0, x_ptr_scale_off, s_off[ur] * stype_sz_, - X_DEFAULT_ADDR); - - ldr(QReg {idx}, ptr(X_TMP_0)); -@@ -799,22 +859,15 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - - // load doesn't work as well - // so gather the scale factors one by one -- /*ur_step is 1 or 4. */ -- for (int r = ur; r < ur + ur_step; ++r) { -- if (ip_padding[r] == 0 || !h_padded) { -- /* x_tmp_vec = X_TMP_0 - X_TMP_4 -- Do not use X_TMP_? as the last arg. 
*/ -+ for (int r = ur; r < ur + ur_step; ++r) -+ if (zero_padding[r] == 0 || !tail_processing) { - add_imm(x_tmp_vec[r - ur], x_ptr_scale_off, -- s_off[r] * stype_sz, X_DEFAULT_ADDR); -- } -- } -- for (int r = ur; r < ur + ur_step; ++r) { -- if (ip_padding[r] == 0 || !h_padded) { -- VReg4S v(xmm_scale.getIdx()); -- ld1(v[r - ur], ptr(x_tmp_vec[r - ur])); -+ s_off[r] * stype_sz_, X_DEFAULT_ADDR); - } -- } -- fmul(VReg4S(ur), VReg4S(ur), xmm_scale); -+ for (int r = ur; r < ur + ur_step; ++r) -+ if (zero_padding[r] == 0 || !tail_processing) -+ ld1(xmm_scale_[r - ur], ptr(x_tmp_vec[r - ur])); -+ fmul(VReg4S(ur), VReg4S(ur), xmm_scale_); - } - } - -@@ -829,7 +882,7 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - - do { - add_imm(x_tmp_vec[count++], x_ptr_out_off, -- o_off[ur] * otype_sz, X_DEFAULT_ADDR); -+ o_off[ur] * otype_sz_, X_DEFAULT_ADDR); - ur += ur_step; - } while (ur < reg_unroll && count < x_tmp_vec_size); - -@@ -873,7 +926,7 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - if (prb_.scale_type == scale_type_t::COMMON) { - for (int ur = 0; ur < reg_unroll; ur += ur_step) { - VReg4S tmp(ur); -- fmul(tmp, tmp, VReg4S(xmm_scale.getIdx())); -+ fmul(tmp, tmp, VReg4S(xmm_scale_.getIdx())); - } - } else if (prb_.scale_type == scale_type_t::MANY) { - int ur = 0; -@@ -883,7 +936,7 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - - do { - add_imm(x_tmp_vec[count++], x_ptr_scale_off, -- s_off[ur] * stype_sz, X_DEFAULT_ADDR); -+ s_off[ur] * stype_sz_, X_DEFAULT_ADDR); - ur += ur_step; - } while (ur < reg_unroll && count < x_tmp_vec_size); - -@@ -908,7 +961,7 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - - do { - add_imm(x_tmp_vec[count++], x_ptr_out_off, -- o_off[ur] * otype_sz, X_DEFAULT_ADDR); -+ o_off[ur] * otype_sz_, X_DEFAULT_ADDR); - ur += ur_step; - } while (ur < reg_unroll && count < (x_tmp_vec_size / 2)); - -@@ -951,94 
+1004,272 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - } - } - -- if (need_saturation) { -- init_saturate_f32( -- xmm_zero, xmm_saturation_ubound, reg_tmp, f32, prb_.otype); -+ /* dst zero point application */ -+ if (prb_.req_dst_zp) { - for (int ur = 0; ur < reg_unroll; ur += ur_step) { -- saturate_f32(VReg4S(ur), xmm_zero, xmm_saturation_ubound, -- prb_.otype, p_all); -+ const auto xmm = VReg4S(ur); -+ if (interim_f32) -+ fadd(xmm, xmm, xmm_dst_zp_); -+ else -+ add(xmm, xmm, xmm_dst_zp_); - } - } - -- for (int ur = 0; ur < reg_unroll; ur += ur_step) { -- if (prb_.otype != f32) -- cvt2odt(ur, 1, prb_.otype, interim_f32 ? f32 : prb_.itype); -+ /* adjust scale application */ -+ if (prb_.scale_adjust != 1.f) { -+ dup(xmm_tmp_, reg_scale_adjust_); -+ for (int ur = 0; ur < reg_unroll; ur += ur_step) { -+ fmul(VReg4S(ur), VReg4S(ur), xmm_tmp_); -+ } -+ } -+ -+ if (need_saturation) { -+ init_saturate_f32(xmm_zero_, xmm_saturation_ubound_, reg_tmp_, f32, -+ prb_.otype); -+ for (int ur = 0; ur < reg_unroll; ur += ur_step) { -+ saturate_f32(VReg4S(ur), xmm_zero_, xmm_saturation_ubound_, -+ prb_.otype, P_ALL_ONE); -+ } -+ -+ // reset back xmm_zero_ if needed. 
-+ if (compensation_needed_ && (prb_.req_src_zp || prb_.req_dst_zp)) -+ uni_clear(VReg(xmm_zero_.getIdx())); - } - -- int ur = 0; -- int tmp_ur = 0; -- while (ur < reg_unroll) { -- int count = 0; -+ if (compensation_needed_) { -+ const uint32_t xmm_begin = 9; -+ const uint32_t xmm_end = 11; -+ uint32_t xmm_id = xmm_begin; -+ const auto get_temp_xmm = [&] { -+ const Xbyak_aarch64::VReg temp {xmm_id++}; -+ -+ if (xmm_id > xmm_end) { xmm_id = xmm_begin; } -+ -+ return temp; -+ }; -+ if (can_store_xmm) { -+ enum class comp_load_type_t { bcast, load, gather }; -+ -+ for (int ur = 0; ur < reg_unroll; ur += ur_step) { -+ -+ bool all_ip_padding_one = true; -+ bool all_ip_padding_zero = true; -+ for (int r = ur; r < ur + ur_step; r++) { -+ if (zero_padding[r] != 1) -+ all_ip_padding_one = false; -+ else -+ all_ip_padding_zero = false; -+ } -+ if (all_ip_padding_one) continue; -+ -+ comp_load_type_t comp_load_type = comp_load_type_t::bcast; -+ -+ for (int r = ur + 1; r < ur + ur_step; ++r) -+ if (c_off[r] != c_off[r - 1] + 0) { -+ comp_load_type = comp_load_type_t::load; -+ break; -+ } - -- do { -- add_imm(x_tmp_vec[count++], x_ptr_out_off, o_off[ur] * otype_sz, -- X_DEFAULT_ADDR); -- ur += ur_step; -- } while (ur < reg_unroll && count < x_tmp_vec_size); -+ if (comp_load_type == comp_load_type_t::bcast -+ && all_ip_padding_zero) { -+ const auto reduction_xmm = get_temp_xmm().s4; -+ const auto xmm_reorder_result = VReg4S(ur); -+ frinti(reduction_xmm, xmm_reorder_result); -+ addv(SReg(reduction_xmm.getIdx()), reduction_xmm); -+ const auto comp_addr = c_addr(c_off[ur]); -+ const auto xmm_tmp_ = get_temp_xmm().s4; -+ ldr(SReg(xmm_tmp_.getIdx()), ptr(comp_addr)); -+ add(xmm_tmp_, xmm_tmp_, reduction_xmm); -+ str(SReg(xmm_tmp_.getIdx()), ptr(comp_addr)); -+ continue; -+ } -+ -+ if (comp_load_type == comp_load_type_t::load) -+ for (int r = ur + 1; r < ur + ur_step; ++r) -+ if (c_off[r] != c_off[r - 1] + 1) { -+ comp_load_type = comp_load_type_t::gather; -+ break; -+ } -+ -+ if 
(comp_load_type == comp_load_type_t::load -+ && all_ip_padding_zero) { -+ const auto xmm_reorder_result_dq = get_temp_xmm().s4; -+ const auto xmm_reorder_result = VReg4S(ur); -+ const auto comp_addr = c_addr(c_off[ur]); -+ frinti(xmm_reorder_result_dq, xmm_reorder_result); -+ const auto xmm_tmp_ = get_temp_xmm().s4; -+ ldr(SReg(xmm_tmp_.getIdx()), ptr(comp_addr)); -+ add(xmm_reorder_result_dq, xmm_reorder_result_dq, -+ xmm_tmp_); -+ str(SReg(xmm_tmp_.getIdx()), ptr(comp_addr)); -+ continue; -+ } - -- for (int i = 0; i < count; i++) { -+ const auto xmm_reorder_result_dq = get_temp_xmm().s4; -+ const auto xmm_reorder_result = VReg4S(ur); -+ frinti(xmm_reorder_result_dq, xmm_reorder_result); - -- switch (ur_step * otype_sz) { -- case 16: str(QReg(tmp_ur), ptr(x_tmp_vec[i])); break; -- case 8: str(DReg(tmp_ur), ptr(x_tmp_vec[i])); break; -- case 4: str(SReg(tmp_ur), ptr(x_tmp_vec[i])); break; -- case 2: str(HReg(tmp_ur), ptr(x_tmp_vec[i])); break; -- case 1: str(BReg(tmp_ur), ptr(x_tmp_vec[i])); break; -- default: assert(!"unreachable"); -+ for (int r = ur; r < ur + ur_step; ++r) { -+ if (zero_padding[r] == 0 || !tail_processing) { -+ mov(W_TMP_0, xmm_reorder_result_dq[r]); -+ const auto comp_addr = c_addr(c_off[ur]); -+ str(W_TMP_0, ptr(comp_addr)); -+ } -+ } -+ } -+ } else { -+ for (int ur = 0; ur < reg_unroll; ur += ur_step) { -+ if (zero_padding[ur] == 0 || !tail_processing) { -+ const auto xmm_reorder_result_dq = get_temp_xmm().s4; -+ const auto xmm_reorder_result = VReg4S(ur); -+ const auto comp_addr = c_addr(c_off[ur]); -+ frinti(xmm_reorder_result_dq, xmm_reorder_result); -+ const auto xmm_tmp_ = get_temp_xmm().s4; -+ ldr(SReg(xmm_tmp_.getIdx()), ptr(comp_addr)); -+ add(xmm_reorder_result_dq, xmm_reorder_result_dq, -+ xmm_tmp_); -+ str(SReg(xmm_tmp_.getIdx()), ptr(comp_addr)); -+ } - } -- tmp_ur += ur_step; - } - } -+ -+ for (int ur = 0; ur < reg_unroll; ur += ur_step) { -+ if (prb_.req_src_zp || prb_.req_dst_zp) { -+ const bool use_store_masks = 
!store_masks.empty(); -+ if (use_store_masks) { -+ const auto mask = (~store_masks[ur / ur_step]) & 0xF; -+ switch (mask) { -+ case 0x0: -+ /* Do nothing */ -+ break; -+ case 0x1: ins(VReg4S(ur)[0], xmm_zero_[0]); break; -+ case 0x2: ins(VReg4S(ur)[1], xmm_zero_[1]); break; -+ case 0x3: -+ ins(VReg2D(ur)[0], VReg2D(xmm_zero_.getIdx())[0]); -+ break; -+ case 0x4: ins(VReg4S(ur)[2], xmm_zero_[2]); break; -+ case 0x5: -+ ins(VReg4S(ur)[0], xmm_zero_[0]); -+ ins(VReg4S(ur)[2], xmm_zero_[2]); -+ break; -+ case 0x6: -+ ins(VReg4S(ur)[1], xmm_zero_[1]); -+ ins(VReg4S(ur)[2], xmm_zero_[2]); -+ break; -+ case 0x7: -+ ins(VReg2D(ur)[0], VReg2D(xmm_zero_.getIdx())[0]); -+ ins(VReg4S(ur)[2], xmm_zero_[2]); -+ break; -+ case 0x8: ins(VReg4S(ur)[3], xmm_zero_[3]); break; -+ case 0x9: -+ ins(VReg4S(ur)[0], xmm_zero_[0]); -+ ins(VReg4S(ur)[3], xmm_zero_[3]); -+ break; -+ case 0xa: -+ ins(VReg4S(ur)[1], xmm_zero_[1]); -+ ins(VReg4S(ur)[3], xmm_zero_[3]); -+ break; -+ case 0xb: -+ ins(VReg2D(ur)[0], VReg2D(xmm_zero_.getIdx())[0]); -+ ins(VReg4S(ur)[3], xmm_zero_[3]); -+ break; -+ case 0xc: -+ ins(VReg2D(ur)[1], VReg2D(xmm_zero_.getIdx())[1]); -+ break; -+ case 0xd: -+ ins(VReg4S(ur)[0], xmm_zero_[0]); -+ ins(VReg2D(ur)[1], VReg2D(xmm_zero_.getIdx())[1]); -+ break; -+ case 0xe: -+ ins(VReg4S(ur)[1], xmm_zero_[1]); -+ ins(VReg2D(ur)[1], VReg2D(xmm_zero_.getIdx())[1]); -+ break; -+ case 0xf: movi(VReg16B(ur), 0); break; -+ default: assert(!"unreachable"); -+ } -+ } -+ } -+ if (prb_.otype != f32) -+ cvt2odt(ur, 1, prb_.otype, interim_f32 ? f32 : prb_.itype); -+ -+ store(o_addr(o_off[ur]), VReg(ur), ur_step * otype_sz_); -+ } - } - -- void comp_padding_flag(int ndims, int off, int len, int &i_tail) { -- const int ip_without_padding -- = ndims == 0 ? 
len - ip_padding() : prb_.ip_tail; -- if ((ndims == 0 && off >= ip_without_padding) -- || (ndims > 0 && (off % prb_.oblock) >= ip_without_padding)) -- i_tail = 1; -+ bool interim_f32_needed() { -+ using namespace data_type; -+ -+ return utils::one_of(f32, prb_.itype, prb_.otype) -+ || prb_.scale_type != scale_type_t::NONE || prb_.beta != 0.f -+ || ((prb_.req_src_zp || prb_.req_dst_zp) -+ ? !(prb_.itype == s32 && prb_.otype == s32) -+ : false) -+ || (prb_.itype != f32 && compensation_needed_) -+ || prb_.scale_adjust != 1.f; - } - -- void process_unroll_generic(const int ndims, int len, const bool h_padded) { -+ void process_unroll_generic( -+ const int ndims, int len, const bool tail_processing) { -+ assert(IMPLICATION(prb_.nodes[0].tail_size > 0, -+ len == static_cast(prb_.nodes[0].n) -+ || len == static_cast(prb_.nodes[0].tail_size))); -+ - const int blk = 8; - - int i_off[2 * blk] = {0}; - int o_off[2 * blk] = {0}; - int s_off[2 * blk] = {0}; -+ int c_off[2 * blk] = {0}; - - int curr = 0; // will switch between 0 and 1 - -+ const bool interim_f32 = interim_f32_needed(); -+ -+ if (prb_.req_src_zp) { -+ add_imm(X_DEFAULT_ADDR, PARAM(src_zp), X_TMP_0); -+ ld1r(xmm_src_zp_, ptr(X_DEFAULT_ADDR)); -+ if (interim_f32) scvtf(xmm_src_zp_, xmm_src_zp_); -+ } -+ if (prb_.req_dst_zp) { -+ add_imm(X_DEFAULT_ADDR, PARAM(dst_zp), X_TMP_0); -+ ld1r(xmm_dst_zp_, ptr(X_DEFAULT_ADDR)); -+ if (interim_f32) scvtf(xmm_dst_zp_, xmm_dst_zp_); -+ } -+ - for (int off = 0; off < len; off += blk) { - const int reg_unroll = nstl::min(off + blk, len) - off; -- int ip_padding[blk] = {0}; -+ int zero_padding[blk] = {0}; -+ const auto curr_blk = curr * blk; - - /* compute offsets and tail*/ - for (int ur = off != 0 ? 
0 : 1; ur < reg_unroll; ++ur) { -- const int ur_c = curr * blk + ur; -+ const int ur_c = curr_blk + ur; - const int ur_p = (ur_c - 1 + 2 * blk) % (2 * blk); // prev ur -+ const bool is_tail -+ = off + ur >= static_cast(prb_.nodes[0].tail_size); - step(off + ur, i_off[ur_p], o_off[ur_p], s_off[ur_p], -- i_off[ur_c], o_off[ur_c], s_off[ur_c]); -- if (h_padded) -- comp_padding_flag(ndims, off + ur, len, ip_padding[ur]); -+ c_off[ur_p], i_off[ur_c], o_off[ur_c], s_off[ur_c], -+ c_off[ur_c]); -+ if (tail_processing && is_tail) zero_padding[ur] = 1; - } -- process_unroll_generic_step(reg_unroll, i_off + curr * blk, -- o_off + curr * blk, s_off + curr * blk, ip_padding, -- h_padded); -+ -+ process_unroll_generic_step(reg_unroll, i_off + curr_blk, -+ o_off + curr_blk, s_off + curr_blk, c_off + curr_blk, -+ zero_padding, tail_processing); - - curr = 1 - curr; - } - } - - void compute_ker( -- const int ndims, const int len_unroll, const bool h_padded) { -+ const int ndims, const int len_unroll, const bool tail_processing) { - bool optimized = false; -- optimized = optimized -- || (process_direct_copy(len_unroll) && !h_padded); -- optimized = optimized -- || (process_direct_copy(len_unroll) && !h_padded); -- optimized -- = optimized || (process_unroll_tr8x8(len_unroll) && !h_padded); -- if (!optimized) process_unroll_generic(ndims, len_unroll, h_padded); -+ optimized = optimized || process_direct_copy(ndims, len_unroll) -+ || process_direct_copy(ndims, len_unroll) -+ || process_unroll_tr8x8(ndims, len_unroll); -+ if (!optimized) -+ process_unroll_generic(ndims, len_unroll, tail_processing); - } - - void loop_begin(Label &l, XReg reg_cnt, int len) { -@@ -1046,97 +1277,287 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - L(l); - } - -+ void check_if_this_is_last_chunk(const XReg reg_curr_chunk, int node_id) { -+ // Chunks are backwards numered i.e: -+ // [0] -> [node_size] -+ // [1] -> [node_size - 1] -+ // ... 
-+ // [node_size - 1] -> [1] -+ -+ // It is done like this, because it is easier to decrement counter -+ // and check if it is equal to zero than increment and check -+ // if it is equal to node_size. -+ static constexpr int64_t last_chunk = 1; -+ cmp(reg_curr_chunk, last_chunk); -+ } -+ -+ void zero_dst_memory(const int bytes_to_zeroing) { -+ static constexpr int num_of_bytes_in_xmm = 128 / 8; -+ -+ const int xmms_to_zeroing -+ = std::div(bytes_to_zeroing, num_of_bytes_in_xmm).quot; -+ const int tail_to_zeroing -+ = std::div(bytes_to_zeroing, num_of_bytes_in_xmm).rem; -+ -+ movi(xmm_tmp_, 0); -+ -+ if (xmms_to_zeroing > 0) { -+ Label loop; -+ -+ mov(reg_tmp_, xmms_to_zeroing); -+ L(loop); -+ str(QReg(xmm_tmp_.getIdx()), ptr(o_addr(0))); -+ add_imm(reg_off_out_, reg_off_out_, num_of_bytes_in_xmm, X_TMP_0); -+ add_imm(x_ptr_out_off, x_ptr_out_off, num_of_bytes_in_xmm, X_TMP_0); -+ subs(reg_tmp_, reg_tmp_, 1); -+ mov(X_TMP_0, 32); -+ b(NE, loop); -+ } -+ -+ if (tail_to_zeroing) mov_imm(W_TMP_0, 0); -+ -+ for (int i = 0; i < tail_to_zeroing; i++) -+ strb(W_TMP_0, ptr(o_addr(i, false))); -+ -+ // Restore dst offset to initial value -+ if (xmms_to_zeroing > 0) { -+ sub_imm(reg_off_out_, reg_off_out_, -+ num_of_bytes_in_xmm * xmms_to_zeroing, X_TMP_0); -+ sub_imm(x_ptr_out_off, x_ptr_out_off, -+ num_of_bytes_in_xmm * xmms_to_zeroing, X_TMP_0); -+ } -+ } -+ -+ void finalize_tail_loop(int i_step, int o_step, int s_step, int c_step, -+ const int curr_node_id) { -+ static constexpr int empty_chunk_info = -1; -+ -+ mov(reg_tmp_, empty_chunk_info); -+ str(reg_tmp_, ptr(data_chunk_addr(curr_node_id))); -+ -+ const int padded_area = prb_.nodes[curr_node_id].n -+ - prb_.nodes[curr_node_id].tail_size; -+ -+ if (prb_.nodes[curr_node_id].is_zero_pad_needed) { -+ int num_of_zero_padded_values = padded_area; -+ for (int i = curr_node_id - 1; i >= 0; i--) { -+ num_of_zero_padded_values *= prb_.nodes[i].n; -+ } -+ -+ const int bytes_to_zeroing = num_of_zero_padded_values * otype_sz_; -+ 
zero_dst_memory(bytes_to_zeroing); -+ } -+ -+ // This function is called by loop_end. At the end -+ // of loop_end is section that is responsible for -+ // restoring offset values. Restoring is based on -+ // len value which is equal to prb.nodes[x].n. -+ // If fill_zero_padded_area is called then it means -+ // offsets were shifted prb.nodes[x].tail_size times. -+ // Therefore, this function has to shift offsets by -+ // zero pad area. -+ add_imm(reg_off_in_, reg_off_in_, padded_area * i_step * itype_sz_, -+ X_TMP_0); -+ add_imm(reg_off_out_, reg_off_out_, padded_area * o_step * otype_sz_, -+ X_TMP_0); -+ add_imm(x_ptr_in_off, x_ptr_in_off, padded_area * i_step * itype_sz_, -+ X_TMP_0); -+ add_imm(x_ptr_out_off, x_ptr_out_off, padded_area * o_step * otype_sz_, -+ X_TMP_0); -+ if (prb_.scale_type == scale_type_t::MANY) { -+ add_imm(reg_off_scale_, reg_off_scale_, -+ padded_area * s_step * stype_sz_, X_TMP_0); -+ add_imm(x_ptr_scale_off, x_ptr_scale_off, -+ padded_area * s_step * stype_sz_, X_TMP_0); -+ } -+ if (compensation_needed_) { -+ add_imm(reg_off_comp_, reg_off_comp_, -+ padded_area * c_step * sizeof(int32_t), X_TMP_0); -+ add_imm(x_ptr_comp_off, x_ptr_comp_off, -+ padded_area * c_step * sizeof(int32_t), X_TMP_0); -+ } -+ } -+ - void loop_end(Label &l, XReg reg_cnt, int len, int i_step, int o_step, -- int s_step) { -- add_imm(reg_off_in, reg_off_in, i_step * itype_sz, X_TMP_0); -- add_imm(reg_off_out, reg_off_out, o_step * otype_sz, X_TMP_0); -- add_imm(x_ptr_in_off, x_ptr_in_off, i_step * itype_sz, X_TMP_0); -- add_imm(x_ptr_out_off, x_ptr_out_off, o_step * otype_sz, X_TMP_0); -+ int s_step, int c_step, const int curr_node_id) { -+ add_imm(reg_off_in_, reg_off_in_, i_step * itype_sz_, X_TMP_0); -+ add_imm(reg_off_out_, reg_off_out_, o_step * otype_sz_, X_TMP_0); -+ add_imm(x_ptr_in_off, x_ptr_in_off, i_step * itype_sz_, X_TMP_0); -+ add_imm(x_ptr_out_off, x_ptr_out_off, o_step * otype_sz_, X_TMP_0); - - if (prb_.scale_type == scale_type_t::MANY) { -- 
add_imm(reg_off_scale, reg_off_scale, s_step * stype_sz, X_TMP_0); -- add_imm(x_ptr_scale_off, x_ptr_scale_off, s_step * stype_sz, -+ add_imm(reg_off_scale_, reg_off_scale_, s_step * stype_sz_, -+ X_TMP_0); -+ add_imm(x_ptr_scale_off, x_ptr_scale_off, s_step * stype_sz_, - X_TMP_0); - } -+ -+ if (compensation_needed_) { -+ add_imm(reg_off_comp_, reg_off_comp_, c_step * sizeof(int32_t), -+ X_TMP_0); -+ add_imm(x_ptr_comp_off, x_ptr_comp_off, c_step * sizeof(int32_t), -+ X_TMP_0); -+ } -+ - subs(reg_cnt, reg_cnt, 1); - b(NE, l); - -- sub_imm(reg_off_in, reg_off_in, len * i_step * itype_sz, X_TMP_0); -- sub_imm(reg_off_out, reg_off_out, len * o_step * otype_sz, X_TMP_0); -- sub_imm(x_ptr_in_off, x_ptr_in_off, len * i_step * itype_sz, X_TMP_0); -- sub_imm(x_ptr_out_off, x_ptr_out_off, len * o_step * otype_sz, X_TMP_0); -+ if (prb_.tail(curr_node_id) != 0) { -+ Label if_end; -+ -+ // On the stack should be an information if node -+ // was processed with tail or not. -+ ldr(reg_tmp_, post_ptr(X_SP, reg_tmp_.getBit() / 8)); -+ -+ cmp(reg_tmp_, with_tail_info_); -+ b(NE, if_end); -+ finalize_tail_loop(i_step, o_step, s_step, c_step, curr_node_id); -+ L(if_end); -+ } -+ -+ // Restore offset to initial values. It means before -+ // loop execution. 
-+ sub_imm(reg_off_in_, reg_off_in_, len * i_step * itype_sz_, X_TMP_0); -+ sub_imm(reg_off_out_, reg_off_out_, len * o_step * otype_sz_, X_TMP_0); -+ sub_imm(x_ptr_in_off, x_ptr_in_off, len * i_step * itype_sz_, X_TMP_0); -+ sub_imm(x_ptr_out_off, x_ptr_out_off, len * o_step * otype_sz_, -+ X_TMP_0); - - if (prb_.scale_type == scale_type_t::MANY) { -- sub_imm(reg_off_scale, reg_off_scale, len * s_step * stype_sz, -+ sub_imm(reg_off_scale_, reg_off_scale_, len * s_step * stype_sz_, - X_TMP_0); -- sub_imm(x_ptr_scale_off, x_ptr_scale_off, len * s_step * stype_sz, -+ sub_imm(x_ptr_scale_off, x_ptr_scale_off, len * s_step * stype_sz_, - X_TMP_0); - } -+ if (compensation_needed_) { -+ sub_imm(reg_off_comp_, reg_off_comp_, -+ len * c_step * sizeof(int32_t), X_TMP_0); -+ sub_imm(x_ptr_comp_off, x_ptr_comp_off, -+ len * c_step * sizeof(int32_t), X_TMP_0); -+ } - } - -- void compute_blk_ker(const int len_unroll) { -+ void compute_blk_ker(const simple_impl_desc_t &desc) { -+ static constexpr bool with_tail_processing = true; -+ Label no_last_chunk, end_label; - int omp_ndims = prb_.full_ndims - prb_.ndims; -- Label no_last_blk, end_label; - -- if (prb_.ip_tail > 0 && prb_.op_tail == 0) { -- if (omp_ndims == 0) { -- cmp(reg_last_loop_cnt, 1); -- bne(no_last_blk); -- compute_ker(omp_ndims, len_unroll, true); -- } else { -- cmp(reg_blk_chunks, blk_cnt()); -- bne(no_last_blk); -- compute_ker(omp_ndims, len_unroll, true); -+ if (prb_.nodes[0].tail_size > 0) { -+ if (!prb_.nodes[0].is_parent_empty()) { -+ const int parent_node_id = prb_.nodes[0].parent_node_id; -+ ldr(reg_tmp_, ptr(data_chunk_addr(parent_node_id))); -+ check_if_this_is_last_chunk(reg_tmp_, parent_node_id); -+ b(NE, no_last_chunk); - } -+ -+ const int len_unroll = desc.tail_len_unroll > 0 -+ ? 
desc.tail_len_unroll -+ : desc.len_unroll; -+ compute_ker(omp_ndims, len_unroll, with_tail_processing); - b(end_label); - } - -- L(no_last_blk); -- compute_ker(omp_ndims, len_unroll, false); -+ L(no_last_chunk); -+ compute_ker(omp_ndims, desc.len_unroll, !with_tail_processing); - L(end_label); - } - -+ void create_loops(const simple_impl_desc_t &desc, -+ const std::array ®_cnt, int jit_loop) { -+ assert(jit_loop <= ndims_jit_loop_max); -+ -+ if (jit_loop > 0) { -+ const int nfu = desc.ndims_full_unroll; -+ const int unroll_factor -+ = jit_loop == 1 ? desc.len_last_dim_unroll : 1; -+ const int curr_node_id = nfu + (jit_loop - 1); -+ const int parent_node_id = prb_.nodes[curr_node_id].parent_node_id; -+ const int tail_size = prb_.tail(curr_node_id) / unroll_factor; -+ const int node_size = prb_.n(curr_node_id) / unroll_factor; -+ const XReg reg_loop_cnt = reg_cnt[jit_loop - 1]; -+ const bool curr_node_has_tail = prb_.tail(curr_node_id) != 0; -+ Label loop, if_no_tail, if_end; -+ -+ if (curr_node_has_tail) { -+ const size_t reg_bytes = reg_tmp_.getBit() / 8; -+ if (prb_.nodes[curr_node_id].is_parent_empty()) { -+ mov(reg_loop_cnt, tail_size); -+ // Put info that node is being processed with tail. -+ mov(reg_tmp_, with_tail_info_); -+ str(reg_tmp_, pre_ptr(X_SP, -reg_bytes)); -+ } else { -+ ldr(reg_tmp_, ptr(data_chunk_addr(parent_node_id))); -+ check_if_this_is_last_chunk(reg_tmp_, parent_node_id); -+ b(NE, if_no_tail); -+ mov(reg_loop_cnt, tail_size); -+ // Put info that node is being processed with tail. -+ mov(reg_tmp_, with_tail_info_); -+ str(reg_tmp_, pre_ptr(X_SP, -reg_bytes)); -+ b(if_end); -+ -+ L(if_no_tail); -+ mov(reg_loop_cnt, node_size); -+ // Put info that node is being processed without tail. 
-+ mov(reg_tmp_, without_tail_info_); -+ str(reg_tmp_, pre_ptr(X_SP, -reg_bytes)); -+ L(if_end); -+ } -+ } -+ -+ if (prb_.is_tail_in_one_of_child_nodes(curr_node_id)) { -+ if (!curr_node_has_tail) { -+ mov(reg_loop_cnt, node_size); -+ str(reg_loop_cnt, ptr(data_chunk_addr(curr_node_id))); -+ } -+ L(loop); -+ if (!prb_.nodes[curr_node_id].is_parent_empty()) { -+ Label if_no_tail_in_child_node; -+ ldr(reg_tmp_, ptr(data_chunk_addr(parent_node_id))); -+ check_if_this_is_last_chunk(reg_tmp_, parent_node_id); -+ b(NE, if_no_tail_in_child_node); -+ str(reg_loop_cnt, ptr(data_chunk_addr(curr_node_id))); -+ L(if_no_tail_in_child_node); -+ } else { -+ str(reg_loop_cnt, ptr(data_chunk_addr(curr_node_id))); -+ } -+ } else if (curr_node_has_tail) { -+ L(loop); -+ } else { -+ loop_begin(loop, reg_loop_cnt, node_size); -+ } -+ create_loops(desc, reg_cnt, jit_loop - 1); -+ -+ loop_end(loop, reg_loop_cnt, node_size, -+ prb_.is(curr_node_id) * unroll_factor, -+ prb_.os(curr_node_id) * unroll_factor, -+ prb_.ss(curr_node_id) * unroll_factor, -+ prb_.cs(curr_node_id) * unroll_factor, curr_node_id); -+ } else { -+ compute_blk_ker(desc); -+ } -+ } -+ - bool simple_impl() { - simple_impl_desc_t d; - if (!simple_impl_desc_init(prb_, &d)) return false; - -- const int nfu = d.ndims_full_unroll; -- const int ldu = d.len_last_dim_unroll; -- const int n_jit_loops = prb_.ndims - d.ndims_full_unroll; -- assert(n_jit_loops <= ndims_jit_loop_max); -- -- eor(reg_off_in, reg_off_in, reg_off_in); -- eor(reg_off_out, reg_off_out, reg_off_out); -- mov(x_ptr_in_off, XReg(reg_ptr_in.getIdx())); -- mov(x_ptr_out_off, XReg(reg_ptr_out.getIdx())); -+ eor(reg_off_in_, reg_off_in_, reg_off_in_); -+ eor(reg_off_out_, reg_off_out_, reg_off_out_); -+ mov(x_ptr_in_off, reg_ptr_in_); -+ mov(x_ptr_out_off, reg_ptr_out_); - if (prb_.scale_type == scale_type_t::MANY) { -- eor(reg_off_scale, reg_off_scale, reg_off_scale); -- mov(x_ptr_scale_off, XReg(reg_ptr_scale.getIdx())); -+ mov(reg_off_scale_, 0); -+ 
mov(x_ptr_scale_off, reg_ptr_scale_); -+ } -+ if (compensation_needed_) { -+ eor(reg_off_comp_, reg_off_comp_, reg_off_comp_); -+ mov(x_ptr_comp_off, reg_off_comp_); - } - -- Label l_loop[3]; -- XReg reg_cnt[3] = {x15, x14, x13}; -- -- if (n_jit_loops > 2) loop_begin(l_loop[2], reg_cnt[2], n(nfu + 2)); -- -- if (n_jit_loops > 1) loop_begin(l_loop[1], reg_cnt[1], n(nfu + 1)); -- -- if (n_jit_loops > 0) -- loop_begin(l_loop[0], reg_cnt[0], n(nfu + 0) / ldu); -- -- compute_blk_ker(d.len_unroll); -- -- if (n_jit_loops > 0) -- loop_end(l_loop[0], reg_cnt[0], n(nfu + 0) / ldu, is(nfu + 0) * ldu, -- os(nfu + 0) * ldu, ss(nfu + 0) * ldu); -- -- if (n_jit_loops > 1) -- loop_end(l_loop[1], reg_cnt[1], n(nfu + 1), is(nfu + 1), -- os(nfu + 1), ss(nfu + 1)); -+ std::array reg_cnt({{x15, x14, x13}}); - -- if (n_jit_loops > 2) -- loop_end(l_loop[2], reg_cnt[2], n(nfu + 2), is(nfu + 2), -- os(nfu + 2), ss(nfu + 2)); -+ const int n_jit_loops = prb_.ndims - d.ndims_full_unroll; -+ create_loops(d, reg_cnt, n_jit_loops); - - return true; - } -@@ -1156,7 +1577,7 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - inst(__VA_ARGS__); - - void cvt_z_s32_f32(const size_t startIdx, const size_t regNum) { -- UNROLL_INST(scvtf, ZRegS, tmp, p_all / T_m, tmp); -+ UNROLL_INST(scvtf, ZRegS, tmp, P_ALL_ONE / T_m, tmp); - } - - void cvt_v_s32_f32(const size_t startIdx, const size_t regNum) { -@@ -1164,8 +1585,8 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - } - - void cvt_z_f32_s32(const size_t startIdx, const size_t regNum) { -- UNROLL_INST(frinti, ZRegS, tmp, p_all / T_m, tmp); -- UNROLL_INST(fcvtzs, ZRegS, tmp, p_all / T_m, tmp); -+ UNROLL_INST(frinti, ZRegS, tmp, P_ALL_ONE / T_m, tmp); -+ UNROLL_INST(fcvtzs, ZRegS, tmp, P_ALL_ONE / T_m, tmp); - } - - void cvt_v_f32_s32(const size_t startIdx, const size_t regNum) { -@@ -1175,7 +1596,7 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - - void 
cvt_z_s8_s32(const size_t startIdx, const size_t regNum) { - cvt_z_b_s(startIdx, regNum); -- UNROLL_INST(sxtb, ZRegS, tmp, p_all / T_m, tmp); -+ UNROLL_INST(sxtb, ZRegS, tmp, P_ALL_ONE / T_m, tmp); - } - - void cvt_v_s8_s32(const size_t startIdx, const size_t regNum) { -@@ -1214,7 +1635,7 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - - void cvt_z_u8_s32(const size_t startIdx, const size_t regNum) { - cvt_z_b_s(startIdx, regNum); -- UNROLL_INST(uxtb, ZRegS, tmp, p_all / T_m, tmp); -+ UNROLL_INST(uxtb, ZRegS, tmp, P_ALL_ONE / T_m, tmp); - } - - void cvt_v_u8_s32(const size_t startIdx, const size_t regNum) { -@@ -1285,7 +1706,7 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - - dupm(z_tmp7.s, 255); - UNROLL_INST2(smax, ZRegS(i), 0); -- UNROLL_INST2(smin, ZRegS(i), p_all / T_m, z_tmp7.s); -+ UNROLL_INST2(smin, ZRegS(i), P_ALL_ONE / T_m, z_tmp7.s); - UNROLL_INST(uzp1, ZRegH, tmp, tmp, tmp); - UNROLL_INST(uzp1, ZRegB, tmp, tmp, tmp); - UNROLL_INST2(mov, ZRegB(i), P_NOT_128 / T_m, 0); -@@ -1320,107 +1741,514 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { - #undef UNROLL_INST - #undef UNROLL_INST - -- jit_uni_reorder_kernel_f32_t(const desc_t &desc) : kernel_t(desc) { -- itype_sz = data_type_size(prb_.itype); -- otype_sz = data_type_size(prb_.otype); -- stype_sz = sizeof(float); -+ jit_uni_reorder_kernel_f32_t(const desc_t &desc) -+ : kernel_t(desc), isa_(get_max_cpu_isa()) { -+ assert(!utils::one_of(isa_, isa_undef, isa_all)); -+ itype_sz_ = data_type_size(prb_.itype); -+ otype_sz_ = data_type_size(prb_.otype); -+ stype_sz_ = sizeof(float); - } - - void generate() override { - using namespace Xbyak_aarch64::util; - uint64_t sveLen = get_sve_length(); -+ Label end_of_kernel; - - preamble(); --#define PARAM(x) offsetof(call_param_t, x) -+ - if (prb_.scale_type == scale_type_t::COMMON) { -- add_imm(X_DEFAULT_ADDR, abi_param1, PARAM(scale), X_TMP_1); -+ 
add_imm(X_DEFAULT_ADDR, PARAM(scale), X_TMP_1); - ldr(X_TMP_0, ptr(X_DEFAULT_ADDR)); -- ldr(W_TMP_1, ptr(X_TMP_0)); -- dup(xmm_scale, W_TMP_1); -+ ld1r(xmm_scale_, ptr(X_TMP_0)); - } else if (prb_.scale_type == scale_type_t::MANY) { -- add_imm(X_DEFAULT_ADDR, abi_param1, PARAM(scale), X_TMP_0); -- ldr(reg_ptr_scale, ptr(X_DEFAULT_ADDR)); -+ add_imm(X_DEFAULT_ADDR, PARAM(scale), X_TMP_0); -+ ldr(reg_ptr_scale_, ptr(X_DEFAULT_ADDR)); - } -- add_imm(X_TMP_0, abi_param1, PARAM(in), X_TMP_2); -- add_imm(X_TMP_1, abi_param1, PARAM(out), X_TMP_2); -- add_imm(reg_blk, abi_param1, PARAM(blk_chunks), reg_blk); -- ldr(reg_ptr_in, ptr(X_TMP_0)); -- ldr(reg_ptr_out, ptr(X_TMP_1)); -- ldr(reg_blk_chunks, ptr(reg_blk)); -- --#undef PARAM -- mov_imm(reg_last_loop_cnt, 1); -+ if (compensation_needed_) { -+ add_imm(X_DEFAULT_ADDR, PARAM(compensation_scratch), X_TMP_0); -+ ldr(reg_ptr_comp_, ptr(X_DEFAULT_ADDR)); -+ } -+ if (prb_.scale_adjust == 0.5f) { mov(reg_scale_adjust_, 0x3f000000); } -+ add_imm(X_TMP_0, PARAM(in), X_TMP_2); -+ add_imm(X_TMP_1, PARAM(out), X_TMP_2); -+ ldr(reg_ptr_in_, ptr(X_TMP_0)); -+ ldr(reg_ptr_out_, ptr(X_TMP_1)); - -- mov(x_ptr_in_off, XReg(reg_ptr_in.getIdx())); -- mov(x_ptr_out_off, XReg(reg_ptr_out.getIdx())); -- mov(x_ptr_scale_off, XReg(reg_ptr_scale.getIdx())); -+ mov(x_ptr_in_off, reg_ptr_in_); -+ mov(x_ptr_out_off, reg_ptr_out_); -+ mov(x_ptr_scale_off, reg_ptr_scale_); -+ mov(x_ptr_comp_off, reg_ptr_comp_); - - if (sveLen) { /* SVE is available. 
*/ - ptrue(p_lsb_256.b, VL32); -- ptrue(p_all.b); -+ ptrue(p_lsb_128.b, VL16); -+ ptrue(p_lsb_64.b, VL8); - } - -- if (can_do_tr8x8()) { -- dup(ymm_zero, 0); -- -- if (prb_.itype == data_type::u8 && prb_.otype == data_type::s8) { -- mov_imm(reg_tmp, 0x7f7f7f7f7f7f7f7f); -- mov(VReg4S(ymm_8x127b.getIdx())[0], WReg(reg_tmp.getIdx())); -+ bool is_tail_in_drv_dims = false; -+ for (int i = prb_.ndims; i < prb_.full_ndims; i++) -+ if (prb_.nodes[i].tail_size > 0) { -+ is_tail_in_drv_dims = true; -+ break; - } -- } else if (mayiuse(sve_512)) { -- movi(xmm_zero, 0); - -- if (prb_.itype == data_type::u8 && prb_.otype == data_type::s8) { -- mov(WReg(reg_tmp.getIdx()), 0x7f7f7f7f); -- mov(xmm_4x127b[0], WReg(reg_tmp.getIdx())); -+ if (is_tail_in_drv_dims) { -+ Label reorder_kernel; -+ add_imm(X_DEFAULT_ADDR, TAIL_PARAM(skip_kernel_execution), X_TMP_0); -+ ldr(reg_tmp_, ptr(X_DEFAULT_ADDR)); -+ cmp(reg_tmp_, static_cast(true)); -+ b(EQ, end_of_kernel); -+ -+ add_imm(X_DEFAULT_ADDR, TAIL_PARAM(zeroing_data), X_TMP_0); -+ ldr(reg_tmp_, ptr(X_DEFAULT_ADDR)); -+ cmp(reg_tmp_, static_cast(false)); -+ b(EQ, reorder_kernel); -+ // If zeroing data is set then all dst memory -+ // will be zeroed and nothing more will be done. 
-+ int bytes_to_zeroing = otype_sz_; -+ for (int i = 0; i < prb_.ndims; i++) { -+ bytes_to_zeroing *= prb_.nodes[i].n; - } -+ eor(reg_off_out_, reg_off_out_, reg_off_out_); -+ mov(x_ptr_out_off, reg_ptr_out_); -+ zero_dst_memory(bytes_to_zeroing); -+ b(end_of_kernel); -+ L(reorder_kernel); -+ } -+ -+ if (can_do_tr8x8()) { -+ dup(ymm_zero_, 0); -+ } else { -+ movi(xmm_zero_, 0); - } - - impl(); -+ -+ L(end_of_kernel); - postamble(); - } - -+ ~jit_uni_reorder_kernel_f32_t() override = default; -+ -+#undef TAIL_PARAM -+#undef PARAM -+ - private: -- int itype_sz; -- int otype_sz; -- int stype_sz; -+ static constexpr int64_t with_tail_info_ = static_cast(true); -+ static constexpr int64_t without_tail_info_ = static_cast(false); -+ -+ int itype_sz_; -+ int otype_sz_; -+ int stype_sz_; - -- XReg reg_ptr_in = x6; -- XReg reg_ptr_out = x2; -- XReg reg_ptr_scale = abi_not_param1; -+ const cpu_isa_t isa_; - -- XReg reg_off_in = x8; -- XReg reg_off_out = x9; -- XReg reg_off_scale = x10; -+ const XReg reg_ptr_in_ = x6; -+ const XReg reg_ptr_out_ = x2; -+ const XReg reg_ptr_scale_ = abi_not_param1; -+ const XReg reg_ptr_comp_ = x3; -+ const WReg ®_scale_adjust_ = w5; - -- XReg reg_blk = x11; -- XReg reg_blk_chunks = x12; -- XReg reg_last_loop_cnt = x11; -+ const XReg reg_off_in_ = x8; -+ const XReg reg_off_out_ = x9; -+ const XReg reg_off_scale_ = x10; -+ const XReg reg_off_comp_ = x11; - -- XReg reg_tmp = x0; -+ XReg reg_tmp_ = x12; - -- VReg4S xmm_scale = v15.s; -- VReg4S xmm_zero = v14.s; -- VReg4S xmm_4x127b = v13.s; // TODO: unite with ymm_zero -- ZRegS ymm_zero = z14.s; -- ZRegS ymm_8x127b = z13.s; -- VReg4S xmm_tmp = v12.s; -- VReg4S xmm_saturation_ubound = v12.s; -- ZRegS ymm_saturation_ubound = z12.s; -+ VReg4S xmm_scale_ = v15.s; -+ VReg4S xmm_zero_ = v14.s; -+ ZRegS ymm_zero_ = z14.s; -+ VReg4S xmm_tmp_ = v12.s; -+ const VReg4S xmm_src_zp_ = v9.s; -+ const VReg4S xmm_dst_zp_ = v11.s; -+ VReg4S xmm_saturation_ubound_ = v12.s; -+ ZRegS ymm_saturation_ubound_ = z12.s; - 
- /* Note: x22 - x28 are already used as temporal registgers - in jit_generator.hpp. -- x_ptr_(in|out|scale)_off keeps (base + offset) address. */ -+ x_ptr_(in|out|scale|comp)_off keeps (base + offset) address. */ - XReg x_ptr_in_off = x16; - XReg x_ptr_out_off = x18; - XReg x_ptr_scale_off = x20; -+ XReg x_ptr_comp_off = x17; - - /* Caution: Chose predicate registers not used by x64's implementation. */ - PReg p_lsb_256 = p7; -- PReg p_all = p6; -+ PReg p_lsb_128 = p6; -+ PReg p_lsb_64 = p4; - PReg p_tmp0 = p5; - - const std::vector tmp_vec_idx = {20, 21, 22, 23, 24, 25, 26, 27}; -+ VReg v_tmp0 = v20; -+ ZReg z_tmp0 = z20; -+ ZReg z_tmp1 = z21; -+ ZReg z_tmp2 = z22; -+ ZReg z_tmp3 = z23; -+ ZReg z_tmp4 = z24; -+ ZReg z_tmp5 = z25; -+ ZReg z_tmp6 = z26; -+ ZReg z_tmp7 = z27; -+ VReg v_tmp7 = v27; -+ -+ const std::vector z_tmp_vec -+ = {z_tmp0, z_tmp1, z_tmp2, z_tmp3, z_tmp4, z_tmp5, z_tmp6, z_tmp7}; -+ constexpr static int z_tmp_vec_size = 8; -+}; -+ -+// Seperate class for no unroll/threading burden -+struct jit_single_blk_kernel_t : public jit_generator { -+ DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_single_blk_kernel) -+ static bool applicable(const prb_t &p) { -+ using namespace data_type; -+ -+ bool ok = p.ndims >= 2 && mayiuse(sve_256) -+ && p.scale_type == scale_type_t::NONE -+ && utils::one_of(p.itype, f32) && utils::one_of(p.otype, f32) -+ && utils::everyone_is(0, p.ioff, p.ooff) && p.beta == 0.f -+ && prb_has_small_strides(p); -+ if (!ok) return false; -+ -+ int64_t n0 = p.nodes[0].n; -+ auto i0 = p.nodes[0].is; -+ auto o0 = p.nodes[0].os; -+ int64_t n1 = p.nodes[1].n; -+ auto i1 = p.nodes[1].is; -+ auto o1 = p.nodes[1].os; -+ -+ /* -+ * for a transpose of plain to 8c case, nodes would be like: -+ * n is os -+ * m 1 8 -+ * 8 m 1 -+ * or -+ * 8 m 1 -+ * m 1 8 -+ */ -+ ok = (utils::one_of(n0, 8, 16) || utils::one_of(n1, 8, 16)) -+ && ((i0 == 1 && o1 == 1 && n0 == i1 && o0 == n1) -+ || (o0 == 1 && i1 == 1 && n0 == o1 && i0 == n1)); -+ if (!ok) return false; -+ -+ // 
Do not handle transpose of dimensions other than last 2 -+ for (int i = 2; i < p.ndims; ++i) { -+ if (p.nodes[i].is != p.nodes[i].os) { -+ ok = false; -+ break; -+ } -+ } -+ -+ return ok; -+ } -+ -+ jit_single_blk_kernel_t(const tr::prb_t &prb) -+ : jit_generator() -+ , prb_(prb) -+ , itype_sz_(data_type_size(prb_.itype)) -+ , otype_sz_(data_type_size(prb_.otype)) -+ , block_sz(prb.nodes[0].n) {} -+ -+ void generate() override { -+ auto input_stride -+ = prb_.nodes[0].is != 1 ? prb_.nodes[0].is : prb_.nodes[1].is; -+ auto output_stride -+ = prb_.nodes[0].os != 1 ? prb_.nodes[0].os : prb_.nodes[1].os; -+ -+ Label tail_processing; -+ -+ const auto load_zp = [&](const ZRegS ymm_zp, const XReg reg_zp) { -+ dup(ymm_zp, WReg(reg_zp.getIdx())); -+ scvtf(ymm_zp, P_ALL_ONE / T_m, ymm_zp); -+ }; -+ -+ preamble(); -+ -+ if (prb_.req_src_zp) load_zp(ymm_src_zp, reg_src_zp); -+ -+ if (prb_.req_dst_zp) load_zp(ymm_dst_zp, reg_dst_zp); -+ -+ cmp(reg_ptr_tail, true); -+ b(EQ, tail_processing); -+ -+ if (block_sz == 8) { -+ gen_ker8x8(0, 0, input_stride, output_stride, 8, 8); -+ block_sz = 8; -+ } else if (block_sz == 16) { -+ gen_ker16x16_in_8x8(input_stride, output_stride); -+ block_sz = 16; -+ } else { -+ assert(!"unimplemented"); -+ } -+ -+ postamble(); -+ -+ L(tail_processing); -+ -+ if (block_sz == 8) { -+ auto i_tail = input_stride % 8 != 0 ? input_stride % 8 : 8; -+ auto o_tail = output_stride % 8 != 0 ? output_stride % 8 : 8; -+ if (i_tail != o_tail) { -+ auto t_mask = i_tail == 8 ? o_tail : i_tail; -+ gen_setmask(t_mask); -+ gen_ker8x8(0, 0, input_stride, output_stride, i_tail, o_tail); -+ } -+ } else if (block_sz == 16) { -+ auto i_tail = input_stride % 16 != 0 ? input_stride % 16 : 16; -+ auto o_tail = output_stride % 16 != 0 ? output_stride % 16 : 16; -+ if (i_tail != o_tail) { -+ auto t_mask = i_tail == 16 ? 
o_tail : i_tail; -+ t_mask %= 8; -+ if (t_mask != 0) gen_setmask(t_mask); -+ gen_ker16x16_in_8x8( -+ input_stride, output_stride, i_tail, o_tail); -+ } -+ } else { -+ assert(!"unimplemented"); -+ } -+ -+ postamble(); -+ } -+ -+ void gen_loadu(const ZRegS ymm, const XReg &addr, int size) { -+ QReg xmm(ymm.getIdx()); -+ switch (size) { -+ case 32: ld1w(ymm, p_lsb_256 / T_z, ptr(addr)); break; -+ case 16: ldr(xmm, ptr(addr)); break; -+ default: assert(!"unreachable"); -+ } -+ } -+ -+ void gen_storeu(const XReg &addr, const ZRegS ymm, int size) { -+ QReg xmm(ymm.getIdx()); -+ switch (size) { -+ case 32: st1w(ymm, p_lsb_256, ptr(addr)); break; -+ case 16: str(xmm, ptr(addr)); break; -+ default: assert(!"unreachable"); -+ } -+ } -+ -+ void gen_maskloadu( -+ const ZRegS ymm, const XReg &addr, const PReg mask, int size) { -+ switch (size) { -+ case 32: -+ case 16: ld1w(ymm, mask / T_z, ptr(addr)); break; -+ default: assert(!"unreachable"); -+ } -+ } -+ -+ void gen_maskstoreu( -+ const XReg &addr, const ZRegS ymm, const PReg mask, int size) { -+ switch (size) { -+ case 32: -+ case 16: st1w(ymm, mask, ptr(addr)); break; -+ default: assert(!"unreachable"); -+ } -+ } -+ -+ // Register allocation xmm0~11 -+ void gen_transpose_8x8() { -+ const uint64_t sveLen = get_sve_length(); -+ constexpr int lane = 8; -+ -+#if 0 -+ /* Debug code -+ z0: 7, 6, 5, 4, 3, 2, 1, 0 -+ z1: 15, 14, 13, 12, 11, 10, 9, 8 -+ ... -+ z17: 63, 62, 61, 60, 59, 58, 57, 56 -+ */ -+ ptrue(P_ALL_ONE.b); -+ ptrue(P_TMP.s, VL8); -+ not_(P_TMP.b, P_ALL_ONE/T_z, P_TMP.b); -+ index(z0.s, 0, 1); -+ mov(z0.s, P_TMP/T_m, 0); -+ mov(z_tmp_vec[0].s, 8); -+ mov(z_tmp_vec[0].s, P_TMP/T_m, 0); -+ for(uint32_t i=1; i nChw()C -+ // or nChw()C -> nchw -+ void gen_setmask(int mask) { -+ mov_imm(x_tmp_0, 0); -+ mov_imm(x_tmp_1, mask); -+ whilelt(p_mask.s, x_tmp_0, x_tmp_1); -+ } -+ -+ // TODO: Mark parameter with type information -+ // XXX: ! 
-+ // offset in byte offset -+ // stride in element number -+ // -+ // Gen specific 8x8 transform respect to certain tail condition -+ void gen_tr8x8(int i_off, int o_off, int input_stride, int output_stride, -+ int in_tail, int out_tail) { -+ constexpr int lane = 8; -+ -+ if (in_tail == 0 || out_tail == 0) return; -+ -+ for (int i = 0; i < out_tail; ++i) { -+ if (in_tail != lane) { -+ add_imm(x_addr, reg_ptr_in_, -+ i_off + i * input_stride * itype_sz_, x_tmp_0); -+ gen_maskloadu(ZRegS(i), x_addr, p_mask, lane * itype_sz_); -+ } else { -+ add_imm(x_addr, reg_ptr_in_, -+ i_off + i * input_stride * itype_sz_, x_tmp_0); -+ gen_loadu(ZRegS(i), x_addr, lane * itype_sz_); -+ } -+ if (prb_.req_src_zp) { fsub(ZRegS(i), ZRegS(i), ymm_src_zp); } -+ } -+ -+ gen_transpose_8x8(); -+ -+ for (int i = 0; i < in_tail; ++i) { -+ if (prb_.req_dst_zp) { fadd(ZRegS(i), ZRegS(i), ymm_dst_zp); } -+ if (out_tail == lane) { -+ add_imm(x_addr, reg_ptr_out_, -+ o_off + i * output_stride * otype_sz_, x_tmp_0); -+ gen_storeu(x_addr, ZRegS(i), lane * otype_sz_); -+ } else { -+ add_imm(x_addr, reg_ptr_out_, -+ o_off + i * output_stride * otype_sz_, x_tmp_0); -+ gen_maskstoreu(x_addr, ZRegS(i), p_mask, lane * otype_sz_); -+ } -+ } -+ } -+ -+ // tail: 0 ~ 8 -+ // support: either in_tail or out_tail is not 8, but not both -+ void gen_ker8x8(int i_off, int o_off, int input_stride, int output_stride, -+ int in_tail, int out_tail) { -+ gen_tr8x8(i_off, o_off, input_stride, output_stride, in_tail, out_tail); -+ } -+ -+ void gen_ker16x16_in_8x8(int input_stride, int output_stride) { -+ const auto lane = 16; -+ const auto sub_lane = lane / 2; -+ gen_tr8x8(0, 0, input_stride, output_stride, sub_lane, sub_lane); -+ gen_tr8x8(input_stride * sub_lane * itype_sz_, sub_lane * otype_sz_, -+ input_stride, output_stride, sub_lane, sub_lane); -+ gen_tr8x8(sub_lane * itype_sz_, output_stride * sub_lane * otype_sz_, -+ input_stride, output_stride, sub_lane, sub_lane); -+ gen_tr8x8((input_stride * sub_lane + 
sub_lane) * itype_sz_, -+ (output_stride * sub_lane + sub_lane) * otype_sz_, input_stride, -+ output_stride, sub_lane, sub_lane); -+ } -+ -+ // tail can be 1 ~ 16, using avx2 for now -+ void gen_ker16x16_in_8x8( -+ int input_stride, int output_stride, int in_tail, int out_tail) { -+ constexpr auto lane = 16; -+ constexpr auto sub_lane = lane / 2; -+ auto tail = in_tail != lane ? in_tail : out_tail; -+ -+ const auto l_tail = tail < sub_lane ? tail : sub_lane; -+ const auto u_tail = tail < sub_lane ? 0 : tail - sub_lane; -+ -+ if (tail == in_tail) { -+ gen_tr8x8(0, 0, input_stride, output_stride, l_tail, sub_lane); -+ gen_tr8x8(input_stride * sub_lane * itype_sz_, sub_lane * otype_sz_, -+ input_stride, output_stride, l_tail, sub_lane); -+ gen_tr8x8(sub_lane * itype_sz_, -+ output_stride * sub_lane * otype_sz_, input_stride, -+ output_stride, u_tail, sub_lane); -+ gen_tr8x8(itype_sz_ * (input_stride * sub_lane + sub_lane), -+ otype_sz_ * (output_stride * sub_lane + sub_lane), -+ input_stride, output_stride, u_tail, sub_lane); -+ } else { -+ gen_tr8x8(0, 0, input_stride, output_stride, sub_lane, l_tail); -+ gen_tr8x8(input_stride * sub_lane * itype_sz_, sub_lane * otype_sz_, -+ input_stride, output_stride, sub_lane, u_tail); -+ gen_tr8x8(sub_lane * itype_sz_, -+ output_stride * sub_lane * itype_sz_, input_stride, -+ output_stride, sub_lane, l_tail); -+ gen_tr8x8(itype_sz_ * (input_stride * sub_lane + sub_lane), -+ otype_sz_ * (output_stride * sub_lane + sub_lane), -+ input_stride, output_stride, sub_lane, u_tail); -+ } -+ } -+ -+private: -+ // 6 ~ 12 -+ constexpr static int xmm_save_for_windows = 0; -+ constexpr static int xmm_save_start_from = 6; -+ constexpr static int xmm_width = 16; -+ -+ void preamble() { ptrue(p_lsb_256.b, VL32); } -+ -+ void postamble() { ret(); } -+ -+ const prb_t &prb_; -+ -+ int itype_sz_; -+ int otype_sz_; -+ int block_sz; -+ -+ XReg reg_ptr_in_ = abi_param1; -+ XReg reg_ptr_out_ = abi_param2; -+ XReg reg_ptr_tail = abi_param3; -+ XReg 
reg_src_zp = abi_param4; -+ XReg reg_dst_zp = abi_param5; -+ -+ XReg x_addr = x10; -+ XReg x_tmp_0 = x11; -+ XReg x_tmp_1 = x12; -+ -+ /* Avoid P_TMP(p7) in jit_generator.hpp. */ -+ PReg p_lsb_256 = p6; -+ PReg p_mask = p5; -+ -+ ZRegS ymm_tmp = z0.s; -+ ZRegS ymm_src_zp = z14.s; -+ ZRegS ymm_dst_zp = z15.s; -+ -+ const std::vector tmp_vec_idx = {20, 21, 22, 23, 24, 25, 26, 27}; -+ VReg v_tmp0 = v20; - ZReg z_tmp0 = z20; - ZReg z_tmp1 = z21; - ZReg z_tmp2 = z22; -@@ -1472,15 +2300,31 @@ kernel_t *kernel_t::create(const kernel_t::desc_t &desc) { - - return nullptr; - } -+ - } // namespace tr - - static void prb_block_for_cache(tr::prb_t &prb) { - /* If strides for 0th and 1st nodes are cache friendly - * then one can altogether do away with blocking ! */ -- const bool cache_blocking_needed = false -- || (prb.nodes[0].is % 64 == 0 && prb.nodes[0].n > 16) -- || (prb.ndims > 1 && prb.nodes[1].is % 64 == 0 -- && prb.nodes[1].n > 16); -+ static constexpr int num_elems_thr = 16; -+ const bool stride_cache_friendly -+ = ((prb.nodes[0].is % 64 == 0 && prb.nodes[0].n > num_elems_thr) -+ || (prb.ndims > 1 && prb.nodes[1].is % num_elems_thr == 0 -+ && prb.nodes[1].n > num_elems_thr)) -+ && !prb.is_tail_present; -+ -+ // performance improvement for shapes with large inner-most dimension -+ const size_t L1_cache_sz -+ = size_t(3) * platform::get_per_core_cache_size(1) / 4; -+ const size_t itype_sz_ = data_type_size(prb.itype); -+ const size_t inner_block_sz = prb.nodes[0].n * itype_sz_; -+ const bool requires_inner_blocking = inner_block_sz > L1_cache_sz -+ // 'is_tail_present' is not supported for cache_blocking when -+ // asymmetric_comp is executed. 
-+ && IMPLICATION(prb.req_asymmetric_comp, !prb.is_tail_present); -+ -+ const bool cache_blocking_needed -+ = stride_cache_friendly || requires_inner_blocking; - if (!cache_blocking_needed) return; - - int unit_input_stride_idx = -1; -@@ -1496,28 +2340,58 @@ static void prb_block_for_cache(tr::prb_t &prb) { - const auto output_stride = prb.nodes[unit_input_stride_idx].os; - const auto num_elems = prb.nodes[unit_input_stride_idx].n; - -- const bool split_needed = (num_elems > 16) && (num_elems % 16 == 0); -+ const bool split_needed = (num_elems > num_elems_thr) -+ && (num_elems % num_elems_thr == 0); - const int move_location = (output_stride % 4 != 0) ? 0 : 1; -- if (split_needed) prb_node_split(prb, unit_input_stride_idx, 16); -+ if (split_needed) -+ prb_node_split(prb, unit_input_stride_idx, num_elems_thr); - - /* Because of cache-unfriendly nature of unit-output stride node, let - * us move unit-input stride node on or near front! */ -- prb_node_move(prb, unit_input_stride_idx, move_location); -+ if (unit_input_stride_idx != move_location) -+ prb_node_move(prb, unit_input_stride_idx, move_location); - } - - /* Potentially, split the node with os=1 in two and pull in the node with - * is=1 between them for better cache reuse: - * [n0:is0:1][n1:1:os1] --> [16n0:is0:1][n1:1:os1][n0/16:is0*16:16] */ - if (prb.ndims >= 2 && prb.nodes[0].os == 1 && prb.nodes[1].is == 1) { -- const auto input_stride = prb.nodes[0].is; - const auto num_elems = prb.nodes[0].n; - -- const bool split_needed = true && (num_elems > 16) -- && (num_elems % 16 == 0) && (input_stride >= 256) -- && (input_stride % 64 == 0); -+ const bool split_needed = (num_elems > num_elems_thr) -+ && (num_elems % num_elems_thr == 0); - if (split_needed) { -- prb_node_split(prb, 0, 16); -+ prb_node_split(prb, 0, num_elems_thr); - prb_node_move(prb, 1, 2); -+ -+ // Update node information -+ prb_node_dependency(prb); -+ -+ // heuristics - looping over the unrolled dims should maximize reuse -+ // of the already 
cached data; observation is choosing the smallest -+ // dim from the remaining (from 2 up to ndims) gives good results -+ constexpr int new_position = 2; -+ const auto dim_beg_it = std::begin(prb.nodes); -+ const auto dim_two_it = dim_beg_it + new_position; -+ const auto dim_last_it = dim_beg_it + prb.ndims; -+ const auto min_n_node_it = std::min_element(dim_two_it, dim_last_it, -+ [](const tr::node_t &lhs, const tr::node_t &rhs) { -+ return lhs.n < rhs.n; -+ }); -+ const auto min_idx = std::distance(dim_beg_it, min_n_node_it); -+ // check if min_idx node is parent of node with tail processing which -+ // is currently unsupported (i.e. tail processing can only be handled -+ // at the inner-most dimension) -+ bool inner_block_has_tail = false; -+ for (int idx = min_idx - 1; idx >= new_position; idx--) { -+ if (prb.nodes[idx].parent_node_id == min_idx) { -+ inner_block_has_tail = true; -+ break; -+ } -+ } -+ -+ if (min_idx > new_position && (!inner_block_has_tail)) -+ prb_node_move(prb, min_idx, new_position); - } - } - } -@@ -1527,73 +2401,76 @@ static void prb_block_for_cache(tr::prb_t &prb) { - * parallel driver and the kernel. */ - static void prb_thread_kernel_balance( - tr::prb_t &prb, int &ndims_ker_max, int nthr) { -- size_t sz_total = 1; -+ size_t size_total = 1; - for (int d = 0; d < prb.ndims; ++d) -- sz_total *= prb.nodes[d].n; -+ size_total *= prb.nodes[d].n; - -- /* The general expression for sz_drv_thr can be written as -- * sz_drv_min = C0 + FC * (nthr > 1 ? 1 : 0) + VC * (nthr - 1) -+ /* The general expression for size_drv_thr can be written as -+ * size_drv_min = C0 + FC * (nthr > 1 ? 1 : 0) + VC * (nthr - 1) - * where FC and VC are fixed and variable costs respectively. - * Though for now, the below heuristic seems to be good enough */ -- const size_t sz_drv_thr = (nthr > 1) ? 16 * nthr : 1; -+ const size_t size_drv_thr = (nthr > 1) ? 
16 * nthr : 1; - -- /* sz_drv_min is the minimal size for the parallel -+ /* size_drv_min is the minimal size for the parallel - * driver required for good parallelization */ -- const size_t sz_drv_min -- = nstl::min(sz_drv_thr, utils::div_up(sz_total, 1024)); -+ const size_t size_drv_min -+ = nstl::min(size_drv_thr, utils::div_up(size_total, 1024)); - - /* kdims -- # of dimensions processed by a kernel -- * sz_ker_cur -- product of the dimension processed by a kernel -- * sz_drv_cur -- product of the dimension processed by a driver */ -+ * size_ker_cur -- product of the dimension processed by a kernel -+ * size_drv_cur -- product of the dimension processed by a driver */ - - int kdims = prb.ndims; -- size_t sz_drv_cur = 1; -- for (; kdims > 1 && sz_drv_cur < sz_drv_min; --kdims) -- sz_drv_cur *= prb.nodes[kdims - 1].n; -+ size_t size_drv_cur = 1; -+ for (; kdims > 1 && size_drv_cur < size_drv_min; --kdims) -+ size_drv_cur *= prb.nodes[kdims - 1].n; - -- size_t sz_ker_cur = 1; -+ size_t size_ker_cur = 1; - for (int d = 0; d < kdims; ++d) -- sz_ker_cur *= prb.nodes[d].n; -+ size_ker_cur *= prb.nodes[d].n; - -- /* Initially kdims is chosen so that sz_drv_cur >= sz_drv_min. -+ /* Initially kdims is chosen so that size_drv_cur >= size_drv_min. - * -- * It might happen that for chosen kdims the sz_ker_cur is too small -+ * It might happen that for chosen kdims the size_ker_cur is too small - * (less than tr::ker_prb_size_min). In that case try to split the -- * innermost driver dimension into two, to increase sz_ker_cur. */ -- bool want_borrow_ker_from_drv = true && kdims < prb.ndims -- && sz_ker_cur < tr::ker_prb_size_min && sz_drv_cur > sz_drv_min -- && kdims != prb.blk_chunk_idx; -+ * innermost driver dimension into two, to increase size_ker_cur. 
*/ -+ const bool want_borrow_ker_from_drv = kdims < prb.ndims -+ && size_ker_cur < tr::ker_prb_size_min -+ && size_drv_cur > size_drv_min; - if (want_borrow_ker_from_drv) { -- /* sz_want_borrow is the minimal sz, so that: -- * o) sz_ker_cur * sz_want_borrow >= tr::ker_prb_size_min -+ /* size_want_borrow is the minimal size, so that: -+ * o) size_ker_cur * size_want_borrow >= tr::ker_prb_size_min - * o) current innermost driver dimension is divisible by -- * sz_want_borrow (so that we can evenly split that -+ * size_want_borrow (so that we can evenly split that - * dimension into two) - * -- * In the worst case the minimal sz_want_borrow is equal -+ * In the worst case the minimal size_want_borrow is equal - * to the innermost driver dimension itself. In that case - * we will sacrifice it in favor of kernel (is it fine?). */ -- size_t sz_want_borrow = utils::div_up(tr::ker_prb_size_min, sz_ker_cur); -- for (; prb.nodes[kdims].n % sz_want_borrow; ++sz_want_borrow) -+ size_t size_want_borrow -+ = utils::div_up(tr::ker_prb_size_min, size_ker_cur); -+ for (; prb.nodes[kdims].n % size_want_borrow; ++size_want_borrow) - ; -- if (sz_want_borrow != prb.nodes[kdims].n) -- prb_node_split(prb, kdims, sz_want_borrow); -+ -+ if (size_want_borrow != prb.nodes[kdims].n) -+ prb_node_split(prb, kdims, size_want_borrow); - kdims += 1; - } - - /* On the other hand it might happen that for chosen kdims -- * the sz_drv_cur is too small (less than sz_drv_min). In that case -+ * the size_drv_cur is too small (less than size_drv_min). In that case - * try to split the outermost kernel dimension into two, to increase -- * sz_drv_cur. */ -- bool want_borrow_drv_from_ker = true && sz_ker_cur > tr::ker_prb_size_min -- && sz_drv_cur < sz_drv_min && kdims != prb.blk_chunk_idx; -+ * size_drv_cur. 
*/ -+ const bool want_borrow_drv_from_ker = size_ker_cur > tr::ker_prb_size_min -+ && size_drv_cur < size_drv_min; - if (want_borrow_drv_from_ker) { -- size_t sz_want_borrow = utils::div_up(sz_drv_min, sz_drv_cur); -- for (; prb.nodes[kdims - 1].n % sz_want_borrow; ++sz_want_borrow) -+ size_t size_want_borrow = utils::div_up(size_drv_min, size_drv_cur); -+ for (; prb.nodes[kdims - 1].n % size_want_borrow; ++size_want_borrow) - ; -- if (sz_want_borrow != prb.nodes[kdims - 1].n) -+ -+ if (size_want_borrow != prb.nodes[kdims - 1].n) - prb_node_split( -- prb, kdims - 1, prb.nodes[kdims - 1].n / sz_want_borrow); -+ prb, kdims - 1, prb.nodes[kdims - 1].n / size_want_borrow); - } - - ndims_ker_max = kdims; -@@ -1607,6 +2484,33 @@ static void prb_thread_kernel_balance( - } - } - -+status_t jit_uni_reorder_t::pd_t::init( -+ engine_t *engine, engine_t *src_engine, engine_t *dst_engine) { -+ CHECK(cpu_reorder_pd_t::init(engine, src_engine, dst_engine)); -+ -+ const bool compensation_needed -+ = prb_.req_s8s8_comp || prb_.req_asymmetric_comp; -+ if (compensation_needed) init_scratchpad(); -+ -+ return status::success; -+} -+ -+void jit_uni_reorder_t::pd_t::init_scratchpad() { -+ const memory_desc_wrapper od(dst_md()); -+ const auto G = with_groups_ ? od.padded_dims()[0] : 1; -+ const auto N = od.padded_dims()[with_groups_ ? 
1 : 0]; -+ static constexpr int cache_line_size = 16; -+ const auto wspace_per_thr_size -+ = utils::rnd_up(G * N, cache_line_size) * sizeof(int32_t); -+ -+ auto scratchpad = scratchpad_registry().registrar(); -+ const auto compensation_reduce_size = wspace_per_thr_size * nthr_; -+ -+ // Every thread gets its own scratchpad space for each N -+ scratchpad.template book(memory_tracking::names::key_reorder_space, -+ compensation_reduce_size); -+} -+ - status_t jit_uni_reorder_t::pd_t::create(reorder_pd_t **reorder_pd, - engine_t *engine, const primitive_attr_t *attr, engine_t *src_engine, - const memory_desc_t *src_md, engine_t *dst_engine, -@@ -1616,36 +2520,18 @@ status_t jit_uni_reorder_t::pd_t::create(reorder_pd_t **reorder_pd, - status_t prb_init_status = prb_init(prb, *src_md, *dst_md, attr); - if (prb_init_status != status::success) return prb_init_status; - -- DEBUG({ -- printf("init : "); -- prb_dump(prb); -- }); -- // Sort the prb array in increasing sizes of the output stride -- prb_normalize(prb); -- DEBUG({ -- printf("norm : "); -- prb_dump(prb); -- }); -- /* Combine the variables, which appear together on both -- * sides of the reorder */ -- prb_simplify(prb); -- DEBUG({ -- printf("smpl : "); -- prb_dump(prb); -- }); -- - prb_block_for_cache(prb); - DEBUG({ - printf("cache: "); - prb_dump(prb); - }); - -- CHECK(prb_check_blk(prb, *dst_md)); -- -- int ndims_ker_max; -+ int ndims_ker_max {}; - int nthr = dnnl_get_max_threads(); - prb_thread_kernel_balance(prb, ndims_ker_max, nthr); - -+ if (prb.is_tail_present) prb_node_dependency(prb); -+ - tr::kernel_t::desc_t ker_desc; - status_t ker_init_status - = tr::kernel_t::desc_init(ker_desc, prb, ndims_ker_max); -@@ -1663,99 +2549,191 @@ status_t jit_uni_reorder_t::pd_t::create(reorder_pd_t **reorder_pd, - auto _pd = new pd_t( - attr, src_engine->kind(), src_md, dst_engine->kind(), dst_md); - if (_pd == nullptr) return status::out_of_memory; -+ -+ _pd->nthr_ = nthr; -+ _pd->prb_ = prb; -+ _pd->with_groups_ -+ = 
prb.compensation_mask == tr::prb_t::comp_mask_with_groups; - if (_pd->init(engine, src_engine, dst_engine) != status::success) { - delete _pd; - return status::unimplemented; - } -- _pd->prb_ = prb; - _pd->ker_desc_ = ker_desc; - _pd->init_scratchpad_md(); -- _pd->nthr_ = nthr; -+ - return safe_ptr_assign(*reorder_pd, _pd); - } - --void jit_uni_reorder_t::omp_driver_0d( -- int off, const char *in, char *out, const float *scale) const { -- tr::call_param_t c {in, out, scale, 0}; -- (*kernel_)(&c); -+void jit_uni_reorder_t::omp_driver_0d(int off, const char *in, char *out, -+ const float *scale, int src_zp, int dst_zp, -+ int32_t *compensation_scratch) const { -+ const tr::prb_t &prb = pd()->prb_; -+ -+ tr::call_param_t base_params; -+ base_params.in = in; -+ base_params.out = out; -+ base_params.scale = scale; -+ base_params.src_zp = src_zp; -+ base_params.dst_zp = dst_zp; -+ base_params.compensation_scratch = compensation_scratch; -+ -+ if (prb.is_tail_present) { -+ tr::tail_call_param_t tail_params; -+ tail_params.base_params = base_params; -+ -+ static constexpr int omp_ndims = 0; -+ fill_curr_data_chunks(prb, off, nullptr, omp_ndims, tail_params); -+ (*kernel_)(&tail_params); -+ } else { -+ (*kernel_)(&base_params); -+ } - } - - void jit_uni_reorder_t::omp_driver_1d(int ithr, int nthr, int off, -- const char *in, char *out, const float *scale) const { -- const tr::node_t *ns = pd()->prb_.nodes + off; -+ const char *in, char *out, const float *scale, int src_zp, int dst_zp, -+ int32_t *compensation_scratch) const { -+ const tr::prb_t &prb = pd()->prb_; -+ const tr::node_t *ns = prb.nodes + off; - for_nd(ithr, nthr, (ptrdiff_t)ns[0].n, [&](ptrdiff_t d0) { -- auto c = tr::call_param_t(); -- c.in = in + d0 * ns[0].is * data_type_size(pd()->prb_.itype); -- c.out = out + d0 * ns[0].os * data_type_size(pd()->prb_.otype); -- c.scale = scale + d0 * ns[0].ss; -- c.blk_chunks = d0; -- (*kernel_)(&c); -+ tr::call_param_t base_params; -+ base_params.in = in + d0 * ns[0].is * 
data_type_size(prb.itype); -+ base_params.out = out + d0 * ns[0].os * data_type_size(prb.otype); -+ base_params.scale = scale + d0 * ns[0].ss; -+ base_params.src_zp = src_zp; -+ base_params.dst_zp = dst_zp; -+ base_params.compensation_scratch = compensation_scratch + d0 * ns[0].cs; -+ -+ if (prb.is_tail_present) { -+ tr::tail_call_param_t tail_params; -+ tail_params.base_params = base_params; -+ -+ static constexpr int omp_ndims = 1; -+ const ptrdiff_t omp_data_chunks[omp_ndims] = {d0}; -+ fill_curr_data_chunks( -+ prb, off, omp_data_chunks, omp_ndims, tail_params); -+ (*kernel_)(&tail_params); -+ } else { -+ (*kernel_)(&base_params); -+ } - }); - } - - void jit_uni_reorder_t::omp_driver_2d(int ithr, int nthr, int off, -- const char *in, char *out, const float *scale) const { -- const tr::node_t *ns = pd()->prb_.nodes + off; -- const int blk_idx_off = pd()->prb_.blk_chunk_idx - off; -+ const char *in, char *out, const float *scale, int src_zp, int dst_zp, -+ int32_t *compensation_scratch) const { -+ const tr::prb_t &prb = pd()->prb_; -+ const tr::node_t *ns = prb.nodes + off; - for_nd(ithr, nthr, (ptrdiff_t)ns[1].n, (ptrdiff_t)ns[0].n, - [&](ptrdiff_t d1, ptrdiff_t d0) { -- auto c = tr::call_param_t(); -- c.in = in -+ tr::call_param_t base_params; -+ base_params.in = in - + (d0 * ns[0].is + d1 * ns[1].is) -- * data_type_size(pd()->prb_.itype); -- c.out = out -+ * data_type_size(prb.itype); -+ base_params.out = out - + (d0 * ns[0].os + d1 * ns[1].os) -- * data_type_size(pd()->prb_.otype); -- c.scale = scale + d0 * ns[0].ss + d1 * ns[1].ss; -- c.blk_chunks = utils::pick(blk_idx_off, d0, d1); -- (*kernel_)(&c); -+ * data_type_size(prb.otype); -+ base_params.scale = scale + d0 * ns[0].ss + d1 * ns[1].ss; -+ base_params.src_zp = src_zp; -+ base_params.dst_zp = dst_zp; -+ base_params.compensation_scratch -+ = compensation_scratch + d0 * ns[0].cs + d1 * ns[1].cs; -+ -+ if (prb.is_tail_present) { -+ tr::tail_call_param_t tail_params; -+ tail_params.base_params = 
base_params; -+ -+ static constexpr int omp_ndims = 2; -+ const ptrdiff_t omp_data_chunks[omp_ndims] = {d0, d1}; -+ fill_curr_data_chunks( -+ prb, off, omp_data_chunks, omp_ndims, tail_params); -+ -+ (*kernel_)(&tail_params); -+ } else { -+ (*kernel_)(&base_params); -+ } - }); - } - - void jit_uni_reorder_t::omp_driver_3d(int ithr, int nthr, int off, -- const char *in, char *out, const float *scale) const { -- const tr::node_t *ns = pd()->prb_.nodes + off; -- const int blk_idx_off = pd()->prb_.blk_chunk_idx - off; -+ const char *in, char *out, const float *scale, int src_zp, int dst_zp, -+ int32_t *compensation_scratch) const { -+ const tr::prb_t &prb = pd()->prb_; -+ const tr::node_t *ns = prb.nodes + off; - for_nd(ithr, nthr, (ptrdiff_t)ns[2].n, (ptrdiff_t)ns[1].n, - (ptrdiff_t)ns[0].n, [&](ptrdiff_t d2, ptrdiff_t d1, ptrdiff_t d0) { -- auto c = tr::call_param_t(); -- c.in = in -+ tr::call_param_t base_params; -+ base_params.in = in - + (d0 * ns[0].is + d1 * ns[1].is + d2 * ns[2].is) -- * data_type_size(pd()->prb_.itype); -- c.out = out -+ * data_type_size(prb.itype); -+ base_params.out = out - + (d0 * ns[0].os + d1 * ns[1].os + d2 * ns[2].os) -- * data_type_size(pd()->prb_.otype); -- c.scale = scale + d0 * ns[0].ss + d1 * ns[1].ss + d2 * ns[2].ss; -- c.blk_chunks = utils::pick(blk_idx_off, d0, d1, d2); -- (*kernel_)(&c); -+ * data_type_size(prb.otype); -+ base_params.scale -+ = scale + d0 * ns[0].ss + d1 * ns[1].ss + d2 * ns[2].ss; -+ base_params.src_zp = src_zp; -+ base_params.dst_zp = dst_zp; -+ base_params.compensation_scratch = compensation_scratch -+ + d0 * ns[0].cs + d1 * ns[1].cs + d2 * ns[2].cs; -+ -+ if (prb.is_tail_present) { -+ tr::tail_call_param_t tail_params; -+ tail_params.base_params = base_params; -+ -+ static constexpr int omp_ndims = 3; -+ const ptrdiff_t omp_data_chunks[omp_ndims] = {d0, d1, d2}; -+ fill_curr_data_chunks( -+ prb, off, omp_data_chunks, omp_ndims, tail_params); -+ (*kernel_)(&tail_params); -+ } else { -+ 
(*kernel_)(&base_params); -+ } - }); - } - - void jit_uni_reorder_t::omp_driver_4d(int ithr, int nthr, int off, -- const char *in, char *out, const float *scale) const { -- const tr::node_t *ns = pd()->prb_.nodes + off; -- const int blk_idx_off = pd()->prb_.blk_chunk_idx - off; -+ const char *in, char *out, const float *scale, int src_zp, int dst_zp, -+ int32_t *compensation_scratch) const { -+ const tr::prb_t &prb = pd()->prb_; -+ const tr::node_t *ns = prb.nodes + off; - for_nd(ithr, nthr, (ptrdiff_t)ns[3].n, (ptrdiff_t)ns[2].n, - (ptrdiff_t)ns[1].n, (ptrdiff_t)ns[0].n, - [&](ptrdiff_t d3, ptrdiff_t d2, ptrdiff_t d1, ptrdiff_t d0) { -- auto c = tr::call_param_t(); -- c.in = in -+ tr::call_param_t base_params; -+ base_params.in = in - + (d0 * ns[0].is + d1 * ns[1].is + d2 * ns[2].is - + d3 * ns[3].is) -- * data_type_size(pd()->prb_.itype); -- c.out = out -+ * data_type_size(prb.itype); -+ base_params.out = out - + (d0 * ns[0].os + d1 * ns[1].os + d2 * ns[2].os - + d3 * ns[3].os) -- * data_type_size(pd()->prb_.otype); -- c.scale = scale + d0 * ns[0].ss + d1 * ns[1].ss + d2 * ns[2].ss -- + d3 * ns[3].ss; -- c.blk_chunks = utils::pick(blk_idx_off, d0, d1, d2, d3); -- (*kernel_)(&c); -+ * data_type_size(prb.otype); -+ base_params.scale = scale + d0 * ns[0].ss + d1 * ns[1].ss -+ + d2 * ns[2].ss + d3 * ns[3].ss; -+ base_params.src_zp = src_zp; -+ base_params.dst_zp = dst_zp; -+ base_params.compensation_scratch = compensation_scratch -+ + d0 * ns[0].cs + d1 * ns[1].cs + d2 * ns[2].cs -+ + d3 * ns[3].cs; -+ -+ if (prb.is_tail_present) { -+ tr::tail_call_param_t tail_params; -+ tail_params.base_params = base_params; -+ -+ static constexpr int omp_ndims = 4; -+ const ptrdiff_t omp_data_chunks[omp_ndims] -+ = {d0, d1, d2, d3}; -+ fill_curr_data_chunks( -+ prb, off, omp_data_chunks, omp_ndims, tail_params); -+ (*kernel_)(&tail_params); -+ } else { -+ (*kernel_)(&base_params); -+ } - }); - } - --void jit_uni_reorder_t::omp_driver( -- const char *in, char *out, const float 
*scale) const { -+void jit_uni_reorder_t::omp_driver(const char *in, char *out, -+ const float *scale, int src_zp, int dst_zp, -+ const memory_tracking::grantor_t &scratchpad) const { - in += pd()->prb_.ioff * data_type_size(pd()->prb_.itype); - out += pd()->prb_.ooff * data_type_size(pd()->prb_.otype); - -@@ -1770,29 +2748,153 @@ void jit_uni_reorder_t::omp_driver( - - int ndims = pd()->prb_.ndims; - int ndims_ker = pd()->ker_desc_.prb.ndims; -+ const bool req_s8s8_comp = pd()->prb_.req_s8s8_comp; -+ const bool req_asymmetric_comp = pd()->prb_.req_asymmetric_comp; -+ const bool req_compensation = req_s8s8_comp || req_asymmetric_comp; - assert(ndims - ndims_ker <= ndims_driver_max); - -+ int32_t *compensation_reduce_scratch = scratchpad.template get( -+ memory_tracking::names::key_reorder_space); -+ -+ const memory_desc_wrapper od(pd()->dst_md()); -+ const auto G = pd()->with_groups_ ? od.padded_dims()[0] : 1; -+ const auto N = od.padded_dims()[pd()->with_groups_ ? 1 : 0]; -+ static constexpr int cache_line_size = 16; -+ const auto wspace_per_thr_size = utils::rnd_up(G * N, cache_line_size); -+ const auto wspace_per_thr_bytes = wspace_per_thr_size * sizeof(int32_t); -+ - if (ndims - ndims_ker == 0) { -- omp_driver_0d(ndims_ker, in, out, scale); -+ if (req_compensation) -+ std::memset(compensation_reduce_scratch, 0, wspace_per_thr_bytes); -+ -+ omp_driver_0d(ndims_ker, in, out, scale, src_zp, dst_zp, -+ compensation_reduce_scratch); - } else { - parallel(pd()->nthr_, [&](const int ithr, const int nthr) { -+ int32_t *compensation_scratch = nullptr; -+ if (req_compensation) { -+ compensation_scratch = &compensation_reduce_scratch[ithr -+ * wspace_per_thr_size]; -+ std::memset(compensation_scratch, 0, wspace_per_thr_bytes); -+ } -+ - switch (ndims - ndims_ker) { - case 1: -- omp_driver_1d(ithr, nthr, ndims_ker, in, out, scale); -+ omp_driver_1d(ithr, nthr, ndims_ker, in, out, scale, src_zp, -+ dst_zp, compensation_scratch); - break; - case 2: -- omp_driver_2d(ithr, 
nthr, ndims_ker, in, out, scale); -+ omp_driver_2d(ithr, nthr, ndims_ker, in, out, scale, src_zp, -+ dst_zp, compensation_scratch); - break; - case 3: -- omp_driver_3d(ithr, nthr, ndims_ker, in, out, scale); -+ omp_driver_3d(ithr, nthr, ndims_ker, in, out, scale, src_zp, -+ dst_zp, compensation_scratch); - break; - case 4: -- omp_driver_4d(ithr, nthr, ndims_ker, in, out, scale); -+ omp_driver_4d(ithr, nthr, ndims_ker, in, out, scale, src_zp, -+ dst_zp, compensation_scratch); - break; - default: assert(!"unimplemented"); - } - }); - } -+ -+ // Reduction of intermediate compensation results to the final output -+ if (req_compensation) { -+ const int nthr = ndims - ndims_ker == 0 ? 1 : pd()->nthr_; -+ reduce_compensation( -+ out, compensation_reduce_scratch, nthr, wspace_per_thr_size); -+ } -+} -+ -+void jit_uni_reorder_t::reduce_compensation(char *out, -+ const int32_t *compensation_reduce_scratch, const int nthr, -+ const dim_t wspace_per_thr_size) const { -+ -+ const memory_desc_wrapper od(pd()->dst_md()); -+ const size_t offset = od.size() - od.additional_buffer_size(); -+ -+ static constexpr auto comp_dt_size = sizeof(int32_t); -+ static constexpr int32_t comp_s8s8_shift = 128; -+ -+ // Note: We do not need to explicitly zero-out compensation buffer, as the -+ // per_thread buffers are already zeroed out in the padded area. -+ const auto G = pd()->with_groups_ ? od.padded_dims()[0] : 1; -+ const auto N = od.padded_dims()[pd()->with_groups_ ? 1 : 0]; -+ const auto GN = G * N; -+ const bool req_s8s8_comp = pd()->prb_.req_s8s8_comp; -+ const bool req_asymmetric_comp = pd()->prb_.req_asymmetric_comp; -+ const size_t zp_offset -+ = offset + (pd()->prb_.req_s8s8_comp ? 
GN * comp_dt_size : 0); -+ -+ parallel_nd(GN, [&](int idx) { -+ int32_t acc = 0; -+ for (int ithr = 0; ithr < nthr; ithr++) { -+ acc -= compensation_reduce_scratch[ithr * wspace_per_thr_size -+ + idx]; -+ } -+ if (req_s8s8_comp) { -+ int32_t *out_comp = reinterpret_cast(&out[offset]); -+ out_comp[idx] = comp_s8s8_shift * acc; -+ } -+ if (req_asymmetric_comp) { -+ int32_t *out_asym_comp -+ = reinterpret_cast(&out[zp_offset]); -+ out_asym_comp[idx] = acc; -+ } -+ }); -+} -+ -+void jit_uni_reorder_t::fill_curr_data_chunks(const tr::prb_t &prb, -+ const int off, const ptrdiff_t *omp_data_chunks, const int omp_ndims, -+ tr::tail_call_param_t &c) const { -+ // Chunks are backwards numered i.e: -+ // [0] -> [node_size] -+ // [1] -> [node_size - 1] -+ // ... -+ // [node_size - 1] -> [1] -+ -+ // It is done like this, because it is easier to decrement counter -+ // and check if it is equal to zero than increment and check -+ // if it is equal to node_size in jit kernel. -+ -+ static constexpr int64_t empty_chunk_info = -1; -+ static constexpr int64_t last_chunk = 1; -+ -+ for (int curr_node_id = prb.ndims - 1; curr_node_id >= 0; curr_node_id--) { -+ const int parent_node_id = prb.nodes[curr_node_id].parent_node_id; -+ const bool is_drv_processing_this_node -+ = curr_node_id >= off && curr_node_id <= off + omp_ndims - 1; -+ const bool is_tail_processing -+ = prb.is_tail_in_one_of_child_nodes(curr_node_id) -+ || prb.nodes[curr_node_id].tail_size > 0; -+ -+ if (is_drv_processing_this_node && is_tail_processing) { -+ const int inner_idx = curr_node_id - off; -+ assert(inner_idx < omp_ndims); -+ const int64_t node_size = prb.nodes[curr_node_id].tail_size > 0 -+ ? 
prb.nodes[curr_node_id].tail_size -+ : prb.nodes[curr_node_id].n; -+ const int64_t data_chunk = node_size - omp_data_chunks[inner_idx]; -+ -+ if (!prb.nodes[curr_node_id].is_parent_empty()) { -+ const bool is_parent_chunk_last -+ = c.curr_data_chunks[parent_node_id] == last_chunk; -+ c.curr_data_chunks[curr_node_id] -+ = is_parent_chunk_last ? data_chunk : empty_chunk_info; -+ c.zeroing_data = static_cast( -+ is_parent_chunk_last && data_chunk <= 0); -+ } else { -+ c.curr_data_chunks[curr_node_id] = data_chunk; -+ c.zeroing_data = static_cast(data_chunk <= 0); -+ } -+ c.skip_kernel_execution = static_cast(c.zeroing_data -+ && !prb.nodes[curr_node_id].is_zero_pad_needed); -+ if (c.zeroing_data || c.skip_kernel_execution) break; -+ } else -+ c.curr_data_chunks[curr_node_id] = empty_chunk_info; -+ } - } - - status_t jit_uni_reorder_t::init(engine_t *engine) { -@@ -1801,13 +2903,98 @@ status_t jit_uni_reorder_t::init(engine_t *engine) { - } - - status_t jit_uni_reorder_t::execute(const exec_ctx_t &ctx) const { -- status_t status = status::success; - auto in = CTX_IN_MEM(const char *, DNNL_ARG_FROM); -- auto out = CTX_OUT_CLEAN_MEM(char *, DNNL_ARG_TO, status); -- CHECK(status); -+ auto out = CTX_OUT_MEM(char *, DNNL_ARG_TO); - DEFINE_SCALES_BUFFER(scales); -+ DEFINE_ZERO_POINT_VALUE(src_zp, DNNL_ARG_FROM); -+ DEFINE_ZERO_POINT_VALUE(dst_zp, DNNL_ARG_TO); -+ const auto &scratchpad = ctx.get_scratchpad_grantor(); -+ -+ omp_driver(in, out, scales, src_zp, dst_zp, scratchpad); -+ -+ return status::success; -+} -+ -+status_t jit_blk_reorder_t::pd_t::create(reorder_pd_t **reorder_pd, -+ engine_t *engine, const primitive_attr_t *attr, engine_t *src_engine, -+ const memory_desc_t *src_md, engine_t *dst_engine, -+ const memory_desc_t *dst_md) { -+ auto prb = tr::prb_t(); -+ -+ status_t prb_init_status = prb_init(prb, *src_md, *dst_md, attr); -+ if (prb_init_status != status::success) return prb_init_status; -+ // only uni_reorder supports tail processing now -+ // TODO: Add 
tail processing support in blk_reorder -+ if (prb.is_tail_present) return status::unimplemented; -+ -+ prb_tile_normalize(prb); -+ DEBUG({ -+ printf("tile : "); -+ prb_dump(prb); -+ }); -+ -+ if (!tr::jit_single_blk_kernel_t::applicable(prb)) { -+ return status::unimplemented; -+ } - -- omp_driver(in, out, scales); -+ auto _pd = new pd_t( -+ attr, src_engine->kind(), src_md, dst_engine->kind(), dst_md); -+ if (_pd == nullptr) return status::out_of_memory; -+ _pd->prb_ = prb; -+ if (_pd->init(engine, src_engine, dst_engine) != status::success) { -+ delete _pd; -+ return status::unimplemented; -+ } -+ _pd->init_scratchpad_md(); -+ -+ return safe_ptr_assign(*reorder_pd, _pd); -+} -+ -+void jit_blk_reorder_t::pd_t::prb_tile_normalize(tr::prb_t &p) { -+ if (!utils::one_of(p.nodes[0].n, 8ul, 16ul) -+ && utils::one_of(p.nodes[1].n, 8ul, 16ul)) { -+ nstl::swap(p.nodes[0], p.nodes[1]); -+ } -+} -+ -+jit_blk_reorder_t::jit_blk_reorder_t(const pd_t *apd) : primitive_t(apd) {} -+jit_blk_reorder_t::~jit_blk_reorder_t() = default; -+ -+status_t jit_blk_reorder_t::init(engine_t *engine) { -+ kernel_ = utils::make_unique(pd()->prb_); -+ return kernel_->create_kernel(); -+} -+ -+status_t jit_blk_reorder_t::execute(const exec_ctx_t &ctx) const { -+ const auto in = CTX_IN_MEM(const char *, DNNL_ARG_FROM); -+ auto out = CTX_OUT_MEM(char *, DNNL_ARG_TO); -+ DEFINE_ZERO_POINT_VALUE(src_zp, DNNL_ARG_FROM); -+ DEFINE_ZERO_POINT_VALUE(dst_zp, DNNL_ARG_TO); -+ -+ // kernel handle 2-dimension tiles, a tail is possible -+ auto &prb = this->pd()->prb_; -+ ptrdiff_t BH = 1; -+ for (int i = 2; i < prb.ndims; ++i) { -+ BH *= prb.nodes[i].n; -+ } -+ -+ auto block_sz = prb.n(0); -+ auto n1 = prb.n(1); -+ auto i1 = prb.is(1); -+ auto o1 = prb.os(1); -+ auto FL = (n1 + block_sz - 1) / block_sz; -+ auto bh_stride = BH == 1 ? 
0 : prb.is(2); -+ -+ auto itype_sz_ = data_type_size(pd()->prb_.itype); -+ auto otype_sz_ = data_type_size(pd()->prb_.otype); -+ -+ parallel_nd(BH, FL, [&](dim_t bh, dim_t fl) { -+ auto fl_b = fl * block_sz; -+ auto bh_b = bh_stride * bh; -+ auto *i = in + (bh_b + fl_b * i1) * itype_sz_; -+ auto *o = out + (bh_b + fl_b * o1) * otype_sz_; -+ (*kernel_)(i, o, n1 - fl_b < block_sz, src_zp, dst_zp); -+ }); - - return status::success; - } -diff --git a/src/cpu/aarch64/jit_uni_reorder.hpp b/src/cpu/aarch64/jit_uni_reorder.hpp -index 2fb6f0f89f3..bf400430ba5 100644 ---- a/src/cpu/aarch64/jit_uni_reorder.hpp -+++ b/src/cpu/aarch64/jit_uni_reorder.hpp -@@ -1,6 +1,6 @@ - /******************************************************************************* --* Copyright 2018-2020 Intel Corporation --* Copyright 2020 FUJITSU LIMITED -+* Copyright 2018-2022 Intel Corporation -+* Copyright 2020-2022 FUJITSU LIMITED - * Copyright 2022 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); -@@ -36,15 +36,76 @@ namespace tr { - constexpr int max_ndims = DNNL_MAX_NDIMS; - - struct node_t { -- size_t n; -- ptrdiff_t is; // input stride -- ptrdiff_t os; // output stride -- ptrdiff_t ss; // scale stride -+ static constexpr int64_t empty_field = -1; -+ -+ size_t n = 0; -+ size_t tail_size = 0; -+ int dim_id = empty_field; -+ int parent_node_id = empty_field; -+ bool is_zero_pad_needed = false; -+ ptrdiff_t is = 0; // input stride -+ ptrdiff_t os = 0; // output stride -+ ptrdiff_t ss = 0; // scale stride -+ ptrdiff_t cs = 0; // compensation stride -+ -+ bool is_dim_id_empty() const { return dim_id == empty_field; } -+ bool is_parent_empty() const { return parent_node_id == empty_field; } - }; - - enum class scale_type_t { NONE, COMMON, MANY }; - - struct prb_t { -+ /* The compensation mask value indicates how big an additional buffer should be. 
-+ * Possible values for reorder: -+ * 1) standard compensation = 1 = 0b01 -+ * 2) asymmetric compensation = 2 = 0b10 -+ * 3) compensation if tensor contains group = 3 = 0b11 */ -+ static constexpr int invalid_comp_mask = 0; -+ static constexpr int standard_comp_mask = 0b1; -+ static constexpr int asymmetric_comp_mask = 0b10; -+ static constexpr int comp_mask_with_groups -+ = standard_comp_mask + asymmetric_comp_mask; -+ -+ bool is_tail_in_one_of_child_nodes(int parent_node_id) const { -+ for (int i = parent_node_id; i >= 0; i--) { -+ if (nodes[i].parent_node_id == parent_node_id) { -+ if (nodes[i].tail_size != 0) -+ return true; -+ else -+ parent_node_id = i; -+ } -+ } -+ -+ return false; -+ } -+ -+ int tail(int d) const { -+ assert(d < ndims); -+ return static_cast(nodes[d].tail_size); -+ } -+ -+ int n(int d) const { -+ assert(d < ndims); -+ return static_cast(nodes[d].n); -+ } -+ int is(int d) const { -+ assert(d < ndims); -+ return static_cast(nodes[d].is); -+ } -+ int os(int d) const { -+ assert(d < ndims); -+ return static_cast(nodes[d].os); -+ } -+ int ss(int d) const { -+ assert(d < ndims); -+ return static_cast(nodes[d].ss); -+ } -+ -+ int cs(int d) const { -+ assert(d < ndims); -+ return static_cast(nodes[d].cs); -+ } -+ - data_type_t itype; - data_type_t otype; - int ndims; -@@ -54,21 +115,24 @@ struct prb_t { - scale_type_t scale_type; - float beta; - int full_ndims; -- int ip_tail; -- int op_tail; -- int iblock; -- int oblock; -- int blk_chunk_idx; -+ bool is_tail_present = false; -+ float scale_adjust = 1.f; -+ int compensation_mask = invalid_comp_mask; -+ bool req_s8s8_comp = false; -+ bool req_asymmetric_comp = false; -+ bool req_src_zp = false; -+ bool req_dst_zp = false; - }; - - status_t prb_init(prb_t &prb, const memory_desc_t &imd, - const memory_desc_t &omd, const primitive_attr_t *attr); - --status_t prb_check_blk(prb_t &prb, const memory_desc_t &imd); -- - /** sorts the problem nodes so that output strides come in ascending order */ - void 
prb_normalize(prb_t &p); - -+/** fill parent node info for blocked nodes */ -+void prb_node_dependency(prb_t &p); -+ - /** folds nodes together if possible */ - void prb_simplify(prb_t &p); - -@@ -88,10 +152,24 @@ void prb_node_move(prb_t &p, int d0, int d1); - void prb_dump(const prb_t &p); - - struct call_param_t { -- const void *in; -- void *out; -- const float *scale; -- size_t blk_chunks; -+ const void *in = nullptr; -+ void *out = nullptr; -+ const float *scale = nullptr; -+ int32_t src_zp = 0; -+ int32_t dst_zp = 0; -+ int32_t *compensation_scratch = nullptr; -+}; -+ -+// The additional structure is needed because -+// using a data structure with tail processing -+// data for non-tail cases reduces kernel -+// performance. This is because there is too -+// much data that has to be transferred to the kernel. -+struct tail_call_param_t { -+ call_param_t base_params; -+ int64_t curr_data_chunks[DNNL_MAX_NDIMS] = {-1}; -+ int64_t zeroing_data = static_cast(false); -+ int64_t skip_kernel_execution = static_cast(false); - }; - - struct kernel_t { -@@ -100,8 +178,12 @@ struct kernel_t { - prb_t prb; - }; - -- kernel_t(const desc_t &desc) : desc_(desc) {} -+ kernel_t(const desc_t &desc) -+ : desc_(desc) -+ , compensation_needed_( -+ desc.prb.req_s8s8_comp || desc.prb.req_asymmetric_comp) {} - virtual void operator()(const call_param_t *c) const = 0; -+ virtual void operator()(const tail_call_param_t *c) const = 0; - virtual status_t create_kernel() = 0; - virtual ~kernel_t() {} - -@@ -119,10 +201,13 @@ struct kernel_t { - protected: - const desc_t desc_; - const prb_t &prb_ = desc_.prb; -+ bool compensation_needed_ = false; - }; - - /* TODO: add trans_t class */ - -+struct jit_single_blk_kernel_t; -+ - } // namespace tr - - struct jit_uni_reorder_t : public primitive_t { -@@ -135,8 +220,13 @@ struct jit_uni_reorder_t : public primitive_t { - tr::prb_t prb_; - tr::kernel_t::desc_t ker_desc_; - int nthr_; -+ bool with_groups_ = false; -+ -+ status_t init( -+ engine_t 
*engine, engine_t *src_engine, engine_t *dst_engine); - - private: -+ void init_scratchpad(); - static status_t create(reorder_pd_t **reorder_pd, engine_t *engine, - const primitive_attr_t *attr, engine_t *src_engine, - const memory_desc_t *src_md, engine_t *dst_engine, -@@ -151,23 +241,66 @@ struct jit_uni_reorder_t : public primitive_t { - enum { ndims_driver_max = 4 }; - - private: -- void omp_driver_0d( -- int off, const char *in, char *out, const float *scale) const; -+ void omp_driver_0d(int off, const char *in, char *out, const float *scale, -+ int src_zp, int dst_zp, int32_t *compensation_scratch) const; - void omp_driver_1d(int ithr, int nthr, int off, const char *in, char *out, -- const float *scale) const; -+ const float *scale, int src_zp, int dst_zp, -+ int32_t *compensation_scratch) const; - void omp_driver_2d(int ithr, int nthr, int off, const char *in, char *out, -- const float *scale) const; -+ const float *scale, int src_zp, int dst_zp, -+ int32_t *compensation_scratch) const; - void omp_driver_3d(int ithr, int nthr, int off, const char *in, char *out, -- const float *scale) const; -+ const float *scale, int src_zp, int dst_zp, -+ int32_t *compensation_scratch) const; - void omp_driver_4d(int ithr, int nthr, int off, const char *in, char *out, -- const float *scale) const; -+ const float *scale, int src_zp, int dst_zp, -+ int32_t *compensation_scratch) const; -+ -+ void omp_driver(const char *in, char *out, const float *scale, int src_zp, -+ int dst_zp, const memory_tracking::grantor_t &scratchpad) const; - -- void omp_driver(const char *in, char *out, const float *scale) const; -+ void fill_curr_data_chunks(const tr::prb_t &prb, const int off, -+ const ptrdiff_t *omp_data_chunks, const int omp_ndims, -+ tr::tail_call_param_t &c) const; -+ -+ void reduce_compensation(char *out, -+ const int32_t *compensation_reduce_scratch, const int nthr, -+ const dim_t wspace_per_thr_size) const; - - const pd_t *pd() const { return (const pd_t 
*)primitive_t::pd().get(); } - std::unique_ptr kernel_; - }; - -+struct jit_blk_reorder_t : public primitive_t { -+ using primitive_t::primitive_t; -+ struct pd_t : public cpu_reorder_pd_t { -+ using cpu_reorder_pd_t::cpu_reorder_pd_t; -+ DECLARE_COMMON_PD_T("jit:blk", jit_blk_reorder_t); -+ -+ tr::prb_t prb_; -+ -+ private: -+ static status_t create(reorder_pd_t **reorder_pd, engine_t *engine, -+ const primitive_attr_t *attr, engine_t *src_engine, -+ const memory_desc_t *src_md, engine_t *dst_engine, -+ const memory_desc_t *dst_md); -+ -+ // Swap last two nodes, put block 4, 8, 16 nodes to first -+ static void prb_tile_normalize(tr::prb_t &p); -+ friend dnnl::impl::impl_list_item_t; -+ }; -+ -+ status_t init(engine_t *engine) override; -+ status_t execute(const exec_ctx_t &ctx) const override; -+ -+ jit_blk_reorder_t(const pd_t *apd); -+ ~jit_blk_reorder_t(); -+ -+private: -+ const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); } -+ std::unique_ptr kernel_; -+}; -+ - } // namespace aarch64 - } // namespace cpu - } // namespace impl -diff --git a/src/cpu/aarch64/jit_uni_reorder_utils.cpp b/src/cpu/aarch64/jit_uni_reorder_utils.cpp -index 7123811f827..28f36a7e2e7 100644 ---- a/src/cpu/aarch64/jit_uni_reorder_utils.cpp -+++ b/src/cpu/aarch64/jit_uni_reorder_utils.cpp -@@ -1,6 +1,6 @@ - /******************************************************************************* --* Copyright 2018-2021 Intel Corporation --* Copyright 2020 FUJITSU LIMITED -+* Copyright 2018-2022 Intel Corporation -+* Copyright 2020-2022 FUJITSU LIMITED - * Copyright 2022 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); -@@ -25,10 +25,21 @@ - #include "common/nstl.hpp" - #include "common/type_helpers.hpp" - #include "common/utils.hpp" --#include "dnnl_debug.h" -+#include "oneapi/dnnl/dnnl_debug.h" - - #include "cpu/aarch64/jit_uni_reorder.hpp" - -+// #define TR_DEBUG -+#if defined(TR_DEBUG) -+#define DEBUg(...) 
\ -+ do { \ -+ __VA_ARGS__ \ -+ } while (0) -+#else -+#define DEBUg(...) -+#endif -+#define DEBUG(...) DEBUg(__VA_ARGS__) -+ - using namespace dnnl::impl::types; - using namespace dnnl::impl::status; - -@@ -41,87 +52,45 @@ namespace tr { - - /** ad-hoc structure to describe blocked memory layout */ - struct layout_desc_t { -+ layout_desc_t() -+ : dt(dnnl_data_type_undef) -+ , ndims(0) -+ , id {-1} -+ , dims {0} -+ , tails {0} -+ , is_blk {false} -+ , strides {0} {} - data_type_t dt; - int ndims; - dims_t id; - dims_t dims; -+ dims_t tails; -+ bool is_blk[DNNL_MAX_NDIMS]; - strides_t strides; - }; - --static status_t compute_blk_and_tail( -- const memory_desc_t &md_, const int idx, int &blk, int &tail) { -- const auto md = memory_desc_wrapper(md_); -- const auto &bd = md.blocking_desc(); -- if (tail == 0) return status::success; -- -- const std::set unique_inner_idxs( -- bd.inner_idxs, bd.inner_idxs + bd.inner_nblks); -- std::set dims_with_multiple_blks; -- for (dim_t dim : unique_inner_idxs) { -- if (std::count(bd.inner_idxs, bd.inner_idxs + bd.inner_nblks, dim) > 1) -- dims_with_multiple_blks.insert(dim); -- } -- -- // Dims that have a tail and have multiple blocks are not supported by the jit kernel yet. -- // For example: -- // src_tag = abcd -- // dst_tag = ABcd16b16a4b -- // 16x15x3x3 -- // In this case, 'b' dim has two blocks and has a tail. It is not a supported case. 
-- if (dims_with_multiple_blks.find(idx) != dims_with_multiple_blks.end()) -- return status::unimplemented; -- -- // Only supports inconsistent padding in single and double blocks -- // and the total block size <= 256 -- for (int iblk = bd.inner_nblks - 1; iblk > 0; --iblk) { -- if (bd.inner_idxs[iblk] == idx) break; -- blk *= bd.inner_blks[iblk]; -- tail *= bd.inner_blks[iblk]; -- } -- if (unique_inner_idxs.size() > 2 || blk > 256) return status::unimplemented; -- -- return status::success; --} -- --static status_t compute_chunk_idx(const prb_t &p, const memory_desc_t &imd_, -- const memory_desc_t &omd_, const int blk_idx, int &chunk_idx) { -- const auto imd = memory_desc_wrapper(imd_); -- const auto omd = memory_desc_wrapper(omd_); -- const auto &ibd = imd.blocking_desc(); -- const auto &obd = omd.blocking_desc(); -- if (p.ip_tail == 0 && p.op_tail == 0) return status::success; -- -- const ptrdiff_t is -- = ibd.strides[blk_idx] * obd.inner_blks[obd.inner_idxs[blk_idx]]; -- const ptrdiff_t os = obd.strides[blk_idx]; -- -- for (int i = blk_idx; i < omd.ndims(); ++i) { -- if (p.nodes[i].os == os && p.nodes[i].is == is) { -- chunk_idx = i; -- return status::success; -- } -- } -- -- return status::invalid_arguments; --} -- - status_t cvt_mem_desc_to_layout_desc(const memory_desc_t &md_, -- layout_desc_t &ld, const dims_t &blocks, const dims_t &ext_padding) { -+ layout_desc_t &ld, const dims_t &blocks, const dims_t &external_padding, -+ const dims_t &tails) { -+ static constexpr bool it_is_blk = true; -+ - const auto md = memory_desc_wrapper(md_); - -- bool ok = true && md.is_blocking_desc() && md.extra().flags == 0; -- if (!ok) return invalid_arguments; -+ if (!md.is_blocking_desc()) return invalid_arguments; - - const auto &bd = md.blocking_desc(); - - ld.ndims = 0; - ld.dt = md.data_type(); - -- auto P = [&ld](int id, int dim, ptrdiff_t stride) { -+ auto add_dim = [&ld](int id, dim_t dim, dim_t tail, bool is_blk, -+ ptrdiff_t stride) { - assert((size_t)ld.ndims < 
sizeof(ld.dims) / sizeof(ld.dims[0])); - ld.id[ld.ndims] = id; - ld.dims[ld.ndims] = dim; - ld.strides[ld.ndims] = stride; -+ ld.tails[ld.ndims] = tail; -+ ld.is_blk[ld.ndims] = is_blk; - ++ld.ndims; - }; - -@@ -129,12 +98,27 @@ status_t cvt_mem_desc_to_layout_desc(const memory_desc_t &md_, - const int ld_ndims_start = ld.ndims; - if (blocks[d] != 1) { - stride_t stride = 1; -+ int tail = tails[d]; - for (int iblk = bd.inner_nblks - 1; iblk >= 0; --iblk) { -- if (bd.inner_idxs[iblk] == d) P(d, bd.inner_blks[iblk], stride); -+ if (bd.inner_idxs[iblk] == d) { -+ const dim_t inner_tail = tail % bd.inner_blks[iblk]; -+ add_dim(d, bd.inner_blks[iblk], inner_tail, it_is_blk, -+ stride); -+ tail = utils::div_up(tail, bd.inner_blks[iblk]); -+ } - stride *= bd.inner_blks[iblk]; - } - } -- P(d, (md.padded_dims()[d] + ext_padding[d]) / blocks[d], bd.strides[d]); -+ -+ const dim_t dim_with_external_padding -+ = (md.padded_dims()[d] + external_padding[d]) / blocks[d]; -+ const dim_t padded_dim = md.padded_dims()[d] / blocks[d]; -+ const dim_t tail = dim_with_external_padding != padded_dim -+ ? dim_with_external_padding -+ - (dim_with_external_padding - padded_dim) -+ : 0; -+ -+ add_dim(d, dim_with_external_padding, tail, !it_is_blk, bd.strides[d]); - - // TODO: NOW: revisit, do we need a reverse? 
- // TODO: NOW: consider using strides instead of block sizes in md -@@ -144,12 +128,70 @@ status_t cvt_mem_desc_to_layout_desc(const memory_desc_t &md_, - const int idx1 = ld.ndims - 1 - ld_d; - nstl::swap(ld.dims[idx0], ld.dims[idx1]); - nstl::swap(ld.strides[idx0], ld.strides[idx1]); -+ nstl::swap(ld.tails[idx0], ld.tails[idx1]); -+ nstl::swap(ld.is_blk[idx0], ld.is_blk[idx1]); - } - } - - return success; - } - -+static bool is_with_groups(const memory_desc_t &dst_md) { -+ using namespace memory_extra_flags; -+ auto dst_d = memory_desc_wrapper(dst_md); -+ const int grp_bit = 1 << 1; -+ auto check_flag_and_mask = [&](int flag, int mask) { -+ return (dst_d.extra().flags & flag) && (mask & grp_bit); -+ }; -+ -+ return check_flag_and_mask( -+ compensation_conv_s8s8, dst_d.extra().compensation_mask) -+ || check_flag_and_mask(compensation_conv_asymmetric_src, -+ dst_d.extra().asymm_compensation_mask); -+} -+ -+static inline int get_next_parent_node(node_t *nodes, int ndims, int cur_node) { -+ const int cur_id = nodes[cur_node].dim_id; -+ for (int d = cur_node + 1; d < ndims; ++d) { -+ if (nodes[d].dim_id == cur_id) return d; -+ } -+ return -1; -+} -+ -+static void prb_set_compensation_strides(prb_t &p) { -+ -+ auto require_n_stride = [&](int cur_node) -> bool { -+ const int parent = get_next_parent_node(p.nodes, p.ndims, cur_node); -+ if (parent < 0) return false; -+ -+ const size_t p_n = p.nodes[parent].n; -+ -+ // if 'parent_node.n' is larger than 1, then cur_node stride -+ // is 'cur_node.n' -+ return p_n > size_t(1); -+ }; -+ -+ const auto compensation_needed = p.req_s8s8_comp || p.req_asymmetric_comp; -+ if (!compensation_needed) return; -+ int mask = p.compensation_mask; -+ ptrdiff_t cs = 1; -+ for (int d = 0; d < p.ndims; ++d) { -+ if (mask & (1 << p.nodes[d].dim_id)) { -+ -+ // correct cases when 'cs' exceeds output stride -+ if (cs > p.nodes[d].os) cs = p.nodes[d].os; -+ -+ p.nodes[d].cs = cs; -+ const bool n_stride = require_n_stride(d); -+ if 
(p.nodes[d].tail_size > 0 && (!p.nodes[d].is_zero_pad_needed) -+ && (!n_stride)) -+ cs *= p.nodes[d].tail_size; -+ else -+ cs *= p.nodes[d].n; -+ } -+ } -+} -+ - status_t prb_init(prb_t &p, const memory_desc_t &imd, const memory_desc_t &omd, - const primitive_attr_t *attr) { - auto im_d = memory_desc_wrapper(imd); -@@ -157,8 +199,7 @@ status_t prb_init(prb_t &p, const memory_desc_t &imd, const memory_desc_t &omd, - - auto check_post_ops = [](const primitive_attr_t *attr) { - const auto &po = attr->post_ops_; -- return po.len() == 0 -- || (po.len() == 1 && po.contain(primitive_kind::sum, 0)); -+ return po.len() == 0 || (po.len() == 1 && po.entry_[0].is_sum(false)); - }; - - bool ok = im_d.is_blocking_desc() && om_d.is_blocking_desc() -@@ -166,81 +207,129 @@ status_t prb_init(prb_t &p, const memory_desc_t &imd, const memory_desc_t &omd, - && !om_d.has_runtime_dims_or_strides() && !om_d.has_zero_dim() - && attr->has_default_values( - primitive_attr_t::skip_mask_t::oscale_runtime -+ | primitive_attr_t::skip_mask_t::zero_points_runtime - | primitive_attr_t::skip_mask_t::post_ops) - && check_post_ops(attr); - if (!ok) return unimplemented; - -- dims_t iblocks, oblocks, ip_padding, op_padding; -+ bool is_tail_present = false; -+ dims_t iblocks, oblocks, i_tails, o_tails, i_paddings, o_paddings; - im_d.compute_blocks(iblocks); - om_d.compute_blocks(oblocks); -- utils::array_set(ip_padding, 0, im_d.ndims()); -- utils::array_set(op_padding, 0, om_d.ndims()); -- -- /* padding_dim consistency check -- * only supports inconsitent padding for src -- * TODO: Add inconsistent padding support for dst */ -- int ip_tail = 0; -- int op_tail = 0; -- int iblk_w_tail = 1; -- int oblk_w_tail = 1; -- int blk_idx = 0; -+ -+ for (int d = 0; d < om_d.ndims(); ++d) { -+ const auto dim = om_d.dims()[d]; -+ const auto pdim = om_d.padded_dims()[d]; -+ const auto cblock = oblocks[d]; -+ // do not allow excess pdim other than required for rounding-up of dim. 
-+ if (utils::rnd_up(dim, cblock) != pdim) return unimplemented; -+ } -+ -+ utils::array_set(i_tails, 0, im_d.ndims()); -+ utils::array_set(o_tails, 0, om_d.ndims()); -+ utils::array_set(i_paddings, 0, im_d.ndims()); -+ utils::array_set(o_paddings, 0, om_d.ndims()); - - for (int d = 0; d < im_d.ndims(); ++d) { -- const int ip_tmp_dim = im_d.padded_dims()[d]; -- const int op_tmp_dim = om_d.padded_dims()[d]; -- const int ip_tmp_tail = ip_tmp_dim % oblocks[d]; -- const int op_tmp_tail = op_tmp_dim % iblocks[d]; -- -- const bool pdim_consistent = ip_tmp_dim == op_tmp_dim -- && ip_tmp_tail == 0 && op_tmp_tail == 0; -- const bool pdim_tail = ip_tmp_tail > 0 -- && (ip_tmp_dim + oblocks[d] - ip_tmp_tail) == op_tmp_dim -- && op_tmp_tail == 0 && ip_tail == 0; -- if (!pdim_consistent && !pdim_tail) return status::unimplemented; -- if (pdim_tail) { -- blk_idx = d; -- ip_tail = ip_tmp_tail; -- op_tail = op_tmp_tail; -- iblk_w_tail = iblocks[d]; -- oblk_w_tail = oblocks[d]; -- ip_padding[d] = oblocks[d] - ip_tmp_tail; -- op_padding[d] = iblocks[d] - op_tmp_tail; -+ const dim_t i_dim = im_d.dims()[d]; -+ const dim_t o_dim = om_d.dims()[d]; -+ const dim_t i_tail = i_dim % iblocks[d]; -+ const dim_t o_tail = o_dim % oblocks[d]; -+ -+ if (o_tail > 0) { -+ is_tail_present = true; -+ o_tails[d] = o_tail; -+ o_paddings[d] = oblocks[d] - o_tail; -+ } -+ -+ if (i_tail > 0) { -+ is_tail_present = true; -+ i_tails[d] = i_tail; -+ i_paddings[d] = iblocks[d] - i_tail; - } - } -- CHECK(compute_blk_and_tail(omd, blk_idx, oblk_w_tail, ip_tail)); - -+ // To compute input layout description we need to pass output paddings -+ // which will be used to compute input dims rounded up to multiple of -+ // output dims. Analogous applies to output layout description. -+ // This is demanded by the algorithm of nodes creation. 
-+ // Example: -+ // input: -+ // format: abc -+ // size: 77, 15, 3 -+ // o_padding: 3, 17, 0 -+ // returns ild: 80, 32, 3 -+ // output: -+ // format: ABc16b16a2b -+ // size: 77, 15, 3 -+ // i_padding: 0, 0, 0 -+ // returns old: 5, 16, 1, 16, 2, 3 - layout_desc_t ild, old; -- status_t status -- = cvt_mem_desc_to_layout_desc(imd, ild, iblocks, ip_padding); -- if (status != success) return status; -- status = cvt_mem_desc_to_layout_desc(omd, old, oblocks, op_padding); -- if (status != success) return status; -+ CHECK(cvt_mem_desc_to_layout_desc(imd, ild, iblocks, o_paddings, i_tails)); -+ CHECK(cvt_mem_desc_to_layout_desc(omd, old, oblocks, i_paddings, o_tails)); - - p.itype = ild.dt; - p.otype = old.dt; -- p.ip_tail = ip_tail; -- p.op_tail = op_tail; -- p.iblock = iblk_w_tail; -- p.oblock = oblk_w_tail; -- -+ p.is_tail_present = is_tail_present; -+ p.req_src_zp = !attr->zero_points_.has_default_values(DNNL_ARG_SRC); -+ p.req_dst_zp = !attr->zero_points_.has_default_values(DNNL_ARG_DST); - p.scale_type = attr->output_scales_.has_default_values() - ? scale_type_t::NONE - : (attr->output_scales_.mask_ == 0 ? scale_type_t::COMMON - : scale_type_t::MANY); -+ p.scale_adjust = (om_d.extra().flags & memory_extra_flags::scale_adjust) -+ ? om_d.extra().scale_adjust -+ : 1.f; -+ p.req_s8s8_comp -+ = om_d.extra().flags & memory_extra_flags::compensation_conv_s8s8; -+ p.req_asymmetric_comp = om_d.extra().flags -+ & memory_extra_flags::compensation_conv_asymmetric_src; -+ -+ const bool with_groups = is_with_groups(omd); -+ -+ auto mask_ok = [&](bool check, int mask) { -+ return IMPLICATION(check, mask == (with_groups ? 
0x3 : 0x1)); -+ }; -+ -+ if (!mask_ok(p.req_s8s8_comp, om_d.extra().compensation_mask) -+ || !mask_ok(p.req_asymmetric_comp, -+ om_d.extra().asymm_compensation_mask)) -+ return status::unimplemented; - -- ptrdiff_t ss[max_ndims] = {0}; -+ ptrdiff_t ss[max_ndims] = {0}; // scales strides - if (p.scale_type == scale_type_t::MANY) { -- ptrdiff_t last_ss = 1; -+ const int mask = attr->output_scales_.mask_; -+ ptrdiff_t dense_stride = 1; -+ ptrdiff_t last_stride = 1; - for (int d = old.ndims - 1; d >= 0; --d) { - assert((d == 0 || old.id[d - 1] <= old.id[d]) - && "logical dimensions should be in ascending order"); -- if (attr->output_scales_.mask_ & (1 << old.id[d])) { -- ss[d] = last_ss; -- last_ss *= old.dims[d]; -+ if (mask & (1 << old.id[d])) { -+ if ((d + 1) < old.ndims && old.id[d + 1] != old.id[d] -+ && (mask & (1 << old.id[d + 1]))) { -+ dense_stride = dense_stride * imd.dims[old.id[d + 1]]; -+ last_stride = dense_stride; -+ } -+ ss[d] = last_stride; -+ last_stride *= old.dims[d]; - } - } - } - -+ const auto compensation_needed = p.req_s8s8_comp || p.req_asymmetric_comp; -+ if (compensation_needed) { -+ p.compensation_mask = p.req_s8s8_comp -+ ? om_d.extra().compensation_mask -+ : (p.req_asymmetric_comp ? 
om_d.extra().asymm_compensation_mask -+ : tr::prb_t::invalid_comp_mask); -+ -+ if (p.compensation_mask == tr::prb_t::asymmetric_comp_mask) -+ return unimplemented; -+ -+ assert(p.compensation_mask == tr::prb_t::standard_comp_mask -+ || p.compensation_mask == tr::prb_t::comp_mask_with_groups); -+ } -+ - int ndims = 0; - - int i_pos = 0; /* state for input -- current dimension */ -@@ -254,6 +343,10 @@ status_t prb_init(prb_t &p, const memory_desc_t &imd, const memory_desc_t &omd, - - if (ild.dims[i_pos] == old.dims[o_pos]) { - p.nodes[ndims].n = ild.dims[i_pos]; -+ p.nodes[ndims].dim_id = old.id[o_pos]; -+ p.nodes[ndims].tail_size = old.tails[o_pos]; -+ p.nodes[ndims].is_zero_pad_needed -+ = old.is_blk[o_pos] && old.tails[o_pos] > 0; - p.nodes[ndims].is = ild.strides[i_pos]; - p.nodes[ndims].os = old.strides[o_pos]; - p.nodes[ndims].ss = ss[o_pos]; -@@ -261,19 +354,45 @@ status_t prb_init(prb_t &p, const memory_desc_t &imd, const memory_desc_t &omd, - ++i_pos; - ++o_pos; - } else if (ild.dims[i_pos] < old.dims[o_pos]) { -- assert(old.dims[o_pos] % ild.dims[i_pos] == 0); -- int factor = old.dims[o_pos] / ild.dims[i_pos]; -+ // old must be divisible by ild or we will not be -+ // able to create valid nodes. The problem appears -+ // when stag=Acdb48a and dtag=Acdb32a for example. -+ if (ild.dims[i_pos] == 0 || old.dims[o_pos] % ild.dims[i_pos] != 0) -+ return status::unimplemented; -+ -+ dim_t factor = old.dims[o_pos] / ild.dims[i_pos]; -+ -+ const size_t tail_of_upper_dim -+ = utils::div_up(old.tails[o_pos], factor) == ild.dims[i_pos] -+ ? 
0 -+ : utils::div_up(old.tails[o_pos], factor); -+ const size_t tail_of_lower_dim = old.tails[o_pos] % factor; -+ - p.nodes[ndims].n = ild.dims[i_pos]; -+ p.nodes[ndims].dim_id = old.id[o_pos]; -+ p.nodes[ndims].tail_size = tail_of_upper_dim; -+ p.nodes[ndims].is_zero_pad_needed -+ = old.is_blk[o_pos] && tail_of_upper_dim > 0; - p.nodes[ndims].is = ild.strides[i_pos]; - p.nodes[ndims].os = old.strides[o_pos] * factor; - p.nodes[ndims].ss = ss[o_pos] * factor; - ++ndims; - ++i_pos; - old.dims[o_pos] = factor; -+ old.tails[o_pos] = tail_of_lower_dim; - } else if (ild.dims[i_pos] > old.dims[o_pos]) { -- assert(ild.dims[i_pos] % old.dims[o_pos] == 0); -- int factor = ild.dims[i_pos] / old.dims[o_pos]; -+ // ild must be divisible by old or we will not be -+ // able to create valid nodes. The problem appears -+ // when stag=Acdb32a and dtag=Acdb48a for example. -+ if (old.dims[o_pos] == 0 || ild.dims[i_pos] % old.dims[o_pos] != 0) -+ return status::unimplemented; -+ -+ dim_t factor = ild.dims[i_pos] / old.dims[o_pos]; - p.nodes[ndims].n = old.dims[o_pos]; -+ p.nodes[ndims].dim_id = old.id[o_pos]; -+ p.nodes[ndims].tail_size = old.tails[o_pos]; -+ p.nodes[ndims].is_zero_pad_needed -+ = old.is_blk[o_pos] && old.tails[o_pos] > 0; - p.nodes[ndims].is = ild.strides[i_pos] * factor; - p.nodes[ndims].os = old.strides[o_pos]; - p.nodes[ndims].ss = ss[o_pos]; -@@ -282,12 +401,9 @@ status_t prb_init(prb_t &p, const memory_desc_t &imd, const memory_desc_t &omd, - ild.dims[i_pos] = factor; - } - } -- int blk_chunk_idx = ndims; -- CHECK(compute_chunk_idx(p, imd, omd, blk_idx, blk_chunk_idx)); - - p.ndims = ndims; - p.full_ndims = ndims; -- p.blk_chunk_idx = blk_chunk_idx; - - p.ioff = memory_desc_wrapper(imd).offset0(); - p.ooff = memory_desc_wrapper(omd).offset0(); -@@ -295,6 +411,28 @@ status_t prb_init(prb_t &p, const memory_desc_t &imd, const memory_desc_t &omd, - const int sum_idx = attr->post_ops_.find(primitive_kind::sum); - p.beta = sum_idx == -1 ? 
0.f : attr->post_ops_.entry_[sum_idx].sum.scale; - -+ DEBUG({ -+ printf("init : "); -+ prb_dump(prb); -+ }); -+ // Sort the prb array in increasing sizes of the output stride -+ prb_normalize(p); -+ DEBUG({ -+ printf("norm : "); -+ prb_dump(prb); -+ }); -+ -+ // compensation strides require prb_normalized -+ prb_set_compensation_strides(p); -+ -+ /* Combine the variables, which appear together on both -+ * sides of the reorder */ -+ prb_simplify(p); -+ DEBUG({ -+ printf("smpl : "); -+ prb_dump(prb); -+ }); -+ - return success; - } - -@@ -307,28 +445,23 @@ void prb_normalize(prb_t &p) { - && p.nodes[j].n < p.nodes[min_pos].n); - if (new_min) min_pos = j; - } -- if (min_pos != d) { -- nstl::swap(p.nodes[d], p.nodes[min_pos]); -- if (p.blk_chunk_idx == min_pos || p.blk_chunk_idx == d) -- p.blk_chunk_idx = p.blk_chunk_idx == min_pos ? d : min_pos; -- } -+ if (min_pos != d) { nstl::swap(p.nodes[d], p.nodes[min_pos]); } - } - } - --status_t prb_check_blk(prb_t &p, const memory_desc_t &md_) { -- const auto md = memory_desc_wrapper(md_); -- const auto &bd = md.blocking_desc(); -- if (p.ip_tail == 0) return status::success; -- -- // Check if the inner blocks and p.nodes[blk].n in the firsti nblks -- // is equivalent in reverse order when has tail in block layout. 
-- const int nblk = bd.inner_nblks; -- for (int iblk = 0; iblk < nblk; ++iblk) { -- if (bd.inner_blks[nblk - iblk - 1] -- != static_cast(p.nodes[iblk].n)) -- return status::unimplemented; -+void prb_node_dependency(prb_t &prb) { -+ for (int i = 0; i < prb.ndims; i++) { -+ tr::node_t &node = prb.nodes[i]; -+ node.parent_node_id = node_t::empty_field; -+ for (int j = i + 1; j < prb.ndims; j++) { -+ const tr::node_t &potential_parent_node = prb.nodes[j]; -+ if (!potential_parent_node.is_dim_id_empty() -+ && potential_parent_node.dim_id == node.dim_id) { -+ node.parent_node_id = j; -+ break; -+ } -+ } - } -- return status::success; - } - - void prb_simplify(prb_t &p) { -@@ -338,16 +471,25 @@ void prb_simplify(prb_t &p) { - #pragma GCC diagnostic push - #pragma GCC diagnostic ignored "-Warray-bounds" - #endif -+ -+ const auto skip_dim_combining = [&p](const int node_id) -> bool { -+ return (p.is_tail_in_one_of_child_nodes(node_id) -+ && p.nodes[node_id].n > 1) -+ || p.nodes[node_id].tail_size > 0; -+ }; -+ -+ if (p.is_tail_present) prb_node_dependency(p); -+ - for (int d = 0; d < p.ndims - 1; ++d) { - auto &this_node = p.nodes[d + 0]; - auto &next_node = p.nodes[d + 1]; -- const bool skip_blk_idx = (p.ip_tail > 0 || p.op_tail > 0) -- && (p.blk_chunk_idx == d || p.blk_chunk_idx == d + 1); -+ const bool skip_dims_combining -+ = skip_dim_combining(d) || skip_dim_combining(d + 1); - const bool fold = false - || (next_node.n == static_cast(1) -- && !skip_blk_idx) // trivial case, just drop next node -+ && !skip_dims_combining) // trivial case, just drop next node - || (true // or real folding if possible -- && !skip_blk_idx -+ && !skip_dims_combining - && next_node.is - == static_cast( - this_node.n * this_node.is) -@@ -356,15 +498,20 @@ void prb_simplify(prb_t &p) { - this_node.n * this_node.os) - && next_node.ss - == static_cast( -- this_node.n * this_node.ss)); -+ this_node.n * this_node.ss) -+ && next_node.cs -+ == static_cast( -+ this_node.n * this_node.cs)); - if 
(fold) { - this_node.n *= next_node.n; -+ this_node.dim_id = node_t::empty_field; -+ this_node.is_zero_pad_needed = false; - for (int j = d + 2; j < p.ndims; ++j) - p.nodes[j - 1] = p.nodes[j]; -- if (d < p.blk_chunk_idx) --p.blk_chunk_idx; - --p.ndims; - --p.full_ndims; - --d; // make another try -+ if (p.is_tail_present) prb_node_dependency(p); - } - } - #if defined(__GNUC__) && __GNUC__ >= 4 -@@ -372,24 +519,42 @@ void prb_simplify(prb_t &p) { - #endif - } - --void prb_node_split(prb_t &p, int dim, size_t n1) { -+void prb_node_split(prb_t &p, int dim, size_t new_node_size) { - assert(dim < p.ndims); - assert(p.ndims < max_ndims); -- assert(p.nodes[dim].n % n1 == 0); -+ assert(p.nodes[dim].n % new_node_size == 0); - - p.ndims += 1; - p.full_ndims += 1; -- if (dim < p.blk_chunk_idx) p.blk_chunk_idx += 1; - - for (int d = p.ndims; d > dim + 1; --d) - p.nodes[d] = p.nodes[d - 1]; - -- p.nodes[dim + 1].n = p.nodes[dim].n / n1; -- p.nodes[dim + 1].is = p.nodes[dim].is * n1; -- p.nodes[dim + 1].os = p.nodes[dim].os * n1; -- p.nodes[dim + 1].ss = p.nodes[dim].ss * n1; -- -- p.nodes[dim].n = n1; -+ const size_t upper_node_size = p.nodes[dim].n / new_node_size; -+ const size_t lower_node_size = new_node_size; -+ p.nodes[dim + 1].n = upper_node_size; -+ p.nodes[dim].n = lower_node_size; -+ -+ const bool is_tail = p.nodes[dim].tail_size > 0; -+ const size_t upper_node_tail -+ = utils::div_up(p.nodes[dim].tail_size, lower_node_size) -+ == upper_node_size -+ ? 0 -+ : utils::div_up(p.nodes[dim].tail_size, lower_node_size); -+ const size_t lower_node_tail = p.nodes[dim].tail_size % lower_node_size; -+ p.nodes[dim].tail_size = is_tail ? lower_node_tail : 0; -+ p.nodes[dim + 1].tail_size = is_tail ? 
upper_node_tail : 0; -+ -+ p.nodes[dim + 1].is_zero_pad_needed -+ = p.nodes[dim].is_zero_pad_needed && p.nodes[dim + 1].tail_size > 0; -+ p.nodes[dim].is_zero_pad_needed -+ = p.nodes[dim].is_zero_pad_needed && p.nodes[dim].tail_size > 0; -+ -+ p.nodes[dim + 1].dim_id = p.nodes[dim].dim_id; -+ p.nodes[dim + 1].is = p.nodes[dim].is * lower_node_size; -+ p.nodes[dim + 1].os = p.nodes[dim].os * lower_node_size; -+ p.nodes[dim + 1].ss = p.nodes[dim].ss * lower_node_size; -+ p.nodes[dim + 1].cs = p.nodes[dim].cs * lower_node_size; - } - - void prb_node_swap(prb_t &p, int d0, int d1) { -@@ -425,8 +590,11 @@ void prb_dump(const prb_t &p) { - printf("@@@ type:%s:%s ndims:%d ", dnnl_dt2str(p.itype), - dnnl_dt2str(p.otype), p.ndims); - for (int d = 0; d < p.ndims; ++d) -- printf("[%zu:%td:%td:%td]", p.nodes[d].n, p.nodes[d].is, p.nodes[d].os, -- p.nodes[d].ss); -+ printf("[%zu:%zu:%d:%d:%s:%td:%td:%td:%td]", p.nodes[d].n, -+ p.nodes[d].tail_size, p.nodes[d].dim_id, -+ p.nodes[d].parent_node_id, -+ p.nodes[d].is_zero_pad_needed ? "true" : "false", p.nodes[d].is, -+ p.nodes[d].os, p.nodes[d].ss, p.nodes[d].cs); - printf(" off:%zu:%zu\n", p.ioff, p.ooff); - } - -diff --git a/src/cpu/reorder/cpu_reorder_regular_f32_f32.cpp b/src/cpu/reorder/cpu_reorder_regular_f32_f32.cpp -index f51e3c22414..fdefec8a049 100644 ---- a/src/cpu/reorder/cpu_reorder_regular_f32_f32.cpp -+++ b/src/cpu/reorder/cpu_reorder_regular_f32_f32.cpp -@@ -1,5 +1,6 @@ - /******************************************************************************* - * Copyright 2020-2022 Intel Corporation -+* Copyright 2022 FUJITSU LIMITED - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
-@@ -32,6 +33,7 @@ const impl_list_map_t ®ular_f32_f32_impl_list_map() { - DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_blk_reorder_t)) - DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_uni_reorder_t)) - -+ DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::jit_blk_reorder_t)) - DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::jit_uni_reorder_t)) - REG_SR(f32, any, f32, any, fmt_order::any, spec::reference) - -@@ -44,6 +46,7 @@ const impl_list_map_t ®ular_f32_f32_impl_list_map() { - DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_blk_reorder_t)) - DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_uni_reorder_t)) - -+ DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::jit_blk_reorder_t)) - DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::jit_uni_reorder_t)) - DNNL_NON_X64_ONLY(REG_SR_BIDIR(f32, any, f32, nCw16c)) - DNNL_NON_X64_ONLY(REG_SR_BIDIR(f32, any, f32, nCw8c)) -@@ -75,6 +78,7 @@ const impl_list_map_t ®ular_f32_f32_impl_list_map() { - DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_blk_reorder_t)) - DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_uni_reorder_t)) - -+ DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::jit_blk_reorder_t)) - DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::jit_uni_reorder_t)) - - DNNL_NON_X64_ONLY(REG_SR_BIDIR(f32, any, f32, nChw16c)) -@@ -123,6 +127,7 @@ const impl_list_map_t ®ular_f32_f32_impl_list_map() { - DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_blk_reorder_t)) - DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_uni_reorder_t)) - -+ DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::jit_blk_reorder_t)) - DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::jit_uni_reorder_t)) - - DNNL_NON_X64_ONLY(REG_SR_BIDIR(f32, any, f32, nCdhw16c)) -@@ -171,6 +176,7 @@ const impl_list_map_t ®ular_f32_f32_impl_list_map() { - DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_blk_reorder_t)) - DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_uni_reorder_t)) - -+ DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::jit_blk_reorder_t)) - 
DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::jit_uni_reorder_t)) - - -diff --git a/src/cpu/reorder/cpu_reorder_regular_f32_s32.cpp b/src/cpu/reorder/cpu_reorder_regular_f32_s32.cpp -index fadbee0ecf8..b1881df80e0 100644 ---- a/src/cpu/reorder/cpu_reorder_regular_f32_s32.cpp -+++ b/src/cpu/reorder/cpu_reorder_regular_f32_s32.cpp -@@ -1,5 +1,6 @@ - /******************************************************************************* - * Copyright 2020-2022 Intel Corporation -+* Copyright 2022 FUJITSU LIMITED - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. -@@ -31,6 +32,7 @@ const impl_list_map_t ®ular_f32_s32_impl_list_map() { - DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_blk_reorder_t)) - DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_uni_reorder_t)) - -+ DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::jit_blk_reorder_t)) - DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::jit_uni_reorder_t)) - DNNL_NON_X64_ONLY(REG_SR_BIDIR(f32, any, s32, nChw16c)) - REG_SR(f32, any, s32, any, fmt_order::any, spec::reference) -diff --git a/src/cpu/reorder/cpu_reorder_regular_f32_s8.cpp b/src/cpu/reorder/cpu_reorder_regular_f32_s8.cpp -index b83d47b2d6f..6bd305c7b41 100644 ---- a/src/cpu/reorder/cpu_reorder_regular_f32_s8.cpp -+++ b/src/cpu/reorder/cpu_reorder_regular_f32_s8.cpp -@@ -1,5 +1,6 @@ - /******************************************************************************* - * Copyright 2020-2022 Intel Corporation -+* Copyright 2022 FUJITSU LIMITED - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
-@@ -35,6 +36,7 @@ const impl_list_map_t ®ular_f32_s8_impl_list_map() { - DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_blk_reorder_t)) - DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_uni_reorder_t)) - -+ DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::jit_blk_reorder_t)) - DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::jit_uni_reorder_t)) - - DNNL_NON_X64_ONLY(REG_SR_BIDIR(f32, any, s8, nChw16c)) -diff --git a/src/cpu/reorder/cpu_reorder_regular_f32_u8.cpp b/src/cpu/reorder/cpu_reorder_regular_f32_u8.cpp -index 4bae84307e6..d306c3abeb8 100644 ---- a/src/cpu/reorder/cpu_reorder_regular_f32_u8.cpp -+++ b/src/cpu/reorder/cpu_reorder_regular_f32_u8.cpp -@@ -1,5 +1,6 @@ - /******************************************************************************* - * Copyright 2020-2022 Intel Corporation -+* Copyright 2022 FUJITSU LIMITED - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. -@@ -33,6 +34,7 @@ const impl_list_map_t ®ular_f32_u8_impl_list_map() { - DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_blk_reorder_t)) - DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_uni_reorder_t)) - -+ DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::jit_blk_reorder_t)) - DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::jit_uni_reorder_t)) - DNNL_NON_X64_ONLY(REG_SR_BIDIR(f32, any, u8, nChw16c)) - REG_SR(f32, any, u8, any, fmt_order::any, spec::reference) -diff --git a/src/cpu/reorder/cpu_reorder_regular_s32.cpp b/src/cpu/reorder/cpu_reorder_regular_s32.cpp -index 54d65661791..a8197402b0a 100644 ---- a/src/cpu/reorder/cpu_reorder_regular_s32.cpp -+++ b/src/cpu/reorder/cpu_reorder_regular_s32.cpp -@@ -1,5 +1,6 @@ - /******************************************************************************* - * Copyright 2020-2022 Intel Corporation -+* Copyright 2022 FUJITSU LIMITED - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
-@@ -34,6 +35,7 @@ const impl_list_map_t ®ular_s32_impl_list_map() { - DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_blk_reorder_t)) - DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_uni_reorder_t)) - -+ DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::jit_blk_reorder_t)) - DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::jit_uni_reorder_t)) - - DNNL_NON_X64_ONLY(REG_SR_BIDIR(s32, any, f32, nChw16c)) -diff --git a/src/cpu/reorder/cpu_reorder_regular_s8.cpp b/src/cpu/reorder/cpu_reorder_regular_s8.cpp -index f57d01e2009..ce18dc5caf1 100644 ---- a/src/cpu/reorder/cpu_reorder_regular_s8.cpp -+++ b/src/cpu/reorder/cpu_reorder_regular_s8.cpp -@@ -1,5 +1,6 @@ - /******************************************************************************* - * Copyright 2020-2022 Intel Corporation -+* Copyright 2022 FUJITSU LIMITED - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. -@@ -41,6 +42,7 @@ const impl_list_map_t ®ular_s8_impl_list_map() { - DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_blk_reorder_t)) - DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_uni_reorder_t)) - -+ DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::jit_blk_reorder_t)) - DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::jit_uni_reorder_t)) - - DNNL_NON_X64_ONLY(REG_SR_BIDIR(s8, any, f32, nChw16c)) -diff --git a/src/cpu/reorder/cpu_reorder_regular_u8.cpp b/src/cpu/reorder/cpu_reorder_regular_u8.cpp -index 73d731c3b15..87a58872262 100644 ---- a/src/cpu/reorder/cpu_reorder_regular_u8.cpp -+++ b/src/cpu/reorder/cpu_reorder_regular_u8.cpp -@@ -1,5 +1,6 @@ - /******************************************************************************* - * Copyright 2020-2022 Intel Corporation -+* Copyright 2022 FUJITSU LIMITED - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
-@@ -35,6 +36,7 @@ const impl_list_map_t ®ular_u8_impl_list_map() { - DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_blk_reorder_t)) - DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_uni_reorder_t)) - -+ DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::jit_blk_reorder_t)) - DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::jit_uni_reorder_t)) - - DNNL_NON_X64_ONLY(REG_SR_BIDIR(u8, any, f32, nChw16c)) diff --git a/third_party/mkl_dnn/onednn_acl_thread_local_scheduler.patch b/third_party/mkl_dnn/onednn_acl_thread_local_scheduler.patch index 11d6725f92eba8..9583308396dd1d 100644 --- a/third_party/mkl_dnn/onednn_acl_thread_local_scheduler.patch +++ b/third_party/mkl_dnn/onednn_acl_thread_local_scheduler.patch @@ -15,19 +15,19 @@ limitations under the License. ******************************************************************************* diff --git a/src/cpu/aarch64/acl_thread.cpp b/src/cpu/aarch64/acl_thread.cpp -index d7d83badcb..1a7bcd74ed 100644 +index fd2c76d01..bd7bed837 100644 --- a/src/cpu/aarch64/acl_thread.cpp +++ b/src/cpu/aarch64/acl_thread.cpp -@@ -41,14 +41,17 @@ void acl_thread_bind() { +@@ -55,14 +55,17 @@ void acl_set_benchmark_scheduler_default() { #endif - + #if DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_THREADPOOL --void acl_set_custom_scheduler() { +-void acl_set_tp_scheduler() { - static std::once_flag flag_once; - // Create threadpool scheduler - std::shared_ptr threadpool_scheduler - = std::make_unique(); -+void acl_set_custom_scheduler(int intra_threads = 0) { ++void acl_set_tp_scheduler(int intra_threads = 0) { + static thread_local std::once_flag flag_once; // set CUSTOM scheduler in ACL std::call_once(flag_once, @@ -40,59 +40,58 @@ index d7d83badcb..1a7bcd74ed 100644 + + arm_compute::Scheduler::set(threadpool_scheduler); }); } - + void acl_set_threadpool_num_threads() { +@@ -102,14 +105,6 @@ void set_acl_threading() { + acl_set_benchmark_scheduler_default(); + } + #endif +-#if DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_THREADPOOL +- if 
(verbose_has_profile_externals()) { +- acl_set_tp_benchmark_scheduler(); +- } else { +- acl_set_tp_scheduler(); +- } +- +-#endif + } + + } // namespace acl_thread_utils diff --git a/src/cpu/aarch64/acl_thread.hpp b/src/cpu/aarch64/acl_thread.hpp -index 46dde5eb05..13b3910515 100644 +index f073376e6..654a2aa5d 100644 --- a/src/cpu/aarch64/acl_thread.hpp +++ b/src/cpu/aarch64/acl_thread.hpp -@@ -34,7 +34,7 @@ void acl_thread_bind(); - +@@ -40,7 +40,7 @@ void acl_set_benchmark_scheduler_default(); + #if DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_THREADPOOL // Retrieve threadpool size during primitive execution and set ThreadpoolScheduler num_threads --void acl_set_custom_scheduler(); -+void acl_set_custom_scheduler(int intra_threads); +-void acl_set_tp_scheduler(); ++void acl_set_tp_scheduler(int intra_threads); void acl_set_threadpool_num_threads(); - #endif - + // Swap BenchmarkScheduler for custom scheduler builds (i.e. ThreadPoolScheduler) for DNNL_VERBOSE=profile,profile_externals + void acl_set_tp_benchmark_scheduler(); diff --git a/src/cpu/aarch64/acl_threadpool_scheduler.cpp b/src/cpu/aarch64/acl_threadpool_scheduler.cpp -index 418d7f30f9..7eb8a052b0 100644 +index 439ca862e..6656c37a5 100644 --- a/src/cpu/aarch64/acl_threadpool_scheduler.cpp +++ b/src/cpu/aarch64/acl_threadpool_scheduler.cpp @@ -102,8 +102,6 @@ void ThreadpoolScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, void ThreadpoolScheduler::run_workloads( std::vector &workloads) { - + - arm_compute::lock_guard lock(this->_run_workloads_mutex); - const unsigned int num_threads = std::min(static_cast(_num_threads), static_cast(workloads.size())); diff --git a/src/cpu/cpu_engine.cpp b/src/cpu/cpu_engine.cpp -index 4ee70a405c..e9211f42e0 100644 +index 0bfec3871..7207b2b60 100644 --- a/src/cpu/cpu_engine.cpp +++ b/src/cpu/cpu_engine.cpp @@ -47,6 +47,7 @@ status_t cpu_engine_t::create_stream(stream_t **stream, unsigned flags) { #if DNNL_CPU_RUNTIME == DNNL_RUNTIME_THREADPOOL status_t 
cpu_engine_t::create_stream(stream_t **stream, dnnl::threadpool_interop::threadpool_iface *threadpool) { -+ dnnl::impl::cpu::aarch64::acl_thread_utils::acl_set_custom_scheduler(threadpool->get_num_threads()); ++ dnnl::impl::cpu::aarch64::acl_thread_utils::acl_set_tp_scheduler(threadpool->get_num_threads()); return safe_ptr_assign( *stream, new cpu_stream_t(this, threadpool)); } -diff --git a/src/cpu/cpu_engine.hpp b/src/cpu/cpu_engine.hpp -index 7aa077e4ef..2938650963 100644 ---- a/src/cpu/cpu_engine.hpp -+++ b/src/cpu/cpu_engine.hpp -@@ -175,11 +175,6 @@ public: - // dnnl_get_max_threads() == OMP_NUM_THREADS - dnnl::impl::cpu::aarch64::acl_thread_utils::acl_thread_bind(); - #endif -- --#if DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_THREADPOOL -- // Set ACL scheduler for threadpool runtime -- dnnl::impl::cpu::aarch64::acl_thread_utils::acl_set_custom_scheduler(); --#endif - #endif - return status::success; - }; diff --git a/third_party/mkl_dnn/onednn_acl_threadcap.patch b/third_party/mkl_dnn/onednn_acl_threadcap.patch index fb190861936f89..3a33af153e917c 100644 --- a/third_party/mkl_dnn/onednn_acl_threadcap.patch +++ b/third_party/mkl_dnn/onednn_acl_threadcap.patch @@ -1,5 +1,5 @@ ******************************************************************************* - Copyright 2022 Arm Limited and affiliates. + Copyright 2023 Arm Limited and affiliates. SPDX-License-Identifier: Apache-2.0 Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,19 +15,19 @@ limitations under the License. 
******************************************************************************* diff --git a/src/cpu/aarch64/acl_thread.cpp b/src/cpu/aarch64/acl_thread.cpp -index d7d83badc..5a263b8d5 100644 +index fd2c76d01..2d7c76d48 100644 --- a/src/cpu/aarch64/acl_thread.cpp +++ b/src/cpu/aarch64/acl_thread.cpp @@ -17,6 +17,8 @@ - + #include "cpu/aarch64/acl_thread.hpp" #if DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_THREADPOOL #include "cpu/aarch64/acl_threadpool_scheduler.hpp" +#elif DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_OMP +#include #endif - - namespace dnnl { -@@ -29,9 +31,10 @@ namespace acl_thread_utils { + #include "cpu/aarch64/acl_benchmark_scheduler.hpp" + +@@ -30,9 +32,10 @@ namespace acl_thread_utils { #if DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_OMP void acl_thread_bind() { static std::once_flag flag_once; diff --git a/third_party/mkl_dnn/onednn_acl_threadpool_scheduler.patch b/third_party/mkl_dnn/onednn_acl_threadpool_scheduler.patch deleted file mode 100644 index 0e0cb39e82f1bb..00000000000000 --- a/third_party/mkl_dnn/onednn_acl_threadpool_scheduler.patch +++ /dev/null @@ -1,45 +0,0 @@ - ******************************************************************************* - Copyright 2023 Arm Limited and affiliates. - SPDX-License-Identifier: Apache-2.0 - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
- ******************************************************************************* - -diff --git a/src/cpu/aarch64/acl_threadpool_scheduler.cpp b/src/cpu/aarch64/acl_threadpool_scheduler.cpp -index 418d7f30f..439ca862e 100644 ---- a/src/cpu/aarch64/acl_threadpool_scheduler.cpp -+++ b/src/cpu/aarch64/acl_threadpool_scheduler.cpp -@@ -1,5 +1,5 @@ - /******************************************************************************* --* Copyright 2022 Arm Ltd. and affiliates -+* Copyright 2022-2023 Arm Ltd. and affiliates - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. -@@ -117,14 +117,14 @@ void ThreadpoolScheduler::run_workloads( - if (is_async) b.init(num_threads); - tp->parallel_for(num_threads, [&](int ithr, int nthr) { - bool is_main = get_active_threadpool() == tp; -- if (is_main) activate_threadpool(tp); -+ if (!is_main) activate_threadpool(tp); - // Make ThreadInfo local to avoid race conditions - ThreadInfo info; - info.cpu_info = &cpu_info(); - info.num_threads = nthr; - info.thread_id = ithr; - process_workloads(workloads, feeder, info); -- if (is_main) deactivate_threadpool(); -+ if (!is_main) deactivate_threadpool(); - if (is_async) b.notify(); - }); - if (is_async) b.wait(); diff --git a/third_party/xla/third_party/mkl_dnn/mkldnn_acl.BUILD b/third_party/xla/third_party/mkl_dnn/mkldnn_acl.BUILD index 106d86a64fa916..d453ee83240f68 100644 --- a/third_party/xla/third_party/mkl_dnn/mkldnn_acl.BUILD +++ b/third_party/xla/third_party/mkl_dnn/mkldnn_acl.BUILD @@ -27,6 +27,7 @@ _DNNL_RUNTIME_THREADPOOL = { "#cmakedefine DNNL_SYCL_HIP": "#undef DNNL_SYCL_HIP", "#cmakedefine DNNL_ENABLE_STACK_CHECKER": "#undef DNNL_ENABLE_STACK_CHECKER", "#cmakedefine DNNL_EXPERIMENTAL": "#undef DNNL_EXPERIMENTAL", + "#cmakedefine ONEDNN_BUILD_GRAPH": "#undef ONEDNN_BUILD_GRAPH", "#cmakedefine01 BUILD_TRAINING": "#define BUILD_TRAINING 1", "#cmakedefine01 BUILD_INFERENCE": "#define 
BUILD_INFERENCE 0", "#cmakedefine01 BUILD_PRIMITIVE_ALL": "#define BUILD_PRIMITIVE_ALL 1", @@ -74,6 +75,7 @@ _DNNL_RUNTIME_OMP = { "#cmakedefine DNNL_SYCL_HIP": "#undef DNNL_SYCL_HIP", "#cmakedefine DNNL_ENABLE_STACK_CHECKER": "#undef DNNL_ENABLE_STACK_CHECKER", "#cmakedefine DNNL_EXPERIMENTAL": "#undef DNNL_EXPERIMENTAL", + "#cmakedefine ONEDNN_BUILD_GRAPH": "#undef ONEDNN_BUILD_GRAPH", "#cmakedefine01 BUILD_TRAINING": "#define BUILD_TRAINING 1", "#cmakedefine01 BUILD_INFERENCE": "#define BUILD_INFERENCE 0", "#cmakedefine01 BUILD_PRIMITIVE_ALL": "#define BUILD_PRIMITIVE_ALL 1", @@ -124,9 +126,9 @@ expand_template( name = "dnnl_version_h", out = "include/oneapi/dnnl/dnnl_version.h", substitutions = { - "@DNNL_VERSION_MAJOR@": "2", - "@DNNL_VERSION_MINOR@": "7", - "@DNNL_VERSION_PATCH@": "3", + "@DNNL_VERSION_MAJOR@": "3", + "@DNNL_VERSION_MINOR@": "2", + "@DNNL_VERSION_PATCH@": "1", "@DNNL_VERSION_HASH@": "N/A", }, template = "include/oneapi/dnnl/dnnl_version.h.in", @@ -142,6 +144,7 @@ cc_library( ], exclude = [ "src/cpu/x64/**", + "src/cpu/rv64/**", ], ), copts = select({ diff --git a/third_party/xla/third_party/tsl/third_party/mkl_dnn/mkldnn_acl.BUILD b/third_party/xla/third_party/tsl/third_party/mkl_dnn/mkldnn_acl.BUILD index d07e0e378c1671..fab95348cdc8b0 100644 --- a/third_party/xla/third_party/tsl/third_party/mkl_dnn/mkldnn_acl.BUILD +++ b/third_party/xla/third_party/tsl/third_party/mkl_dnn/mkldnn_acl.BUILD @@ -27,6 +27,7 @@ _DNNL_RUNTIME_THREADPOOL = { "#cmakedefine DNNL_SYCL_HIP": "#undef DNNL_SYCL_HIP", "#cmakedefine DNNL_ENABLE_STACK_CHECKER": "#undef DNNL_ENABLE_STACK_CHECKER", "#cmakedefine DNNL_EXPERIMENTAL": "#undef DNNL_EXPERIMENTAL", + "#cmakedefine ONEDNN_BUILD_GRAPH": "#undef ONEDNN_BUILD_GRAPH", "#cmakedefine01 BUILD_TRAINING": "#define BUILD_TRAINING 1", "#cmakedefine01 BUILD_INFERENCE": "#define BUILD_INFERENCE 0", "#cmakedefine01 BUILD_PRIMITIVE_ALL": "#define BUILD_PRIMITIVE_ALL 1", @@ -74,6 +75,7 @@ _DNNL_RUNTIME_OMP = { "#cmakedefine 
DNNL_SYCL_HIP": "#undef DNNL_SYCL_HIP", "#cmakedefine DNNL_ENABLE_STACK_CHECKER": "#undef DNNL_ENABLE_STACK_CHECKER", "#cmakedefine DNNL_EXPERIMENTAL": "#undef DNNL_EXPERIMENTAL", + "#cmakedefine ONEDNN_BUILD_GRAPH": "#undef ONEDNN_BUILD_GRAPH", "#cmakedefine01 BUILD_TRAINING": "#define BUILD_TRAINING 1", "#cmakedefine01 BUILD_INFERENCE": "#define BUILD_INFERENCE 0", "#cmakedefine01 BUILD_PRIMITIVE_ALL": "#define BUILD_PRIMITIVE_ALL 1", @@ -124,9 +126,9 @@ expand_template( name = "dnnl_version_h", out = "include/oneapi/dnnl/dnnl_version.h", substitutions = { - "@DNNL_VERSION_MAJOR@": "2", - "@DNNL_VERSION_MINOR@": "7", - "@DNNL_VERSION_PATCH@": "3", + "@DNNL_VERSION_MAJOR@": "3", + "@DNNL_VERSION_MINOR@": "2", + "@DNNL_VERSION_PATCH@": "1", "@DNNL_VERSION_HASH@": "N/A", }, template = "include/oneapi/dnnl/dnnl_version.h.in", @@ -142,6 +144,7 @@ cc_library( ], exclude = [ "src/cpu/x64/**", + "src/cpu/rv64/**", ], ), copts = select({ diff --git a/third_party/xla/third_party/tsl/tsl/mkl/build_defs.bzl b/third_party/xla/third_party/tsl/tsl/mkl/build_defs.bzl index 18cc7235166a32..90030a39744c00 100644 --- a/third_party/xla/third_party/tsl/tsl/mkl/build_defs.bzl +++ b/third_party/xla/third_party/tsl/tsl/mkl/build_defs.bzl @@ -115,7 +115,7 @@ def onednn_v3_define(): An empty list of all other cases (include ARM builds). 
""" return select({ - "@local_tsl//tsl/mkl:build_with_mkl_aarch64": [], + "@local_tsl//tsl/mkl:build_with_mkl_aarch64": ["-DENABLE_ONEDNN_V3"], "@local_tsl//tsl:linux_x86_64": ["-DENABLE_ONEDNN_V3"], "@local_tsl//tsl:windows": ["-DENABLE_ONEDNN_V3"], "//conditions:default": [], diff --git a/third_party/xla/third_party/tsl/workspace2.bzl b/third_party/xla/third_party/tsl/workspace2.bzl index 261d456e9beda8..cfd8ab0f9f0472 100644 --- a/third_party/xla/third_party/tsl/workspace2.bzl +++ b/third_party/xla/third_party/tsl/workspace2.bzl @@ -166,19 +166,14 @@ def _tf_repositories(): name = "mkl_dnn_acl_compatible", build_file = "//tensorflow/third_party/mkl_dnn:mkldnn_acl.BUILD", patch_file = [ - "//tensorflow/third_party/mkl_dnn:onednn_acl_threadcap.patch", - "//tensorflow/third_party/mkl_dnn:onednn_acl_remove_winograd.patch", - "//tensorflow/third_party/mkl_dnn:onednn_acl_fixed_format_kernels.patch", - "//tensorflow/third_party/mkl_dnn:onednn_acl_depthwise_convolution.patch", - "//tensorflow/third_party/mkl_dnn:onednn_acl_threadpool_scheduler.patch", - "//tensorflow/third_party/mkl_dnn:onednn_acl_reorder_padded.patch", - "//tensorflow/third_party/mkl_dnn:onednn_acl_reorder_update.patch", - "//tensorflow/third_party/mkl_dnn:onednn_acl_reorder.patch", - "//tensorflow/third_party/mkl_dnn:onednn_acl_thread_local_scheduler.patch", + "//third_party/mkl_dnn:onednn_acl_threadcap.patch", + "//third_party/mkl_dnn:onednn_acl_reorder.patch", + "//third_party/mkl_dnn:onednn_acl_thread_local_scheduler.patch", + "//third_party/mkl_dnn:onednn_acl_fp32_bf16_reorder.patch", ], - sha256 = "a50993aa6265b799b040fe745e0010502f9f7103cc53a9525d59646aef006633", - strip_prefix = "oneDNN-2.7.3", - urls = tf_mirror_urls("https://github.com/oneapi-src/oneDNN/archive/v2.7.3.tar.gz"), + sha256 = "2f76b407ef8893cca71340f88cd800019a1f14f8ac1bbdbb89a84be1370b52e3", + strip_prefix = "oneDNN-3.2.1", + urls = tf_mirror_urls("https://github.com/oneapi-src/oneDNN/archive/refs/tags/v3.2.1.tar.gz"), ) 
tf_http_archive( From cf3edc5aa450c8e7a95a93338e06a16e6d207e58 Mon Sep 17 00:00:00 2001 From: Moritz Firsching Date: Tue, 19 Sep 2023 16:11:04 +0200 Subject: [PATCH 020/567] nicer link to OpenSSF scorecard --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a40d3d357f19e9..dbe5e9a1c1b233 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ [![PyPI](https://badge.fury.io/py/tensorflow.svg)](https://badge.fury.io/py/tensorflow) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4724125.svg)](https://doi.org/10.5281/zenodo.4724125) [![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/1486/badge)](https://bestpractices.coreinfrastructure.org/projects/1486) -[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/tensorflow/tensorflow/badge)](https://api.securityscorecards.dev/projects/github.com/tensorflow/tensorflow) +[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/tensorflow/tensorflow/badge)](https://securityscorecards.dev/viewer/?uri=github.com/tensorflow/tensorflow) [![Fuzzing Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/tensorflow.svg)](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:tensorflow) [![Fuzzing Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/tensorflow-py.svg)](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:tensorflow-py) [![OSSRank](https://shields.io/endpoint?url=https://ossrank.com/shield/44)](https://ossrank.com/p/44) From 4e101ec87686515d10830da52b735c87ecbaab95 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 20 Sep 2023 02:02:05 -0700 Subject: [PATCH 021/567] compat: Update forward compatibility horizon to 2023-09-20 PiperOrigin-RevId: 566892239 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 495482568ac319..0be4fb3a98e8b1 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -29,7 +29,7 @@ # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2023, 9, 19) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2023, 9, 20) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From 79be3fa6129625e695913c99ad0d7592c733fd27 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 20 Sep 2023 02:02:10 -0700 Subject: [PATCH 022/567] Update GraphDef version to 1625. PiperOrigin-RevId: 566892275 --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 6736c45852074b..05991cc0e70419 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 1624 // Updated: 2023/9/19 +#define TF_GRAPH_DEF_VERSION 1625 // Updated: 2023/9/20 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). 
// From f12ce2113e361dac536d95d6f1b396048259a3c3 Mon Sep 17 00:00:00 2001 From: Alan Kelly Date: Wed, 20 Sep 2023 02:12:01 -0700 Subject: [PATCH 023/567] Conv3D doesn't have to transpose weights PiperOrigin-RevId: 566894491 --- tensorflow/lite/kernels/conv3d.cc | 63 +++---------------- .../internal/optimized/optimized_ops.h | 34 ++++------ 2 files changed, 21 insertions(+), 76 deletions(-) diff --git a/tensorflow/lite/kernels/conv3d.cc b/tensorflow/lite/kernels/conv3d.cc index 9f775038ca8760..cf9eb2a8d4d89c 100644 --- a/tensorflow/lite/kernels/conv3d.cc +++ b/tensorflow/lite/kernels/conv3d.cc @@ -46,17 +46,14 @@ static constexpr size_t kMaxIm2colBufferSizeMobile = 1024 * 1024 * 1024; // 1GB struct OpData { Padding3DValues padding; int im2col_tensor_id = kTensorNotAllocated; - int transposed_filter_tensor_id = kTensorNotAllocated; bool need_im2col = false; - bool need_transposed_filter = false; // Disable im2col if the temporary im2col tensor requires too much memory // (i.e. >= kMaxIm2colBufferSizeMobile). bool im2col_oversized = false; int32_t im2col_index; - int32_t transposed_filter_index; }; void* Init(TfLiteContext* context, const char* buffer, size_t length) { @@ -83,16 +80,12 @@ TfLiteStatus AllocateTemporaryTensorsIfRequired( opdata->need_im2col = (kernel_type == kGenericOptimized) && (need_dilated_im2col || need_non_dilated_im2col); - // TODO(b/183455632): Add transposing logic in converter so constant folding - // might work on constant filter tensor. - opdata->need_transposed_filter = (kernel_type == kGenericOptimized); // On mobile platforms, the generic optimized kernel will not be used if the // temporary im2col tensor requires too much memory. 
if (IsMobilePlatform() && opdata->need_im2col && im2col_bytes >= kMaxIm2colBufferSizeMobile) { opdata->need_im2col = false; - opdata->need_transposed_filter = false; opdata->im2col_oversized = true; } @@ -104,15 +97,6 @@ TfLiteStatus AllocateTemporaryTensorsIfRequired( opdata->im2col_index = temporaries_count++; } - if (opdata->need_transposed_filter) { - if (opdata->transposed_filter_tensor_id == kTensorNotAllocated) { - TF_LITE_ENSURE_OK( - context, context->AddTensors(context, 1, - &opdata->transposed_filter_tensor_id)); - } - opdata->transposed_filter_index = temporaries_count++; - } - TfLiteIntArrayFree(node->temporaries); node->temporaries = TfLiteIntArrayCreate(temporaries_count); return kTfLiteOk; @@ -212,25 +196,6 @@ TfLiteStatus Prepare(KernelType kernel_type, TfLiteContext* context, context->ResizeTensor(context, im2col, im2col_size)); } - if (opdata->need_transposed_filter) { - TfLiteIntArray* transposed_filter_size = TfLiteIntArrayCreate(5); - transposed_filter_size->data[0] = filter->dims->data[4]; - transposed_filter_size->data[1] = filter->dims->data[0]; - transposed_filter_size->data[2] = filter->dims->data[1]; - transposed_filter_size->data[3] = filter->dims->data[2]; - transposed_filter_size->data[4] = filter->dims->data[3]; - - TfLiteTensor* transposed_filter; - node->temporaries->data[opdata->transposed_filter_index] = - opdata->transposed_filter_tensor_id; - TF_LITE_ENSURE_OK(context, GetTemporarySafe(context, node, - opdata->transposed_filter_index, - &transposed_filter)); - transposed_filter->type = filter->type; - transposed_filter->allocation_type = kTfLiteArenaRw; - TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, transposed_filter, - transposed_filter_size)); - } return kTfLiteOk; } @@ -239,11 +204,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { return Prepare(kernel_type, context, node); } -void EvalFloat(KernelType kernel_type, TfLiteContext* context, TfLiteNode* node, - TfLiteConv3DParams* params, 
OpData* opdata, - const TfLiteTensor* input, const TfLiteTensor* filter, - const TfLiteTensor* bias, TfLiteTensor* im2col, - TfLiteTensor* tranposed_filter, TfLiteTensor* output) { +TfLiteStatus EvalFloat(KernelType kernel_type, TfLiteContext* context, + TfLiteNode* node, TfLiteConv3DParams* params, + OpData* opdata, const TfLiteTensor* input, + const TfLiteTensor* filter, const TfLiteTensor* bias, + TfLiteTensor* im2col, TfLiteTensor* output) { float output_activation_min, output_activation_max; CalculateActivationRange(params->activation, &output_activation_min, &output_activation_max); @@ -265,19 +230,17 @@ void EvalFloat(KernelType kernel_type, TfLiteContext* context, TfLiteNode* node, GetTensorData(filter), GetTensorShape(bias), GetTensorData(bias), GetTensorShape(output), GetTensorData(output)); - break; + return kTfLiteOk; } case kGenericOptimized: { - optimized_ops::Conv3D( + return optimized_ops::Conv3D( runtime_params, GetTensorShape(input), GetTensorData(input), GetTensorShape(filter), GetTensorData(filter), GetTensorShape(bias), GetTensorData(bias), GetTensorShape(output), GetTensorData(output), GetTensorShape(im2col), GetTensorData(im2col), - GetTensorShape(tranposed_filter), - GetTensorData(tranposed_filter), CpuBackendContext::GetFromContext(context)); - } break; + } } } @@ -297,11 +260,6 @@ TfLiteStatus Eval(KernelType kernel_type, TfLiteContext* context, TfLiteTensor* im2col = opdata->need_im2col ? &context->tensors[opdata->im2col_tensor_id] : nullptr; - TfLiteTensor* transposed_filter = - opdata->need_transposed_filter - ? &context->tensors[opdata->transposed_filter_tensor_id] - : nullptr; - // Fallback to reference execution path when im2col is needed but disabled. 
if (opdata->im2col_oversized) { kernel_type = kReference; @@ -309,9 +267,8 @@ TfLiteStatus Eval(KernelType kernel_type, TfLiteContext* context, switch (input->type) { case kTfLiteFloat32: - EvalFloat(kernel_type, context, node, params, opdata, input, filter, bias, - im2col, transposed_filter, output); - break; + return EvalFloat(kernel_type, context, node, params, opdata, input, + filter, bias, im2col, output); default: TF_LITE_KERNEL_LOG(context, "Type %s currently not supported.", TfLiteTypeGetName(input->type)); diff --git a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h index 847d870065cc4d..5609719398fcee 100644 --- a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h @@ -7957,15 +7957,13 @@ inline void ArgMax(const RuntimeShape& input1_shape, const T1* input1_data, ArgMax(input1_shape, input1_data, input2_data, output_shape, output_data); } -inline void Conv3D(const Conv3DParams& params, const RuntimeShape& input_shape, - const float* input_data, const RuntimeShape& filter_shape, - const float* filter_data, const RuntimeShape& bias_shape, - const float* bias_data, const RuntimeShape& output_shape, - float* output_data, const RuntimeShape& im2col_shape, - float* im2col_data, - const RuntimeShape& transposed_filter_shape, - float* transposed_filter_data, - CpuBackendContext* cpu_backend_context) { +inline TfLiteStatus Conv3D( + const Conv3DParams& params, const RuntimeShape& input_shape, + const float* input_data, const RuntimeShape& filter_shape, + const float* filter_data, const RuntimeShape& bias_shape, + const float* bias_data, const RuntimeShape& output_shape, + float* output_data, const RuntimeShape& im2col_shape, float* im2col_data, + CpuBackendContext* cpu_backend_context) { const int stride_depth = params.stride_depth; const int stride_height = params.stride_height; const int stride_width = 
params.stride_width; @@ -8012,24 +8010,13 @@ inline void Conv3D(const Conv3DParams& params, const RuntimeShape& input_shape, gemm_input_shape = &input_shape; } - // Transpose the filter tensor. - TransposeParams transpose_params; - transpose_params.perm_count = 5; - transpose_params.perm[0] = 4; - transpose_params.perm[1] = 0; - transpose_params.perm[2] = 1; - transpose_params.perm[3] = 2; - transpose_params.perm[4] = 3; - Transpose(transpose_params, filter_shape, filter_data, - transposed_filter_shape, transposed_filter_data); - const int gemm_input_dims = gemm_input_shape->DimensionsCount(); int m = FlatSizeSkipDim(*gemm_input_shape, gemm_input_dims - 1); int n = output_shape.Dims(4); int k = gemm_input_shape->Dims(gemm_input_dims - 1); cpu_backend_gemm::MatrixParams lhs_params; - lhs_params.order = cpu_backend_gemm::Order::kRowMajor; + lhs_params.order = cpu_backend_gemm::Order::kColMajor; lhs_params.rows = n; lhs_params.cols = k; cpu_backend_gemm::MatrixParams rhs_params; @@ -8044,9 +8031,10 @@ inline void Conv3D(const Conv3DParams& params, const RuntimeShape& input_shape, gemm_params.bias = bias_data; gemm_params.clamp_min = output_activation_min; gemm_params.clamp_max = output_activation_max; - cpu_backend_gemm::Gemm(lhs_params, transposed_filter_data, rhs_params, - gemm_input_data, dst_params, output_data, gemm_params, + cpu_backend_gemm::Gemm(lhs_params, filter_data, rhs_params, gemm_input_data, + dst_params, output_data, gemm_params, cpu_backend_context); + return kTfLiteOk; } // Returns in 'im_data' (assumed to be zero-initialized) image patch in storage From fea0c082192a4063faf3b2f54a48b52cbb602487 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tam=C3=A1s=20Danyluk?= Date: Wed, 20 Sep 2023 02:49:10 -0700 Subject: [PATCH 024/567] [XLA:GPU] Fix boundary checks for split-K GEMM fusions Without this fix, we get CUDA_ERROR_ILLEGAL_ADDRESS on the added test case. 
PiperOrigin-RevId: 566902695 --- .../xla/xla/service/gpu/ir_emitter_triton.cc | 30 ++++++++++++----- .../xla/service/gpu/ir_emitter_triton_test.cc | 32 +++++++++++++++++++ 2 files changed, 54 insertions(+), 8 deletions(-) diff --git a/third_party/xla/xla/service/gpu/ir_emitter_triton.cc b/third_party/xla/xla/service/gpu/ir_emitter_triton.cc index bce462fea8bc5c..d431f1349c294f 100644 --- a/third_party/xla/xla/service/gpu/ir_emitter_triton.cc +++ b/third_party/xla/xla/service/gpu/ir_emitter_triton.cc @@ -432,9 +432,16 @@ Value EmitConstant(ImplicitLocOpBuilder& b, const HloInstruction& constant) { } struct DimProperties { + DimProperties(int64_t index, Value offset, int block_size, int split_value) + : index(index), + offset(offset), + block_size(block_size), + split_value(split_value) {} + int64_t index; Value offset; int block_size; + int split_value; }; Value EmitBroadcast(ImplicitLocOpBuilder& b, @@ -1075,7 +1082,7 @@ class MatMulEmitterHelper { // is logically split, major part is addressed using pid_batch. 
count /= *dims_.lhs_noncontracting_split; } - if (count % properties.block_size != 0) { + if (count % (properties.block_size * properties.split_value) != 0) { boundary_checks.push_back(bounds.size()); } bounds.push_back(Cst64(count)); @@ -1269,18 +1276,24 @@ Status EmitMatMul(mlir::OpBuilder builder, absl::string_view libdevice_path, Side lhs{TritonFusionAnalysis::Scope::LHS, /*tiled_dims=*/ - {{dims.lhs_noncontracting_dim_idx, pid_m_offset, block_m}, - {dims.lhs_contracting_dim_idx, pid_k_offset, block_k}}, + {DimProperties(dims.lhs_noncontracting_dim_idx, pid_m_offset, + block_m, /*split_value=*/1), + DimProperties(dims.lhs_contracting_dim_idx, pid_k_offset, block_k, + split_k)}, dims.lhs_batch_dim_idx}; Side rhs{TritonFusionAnalysis::Scope::RHS, /*tiled_dims=*/ - {{dims.rhs_contracting_dim_idx, pid_k_offset, block_k}, - {dims.rhs_noncontracting_dim_idx, pid_n_offset, block_n}}, + {DimProperties(dims.rhs_contracting_dim_idx, pid_k_offset, block_k, + split_k), + DimProperties(dims.rhs_noncontracting_dim_idx, pid_n_offset, + block_n, /*split_value=*/1)}, dims.rhs_batch_dim_idx}; Side out{TritonFusionAnalysis::Scope::OUTPUT, /*tiled_dims=*/ - {{dims.out_lhs_noncontracting_dim_idx, pid_m_offset, block_m}, - {dims.out_rhs_noncontracting_dim_idx, pid_n_offset, block_n}}, + {DimProperties(dims.out_lhs_noncontracting_dim_idx, pid_m_offset, + block_m, /*split_value=*/1), + DimProperties(dims.out_rhs_noncontracting_dim_idx, pid_n_offset, + block_n, /*split_value=*/1)}, dims.out_batch_dim_idx}; auto body_builder = [&](mlir::OpBuilder&, mlir::Location, Value ki, @@ -1516,7 +1529,8 @@ Status EmitSoftMax(mlir::OpBuilder builder, absl::string_view libdevice_path, b, make_tensor_pointer(fn.getArgument(0)), boundary_checks); // Dimension 0 is the reduced one by construction and it's the only one // present in the tile shapes. 
- std::vector tiled_dims = {{0, row_index, block_row}}; + std::vector tiled_dims = { + DimProperties(0, row_index, block_row, /*split_value=*/1)}; TF_ASSIGN_OR_RETURN( Value result, EmitScope(b, libdevice_path, &analysis, diff --git a/third_party/xla/xla/service/gpu/ir_emitter_triton_test.cc b/third_party/xla/xla/service/gpu/ir_emitter_triton_test.cc index 2ffe9356e39bd9..65f6906475d8e2 100644 --- a/third_party/xla/xla/service/gpu/ir_emitter_triton_test.cc +++ b/third_party/xla/xla/service/gpu/ir_emitter_triton_test.cc @@ -235,6 +235,38 @@ ENTRY entry { EXPECT_GT(result.shmem_bytes, dev_info.shared_memory_per_block); } +TEST_F(TritonGemmTest, WorksWhenKIsDivisibleByBlockKButNotByBlockKTimesSplitK) { + // The condition mentioned in the test name is fulfilled by + // GemmKey(16, 64, 256, 8, 1, 4), which was part of the default configs for + // Ampere at the time of the addition of this test case. + constexpr absl::string_view kHloText = R"( +HloModule extracted + +ENTRY e { + a = f16[16,5120]{1,0} parameter(0) + b = s8[5120,10240]{1,0} parameter(1) + converted_b = f16[5120,10240]{1,0} convert(b) + ROOT r = f16[16,10240]{1,0} dot(a, converted_b), lhs_contracting_dims={1}, rhs_contracting_dims={0} +} +)"; + + // This check tests if Triton is used at all plus it runs TritonAutotuner, + // which verifies if the generated kernels can run without errors such as + // CUDA_ERROR_ILLEGAL_ADDRESS. + MatchOptimizedHlo(kHloText, R"( +; CHECK: ENTRY +; CHECK-NEXT: parameter +; CHECK-NEXT: parameter +; CHECK-NEXT: fusion( +; CHECK-SAME: kind=kCustom +; CHECK-SAME: "block_m": + )"); + + // Not doing a comparison here, because the input matrices are quite big. + // If I reduce their size then they can no longer trigger the error, that I + // want to avoid with this test case. 
+} + TEST_F(TritonGemmTest, MultipleDims) { const std::string hlo_text = R"( HloModule t From 879ffa3aca8e6bdb3eb9ae1a3081dc7d37ce0d23 Mon Sep 17 00:00:00 2001 From: Fadi Arafeh Date: Mon, 18 Sep 2023 18:40:30 +0100 Subject: [PATCH 025/567] BF16 capability detection for Ubuntu 20.04 This adds a patch to oneDNN for BF16 capability detection for Ubuntu 20.04 on aarch64. The contents in the patch are fully authored by @kawakami-k, the source used for this patch is available here: oneapi-src/oneDNN#1670 --- tensorflow/workspace2.bzl | 1 + ...capability_detection_for_ubuntu20.04.patch | 50 +++++++++++++++++++ .../xla/third_party/tsl/workspace2.bzl | 1 + 3 files changed, 52 insertions(+) create mode 100644 third_party/mkl_dnn/onednn_acl_bf16_capability_detection_for_ubuntu20.04.patch diff --git a/tensorflow/workspace2.bzl b/tensorflow/workspace2.bzl index 22d25f101488aa..6c30a25d0397f3 100644 --- a/tensorflow/workspace2.bzl +++ b/tensorflow/workspace2.bzl @@ -208,6 +208,7 @@ def _tf_repositories(): "//third_party/mkl_dnn:onednn_acl_reorder.patch", "//third_party/mkl_dnn:onednn_acl_thread_local_scheduler.patch", "//third_party/mkl_dnn:onednn_acl_fp32_bf16_reorder.patch", + "//third_party/mkl_dnn:onednn_acl_bf16_capability_detection_for_ubuntu20.04.patch", ], sha256 = "2f76b407ef8893cca71340f88cd800019a1f14f8ac1bbdbb89a84be1370b52e3", strip_prefix = "oneDNN-3.2.1", diff --git a/third_party/mkl_dnn/onednn_acl_bf16_capability_detection_for_ubuntu20.04.patch b/third_party/mkl_dnn/onednn_acl_bf16_capability_detection_for_ubuntu20.04.patch new file mode 100644 index 00000000000000..6d6f0c0eaabb13 --- /dev/null +++ b/third_party/mkl_dnn/onednn_acl_bf16_capability_detection_for_ubuntu20.04.patch @@ -0,0 +1,50 @@ +From 9a9430c7db870b78c6402d786a67921af4a66334 Mon Sep 17 00:00:00 2001 +From: Kentaro Kawakami +Date: Fri, 26 May 2023 10:58:36 +0900 +Subject: [PATCH] cpu: aarch64: xbyak_aarch64: BF16 capability detection for + Ubuntu 20.04 + +--- + 
.../aarch64/xbyak_aarch64/src/util_impl_linux.h | 15 ++++++++++++--- + 1 file changed, 12 insertions(+), 3 deletions(-) + +diff --git a/src/cpu/aarch64/xbyak_aarch64/src/util_impl_linux.h b/src/cpu/aarch64/xbyak_aarch64/src/util_impl_linux.h +index 743843bae50..3db37e972d1 100644 +--- a/src/cpu/aarch64/xbyak_aarch64/src/util_impl_linux.h ++++ b/src/cpu/aarch64/xbyak_aarch64/src/util_impl_linux.h +@@ -39,6 +39,13 @@ + #include + #endif + ++/* Linux kernel used in Ubuntu 20.04 does not have HWCAP2_BF16 definition. */ ++#ifdef AT_HWCAP2 ++#ifndef HWCAP2_BF16 ++#define HWCAP2_BF16 (1UL << 14) ++#endif ++#endif ++ + namespace Xbyak_aarch64 { + namespace util { + #define XBYAK_AARCH64_ERROR_ fprintf(stderr, "%s, %d, Error occurrs during read cache infomation.\n", __FILE__, __LINE__); +@@ -383,7 +390,7 @@ class CpuInfoLinux : public CpuInfo { + } + + void setHwCap() { +- unsigned long hwcap = getauxval(AT_HWCAP); ++ const unsigned long hwcap = getauxval(AT_HWCAP); + if (hwcap & HWCAP_ATOMICS) + type_ |= (Type)XBYAK_AARCH64_HWCAP_ATOMIC; + +@@ -391,8 +398,10 @@ class CpuInfoLinux : public CpuInfo { + type_ |= (Type)XBYAK_AARCH64_HWCAP_FP; + if (hwcap & HWCAP_ASIMD) + type_ |= (Type)XBYAK_AARCH64_HWCAP_ADVSIMD; +-#ifdef HWCAP2_BF16 +- if (hwcap & HWCAP2_BF16) ++ ++#ifdef AT_HWCAP2 ++ const unsigned long hwcap2 = getauxval(AT_HWCAP2); ++ if (hwcap2 & HWCAP2_BF16) + type_ |= (Type)XBYAK_AARCH64_HWCAP_BF16; + #endif + diff --git a/third_party/xla/third_party/tsl/workspace2.bzl b/third_party/xla/third_party/tsl/workspace2.bzl index cfd8ab0f9f0472..06b25e92a4cf87 100644 --- a/third_party/xla/third_party/tsl/workspace2.bzl +++ b/third_party/xla/third_party/tsl/workspace2.bzl @@ -170,6 +170,7 @@ def _tf_repositories(): "//third_party/mkl_dnn:onednn_acl_reorder.patch", "//third_party/mkl_dnn:onednn_acl_thread_local_scheduler.patch", "//third_party/mkl_dnn:onednn_acl_fp32_bf16_reorder.patch", + "//third_party/mkl_dnn:onednn_acl_bf16_capability_detection_for_ubuntu20.04.patch", ], 
sha256 = "2f76b407ef8893cca71340f88cd800019a1f14f8ac1bbdbb89a84be1370b52e3", strip_prefix = "oneDNN-3.2.1", From f3578fae9a3195dd6ba58cd896a08869c5783ff2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 20 Sep 2023 05:11:52 -0700 Subject: [PATCH 026/567] Initialize creation_pass_id in GPU Compiler This sets the creation pass id and the logical creation pass id to -1 for all ops in the input HLO. -1 is the special value which identifies ops present in the input HLO. By setting -1 for input ops, we will be able to differentiate ops originating from the input from ops generated by optimization passes. PiperOrigin-RevId: 566929969 --- third_party/xla/xla/service/gpu/gpu_compiler.cc | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/third_party/xla/xla/service/gpu/gpu_compiler.cc b/third_party/xla/xla/service/gpu/gpu_compiler.cc index 9caa2ccdbbde36..bad85be4677da6 100644 --- a/third_party/xla/xla/service/gpu/gpu_compiler.cc +++ b/third_party/xla/xla/service/gpu/gpu_compiler.cc @@ -293,6 +293,15 @@ void AddHloVerifier(HloPassPipeline* pipeline, HloVerifierOpts&& opts = {}, "hlo verifier"); } } + +void SetInstructionMetadata(HloModule* module) { + for (HloComputation* computation : module->computations()) { + for (HloInstruction* instruction : computation->instructions()) { + instruction->set_creation_pass_id(-1); + instruction->set_logical_creation_pass_id(-1); + } + } +} } // namespace // Runs optimization passes on the given HLO module. @@ -344,6 +353,8 @@ Status GpuCompiler::OptimizeHloModule(HloModule* hlo_module, layout_insensitive_algsimp_opts .set_enable_unconditional_reduce_of_concat_replacement(false); + SetInstructionMetadata(hlo_module); + HloPassPipeline pre_spmd_pipeline("pre-spmd-partitioner"); // Run some IR cleanup passes before running the SPMD partitioning // passes.
From 1b7a86988e88103ba5567f478f6c9425b3766e01 Mon Sep 17 00:00:00 2001 From: Oleg Shyshkov Date: Wed, 20 Sep 2023 05:22:54 -0700 Subject: [PATCH 027/567] [XLA:GPU] Priority fusion: don't allow partial fusions. Only consider cases when a producer can be fused with all the consumers. Otherwise fusion is not beneficial. PiperOrigin-RevId: 566932055 --- third_party/xla/xla/service/gpu/BUILD | 1 + .../xla/xla/service/gpu/priority_fusion.cc | 26 +++++++------------ 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/third_party/xla/xla/service/gpu/BUILD b/third_party/xla/xla/service/gpu/BUILD index 7c3efff98a8496..46de10211d6ccb 100644 --- a/third_party/xla/xla/service/gpu/BUILD +++ b/third_party/xla/xla/service/gpu/BUILD @@ -1960,6 +1960,7 @@ cc_library( "//xla/service:hlo_cost_analysis", "//xla/service:hlo_pass", "//xla/service:instruction_fusion", + "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/container:inlined_vector", diff --git a/third_party/xla/xla/service/gpu/priority_fusion.cc b/third_party/xla/xla/service/gpu/priority_fusion.cc index dfbc6b5cddbcec..1245f5a83458b2 100644 --- a/third_party/xla/xla/service/gpu/priority_fusion.cc +++ b/third_party/xla/xla/service/gpu/priority_fusion.cc @@ -25,6 +25,7 @@ limitations under the License. #include #include +#include "absl/algorithm/container.h" #include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" #include "absl/container/inlined_vector.h" @@ -114,7 +115,7 @@ class GpuPriorityFusionQueue : public FusionQueue { if (priority < 0) { continue; } - current_consumers_ = GetFusibleUsers(current_producer_); + current_consumers_ = current_producer_->users(); } auto next_consumer = current_consumers_.back(); @@ -230,31 +231,22 @@ class GpuPriorityFusionQueue : public FusionQueue { // Returns the priority of the producer based on its current operands and // users.
Priority CalculateProducerPriority(HloInstruction* producer) { - std::vector fusible_users = GetFusibleUsers(producer); - - // Don't bother computing cost for non-fusible ops. - if (fusible_users.empty()) { + // Don't fuse if we can't fuse in all users. + if (!CanFuseWithAllUsers(producer)) { return std::numeric_limits::min(); } GpuPerformanceModel::RunTimes run_times = GpuPerformanceModel::EstimateRunTimes(producer, &cost_analysis_, - fusible_users); + producer->users()); return absl::ToInt64Nanoseconds(run_times.time_unfused - run_times.time_fused); } - std::vector GetFusibleUsers(HloInstruction* producer) const { - std::vector fusible_users; - for (auto user : producer->users()) { - int64_t operand_index = user->operand_index(producer); - - if (can_fuse_(user, operand_index)) { - fusible_users.push_back(user); - } - } - - return fusible_users; + bool CanFuseWithAllUsers(HloInstruction* producer) const { + return absl::c_all_of(producer->users(), [&](HloInstruction* user) { + return can_fuse_(user, user->operand_index(producer)); + }); } // Store computation for cost analysis. 
From 40567bcecebfaca299cd1bc1ac8019bddb2f4db6 Mon Sep 17 00:00:00 2001 From: Rahul Batra Date: Tue, 27 Sep 2022 16:03:26 +0000 Subject: [PATCH 028/567] [ROCm]: Updates for rocm_dnn header dependency --- tensorflow/core/kernels/BUILD | 2 ++ tensorflow/core/platform/BUILD | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 37aa3cbf625cde..c258e5c5a0ff7e 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -2397,6 +2397,8 @@ tf_kernel_library( ":conv_ops_gpu_hdrs", ]) + if_cuda([ "@local_config_cuda//cuda:cudnn_header", + ])+ if_rocm([ + "//tensorflow/core/platform:stream_executor", ]), ) diff --git a/tensorflow/core/platform/BUILD b/tensorflow/core/platform/BUILD index 28f25dedc66fb9..229277ad00c0d8 100644 --- a/tensorflow/core/platform/BUILD +++ b/tensorflow/core/platform/BUILD @@ -1109,7 +1109,9 @@ tf_cuda_library( "@local_xla//xla/stream_executor/host:host_platform_id", "@local_xla//xla/stream_executor/platform:dso_loader", "@local_xla//xla/stream_executor/rocm:rocm_platform_id", - ], + ] + if_rocm_is_configured([ + "@local_xla//xla/stream_executor/rocm:miopen_plugin", + ]), ) # Like stream_executor library, but compiles without --config=cuda From 9ff97264b103695012907913ebce1d0a3d3b0930 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 20 Sep 2023 07:39:32 -0700 Subject: [PATCH 029/567] Integrate LLVM at llvm/llvm-project@2baf4a06ef06 Updates LLVM usage to match [2baf4a06ef06](https://github.com/llvm/llvm-project/commit/2baf4a06ef06) PiperOrigin-RevId: 566960191 --- third_party/llvm/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/third_party/llvm/workspace.bzl b/third_party/llvm/workspace.bzl index 9a1d9a278d1d19..a1abdc50e811c6 100644 --- a/third_party/llvm/workspace.bzl +++ b/third_party/llvm/workspace.bzl @@ -4,8 +4,8 @@ load("//third_party:repo.bzl", "tf_http_archive") def repo(name): """Imports LLVM.""" - LLVM_COMMIT = "bbe3ee061f6d4698988b7c7aae0d607f0f2e3db5" - LLVM_SHA256 = "25488abfd0967154c37b0472eea7d55b1d1c5854207b0a0aabf80cfd8de5d629" + LLVM_COMMIT = "2baf4a06ef06c51c2ef09f981f204983b0f8082c" + LLVM_SHA256 = "0ce881f09d65b27810160d02842d42259209506fac98f3f9389059c8b8429d69" tf_http_archive( name = name, From 0d09ec0c1e2cce294b338e8f2e0e52ce9ccdc5e3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 20 Sep 2023 08:07:13 -0700 Subject: [PATCH 030/567] Move missed bazel config to LLVM17 toolchain PiperOrigin-RevId: 566966919 --- ci/official/bazelrcs/cpu.bazelrc | 16 ++++++++-------- ci/official/bazelrcs/cuda.bazelrc | 22 +++++++++++----------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/ci/official/bazelrcs/cpu.bazelrc b/ci/official/bazelrcs/cpu.bazelrc index 43c951e3532466..096893bdc096c0 100644 --- a/ci/official/bazelrcs/cpu.bazelrc +++ b/ci/official/bazelrcs/cpu.bazelrc @@ -49,7 +49,7 @@ build --linkopt="-lm" build --copt=-Wno-gnu-offsetof-extensions # Use the NVCC toolchain to compile for manylinux2014 -build --crosstool_top="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain" +build --crosstool_top="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain" # Test-related settings below this point. 
test --build_tests_only --keep_going --test_output=errors --verbose_failures=true @@ -88,14 +88,14 @@ build:rbe --spawn_strategy=remote,worker,standalone,local build:rbe --remote_download_toplevel build:rbe --action_env=PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/go/bin" build:rbe --linkopt=-lrt --host_linkopt=-lrt --linkopt=-lm --host_linkopt=-lm # Unclear why this is here -build:rbe --host_crosstool_top="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain" -build:rbe --crosstool_top="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain" -build:rbe --extra_toolchains="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain-linux-x86_64" -build:rbe --extra_execution_platforms="@sigbuild-r2.14-clang_config_platform//:platform" -build:rbe --host_platform="@sigbuild-r2.14-clang_config_platform//:platform" -build:rbe --platforms="@sigbuild-r2.14-clang_config_platform//:platform" +build:rbe --host_crosstool_top="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain" +build:rbe --crosstool_top="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain" +build:rbe --extra_toolchains="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain-linux-x86_64" +build:rbe --extra_execution_platforms="@sigbuild-r2.14-clang17_config_platform//:platform" +build:rbe --host_platform="@sigbuild-r2.14-clang17_config_platform//:platform" +build:rbe --platforms="@sigbuild-r2.14-clang17_config_platform//:platform" # Python config is the same across all containers because the binary is the same -build:rbe --repo_env=TF_PYTHON_CONFIG_REPO="@sigbuild-r2.14-clang_config_python" +build:rbe --repo_env=TF_PYTHON_CONFIG_REPO="@sigbuild-r2.14-clang17_config_python" build:rbe --remote_instance_name=projects/tensorflow-testing/instances/default_instance # For continuous builds diff --git a/ci/official/bazelrcs/cuda.bazelrc b/ci/official/bazelrcs/cuda.bazelrc index 4d6df84931675d..14f4c4b96d0b6e 100644 --- a/ci/official/bazelrcs/cuda.bazelrc +++ 
b/ci/official/bazelrcs/cuda.bazelrc @@ -111,24 +111,24 @@ build:rbe --spawn_strategy=remote,worker,standalone,local build:rbe --remote_download_toplevel build:rbe --action_env=PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/go/bin" build:rbe --linkopt=-lrt --host_linkopt=-lrt --linkopt=-lm --host_linkopt=-lm # Unclear why this is here -build:rbe --host_crosstool_top="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain" -build:rbe --crosstool_top="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain" -build:rbe --extra_toolchains="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain-linux-x86_64" -build:rbe --extra_execution_platforms="@sigbuild-r2.14-clang_config_platform//:platform" -build:rbe --host_platform="@sigbuild-r2.14-clang_config_platform//:platform" -build:rbe --platforms="@sigbuild-r2.14-clang_config_platform//:platform" +build:rbe --host_crosstool_top="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain" +build:rbe --crosstool_top="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain" +build:rbe --extra_toolchains="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain-linux-x86_64" +build:rbe --extra_execution_platforms="@sigbuild-r2.14-clang17_config_platform//:platform" +build:rbe --host_platform="@sigbuild-r2.14-clang17_config_platform//:platform" +build:rbe --platforms="@sigbuild-r2.14-clang17_config_platform//:platform" # Python config is the same across all containers because the binary is the same -build:rbe --repo_env=TF_PYTHON_CONFIG_REPO="@sigbuild-r2.14-clang_config_python" +build:rbe --repo_env=TF_PYTHON_CONFIG_REPO="@sigbuild-r2.14-clang17_config_python" build:rbe --remote_instance_name=projects/tensorflow-testing/instances/default_instance build:rbe --project_id="tensorflow-testing" # For Remote build execution -- GPU configuration build:rbe --repo_env=REMOTE_GPU_TESTING=1 test:rbe --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64" -build:rbe 
--repo_env=TF_CUDA_CONFIG_REPO="@sigbuild-r2.14-clang_config_cuda" -build:rbe --repo_env=TF_TENSORRT_CONFIG_REPO="@sigbuild-r2.14-clang_config_tensorrt" -build:rbe --repo_env=TF_NCCL_CONFIG_REPO="@sigbuild-r2.14-clang_config_nccl" -build:rbe --repo_env=TF_PYTHON_CONFIG_REPO="@sigbuild-r2.14-clang_config_python" +build:rbe --repo_env=TF_CUDA_CONFIG_REPO="@sigbuild-r2.14-clang17_config_cuda" +build:rbe --repo_env=TF_TENSORRT_CONFIG_REPO="@sigbuild-r2.14-clang17_config_tensorrt" +build:rbe --repo_env=TF_NCCL_CONFIG_REPO="@sigbuild-r2.14-clang17_config_nccl" +build:rbe --repo_env=TF_PYTHON_CONFIG_REPO="@sigbuild-r2.14-clang17_config_python" # For continuous builds test:pycpp_filters --test_tag_filters=-no_oss,-oss_excluded,-oss_serial,-benchmark-test,-v1only,gpu,-no_gpu,-no_gpu_presubmit,-no_cuda11 From a8866e20fafb66161e403493f188a5639a023028 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 20 Sep 2023 08:23:00 -0700 Subject: [PATCH 031/567] Update TFRT dependency to use revision http://github.com/tensorflow/runtime/commit/752d6d83d403986227dffe42beb5014843cf2ddb. PiperOrigin-RevId: 566971528 --- third_party/tf_runtime/workspace.bzl | 4 ++-- third_party/xla/third_party/tf_runtime/workspace.bzl | 4 ++-- .../xla/third_party/tsl/third_party/tf_runtime/workspace.bzl | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/third_party/tf_runtime/workspace.bzl b/third_party/tf_runtime/workspace.bzl index 6abf8996baba2f..ab7501924e78b2 100644 --- a/third_party/tf_runtime/workspace.bzl +++ b/third_party/tf_runtime/workspace.bzl @@ -6,8 +6,8 @@ def repo(): """Imports TFRT.""" # Attention: tools parse and update these lines. 
- TFRT_COMMIT = "e20500b4e4cb784965a31f67b062cf4982ab1136" - TFRT_SHA256 = "36abbebdb50b97391b5c608e9cd606ee998256dbe7a865723ad208b57bc04d78" + TFRT_COMMIT = "752d6d83d403986227dffe42beb5014843cf2ddb" + TFRT_SHA256 = "818d9b3951c1da81a937a24c3875bfae38664ceb37909e5438bbebf708279a24" tf_http_archive( name = "tf_runtime", diff --git a/third_party/xla/third_party/tf_runtime/workspace.bzl b/third_party/xla/third_party/tf_runtime/workspace.bzl index 6abf8996baba2f..ab7501924e78b2 100644 --- a/third_party/xla/third_party/tf_runtime/workspace.bzl +++ b/third_party/xla/third_party/tf_runtime/workspace.bzl @@ -6,8 +6,8 @@ def repo(): """Imports TFRT.""" # Attention: tools parse and update these lines. - TFRT_COMMIT = "e20500b4e4cb784965a31f67b062cf4982ab1136" - TFRT_SHA256 = "36abbebdb50b97391b5c608e9cd606ee998256dbe7a865723ad208b57bc04d78" + TFRT_COMMIT = "752d6d83d403986227dffe42beb5014843cf2ddb" + TFRT_SHA256 = "818d9b3951c1da81a937a24c3875bfae38664ceb37909e5438bbebf708279a24" tf_http_archive( name = "tf_runtime", diff --git a/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl b/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl index 6abf8996baba2f..ab7501924e78b2 100644 --- a/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl +++ b/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl @@ -6,8 +6,8 @@ def repo(): """Imports TFRT.""" # Attention: tools parse and update these lines. - TFRT_COMMIT = "e20500b4e4cb784965a31f67b062cf4982ab1136" - TFRT_SHA256 = "36abbebdb50b97391b5c608e9cd606ee998256dbe7a865723ad208b57bc04d78" + TFRT_COMMIT = "752d6d83d403986227dffe42beb5014843cf2ddb" + TFRT_SHA256 = "818d9b3951c1da81a937a24c3875bfae38664ceb37909e5438bbebf708279a24" tf_http_archive( name = "tf_runtime", From 69923fd52754c1c501e45f8cd87a7d93a74bcc4c Mon Sep 17 00:00:00 2001 From: Reed Wanderman-Milne Date: Wed, 20 Sep 2023 08:24:32 -0700 Subject: [PATCH 032/567] Fix layout logic in IrArray::Index::Linearize. 
The logic previously assumed the layout was major-to-minor. In practice, I believe this assumption is correct in all places where Linearize is called, because LayoutNormalization converts most instructions to be major-to-minor. But in general, IrArray supports arbitrary layouts and so we should obey the given layout in IrArray::Index::Linearize as well. I am working on a change which calls IrArray::Index::Linearize in another place, and it requires Linearize to correctly handle non-major-to-minor layouts. Also fix a comment incorrectly stating multidim was major-to-minor. PiperOrigin-RevId: 566971940 --- .../service/gpu/tests/slice_to_dynamic.hlo | 6 +++--- .../xla/xla/service/llvm_ir/ir_array.cc | 20 ++++++++++--------- .../xla/xla/service/llvm_ir/ir_array.h | 6 +++--- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/third_party/xla/xla/service/gpu/tests/slice_to_dynamic.hlo b/third_party/xla/xla/service/gpu/tests/slice_to_dynamic.hlo index f5621261cb7504..54aa009ef75a93 100644 --- a/third_party/xla/xla/service/gpu/tests/slice_to_dynamic.hlo +++ b/third_party/xla/xla/service/gpu/tests/slice_to_dynamic.hlo @@ -42,9 +42,9 @@ // CHECK: custom_call.in_bounds-true: ; preds = %[[VAL_11]] // CHECK: %[[VAL_34:.*]] = mul nuw nsw i32 %[[VAL_22]], 1 // CHECK: %[[VAL_35:.*]] = add nuw nsw i32 0, %[[VAL_34]] -// CHECK: %[[VAL_36:.*]] = mul nuw nsw i32 %[[VAL_25]], 2 +// CHECK: %[[VAL_36:.*]] = mul nuw nsw i32 %[[VAL_24]], 2 // CHECK: %[[VAL_37:.*]] = add nuw nsw i32 %[[VAL_35]], %[[VAL_36]] -// CHECK: %[[VAL_38:.*]] = mul nuw nsw i32 %[[VAL_24]], 4 +// CHECK: %[[VAL_38:.*]] = mul nuw nsw i32 %[[VAL_25]], 4 // CHECK: %[[VAL_39:.*]] = add nuw nsw i32 %[[VAL_37]], %[[VAL_38]] // CHECK: %[[VAL_40:.*]] = icmp ult i32 %[[VAL_39]], %[[VAL_15]] // CHECK: br i1 %[[VAL_40]], label %[[VAL_41:.*]], label %[[VAL_29]] @@ -59,7 +59,7 @@ // CHECK: %[[VAL_47:.*]] = mul i32 %[[VAL_44]], %[[VAL_0]] // CHECK: %[[VAL_48:.*]] = udiv i32 %[[VAL_39]], %[[VAL_47]] // CHECK: 
%[[VAL_49:.*]] = getelementptr inbounds [2 x [2 x [2 x i32]]], ptr %[[VAL_50:.*]], i32 0, i32 %[[VAL_48]], i32 %[[VAL_46]], i32 %[[VAL_43]] -// CHECK: %[[VAL_51:.*]] = load i32, ptr %[[VAL_49]], align 4, !invariant.load +// CHECK: %[[VAL_51:.*]] = load i32, ptr %[[VAL_49]], align 4, !invariant.load !4 // CHECK: %[[VAL_52:.*]] = getelementptr inbounds i32, ptr %[[VAL_31]], i32 %[[VAL_19]] // CHECK: store i32 %[[VAL_51]], ptr %[[VAL_52]], align 4 // CHECK: br label %[[VAL_29]] diff --git a/third_party/xla/xla/service/llvm_ir/ir_array.cc b/third_party/xla/xla/service/llvm_ir/ir_array.cc index 7708db56edb585..36794b4b757e8f 100644 --- a/third_party/xla/xla/service/llvm_ir/ir_array.cc +++ b/third_party/xla/xla/service/llvm_ir/ir_array.cc @@ -442,14 +442,15 @@ llvm::Value* IrArray::Index::Linearize(absl::Span dimensions, CHECK_EQ(size(), dimensions.size()); llvm::Value* logical_linear_index = GetConstantWithIndexType(0); int64_t multiplier = 1; - for (ssize_t i = size() - 1; i >= 0; --i) { - llvm::Value* addend = - builder->CreateMul((*this)[i], GetConstantWithIndexType(multiplier), "", - /*HasNUW=*/true, /*HasNSW=*/true); + for (ssize_t i = 0; i < size(); ++i) { + int64_t dimension = layout_.minor_to_major(i); + llvm::Value* addend = builder->CreateMul( + (*this)[dimension], GetConstantWithIndexType(multiplier), "", + /*HasNUW=*/true, /*HasNSW=*/true); addend = builder->CreateZExtOrTrunc(addend, index_type_); logical_linear_index = builder->CreateAdd(logical_linear_index, addend, "", /*HasNUW=*/true, /*HasNSW=*/true); - multiplier *= dimensions[i]; + multiplier *= dimensions[dimension]; } return logical_linear_index; } @@ -462,14 +463,15 @@ llvm::Value* IrArray::Index::Linearize( CHECK_EQ(size(), dynamic_dims.size()); llvm::Value* logical_linear_index = GetConstantWithIndexType(0); llvm::Value* multiplier = GetConstantWithIndexType(1); - for (ssize_t i = size() - 1; i >= 0; --i) { - llvm::Value* addend = builder->CreateMul((*this)[i], multiplier, "", + for (ssize_t i = 
0; i < size(); ++i) { + int64_t dimension = layout_.minor_to_major(i); + llvm::Value* addend = builder->CreateMul((*this)[dimension], multiplier, "", /*HasNUW=*/true, /*HasNSW=*/true); addend = builder->CreateZExtOrTrunc(addend, index_type_); logical_linear_index = builder->CreateAdd(logical_linear_index, addend, "", /*HasNUW=*/true, /*HasNSW=*/true); - if (i) { - multiplier = builder->CreateMul(multiplier, dynamic_dims[i], + if (i < size() - 1) { + multiplier = builder->CreateMul(multiplier, dynamic_dims[dimension], /*Name=*/"multiplier"); } } diff --git a/third_party/xla/xla/service/llvm_ir/ir_array.h b/third_party/xla/xla/service/llvm_ir/ir_array.h index b67c1536b20524..fdebb5fbaf17b6 100644 --- a/third_party/xla/xla/service/llvm_ir/ir_array.h +++ b/third_party/xla/xla/service/llvm_ir/ir_array.h @@ -42,9 +42,9 @@ namespace llvm_ir { // are supported. class IrArray { public: - // A multidimensional index into an IrArray. All the runtime indices - // (multidim) and dimensions (Shape::dimensions(), absl::Span) - // are major-first. + // A multidimensional index into an IrArray. The order of the runtime indices + // (multidim) corresponds to the order of dimensions in the Shape passed to + // the constructor. // // This may also keep a linear index and the layout and dimensions it was // emitted for; if the shape where this `Index` is used matches, the linear From 5f7dd0c0e3df973e1aff926fec11f08df7fcf859 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 20 Sep 2023 08:38:03 -0700 Subject: [PATCH 033/567] Internal, fix build error. 
PiperOrigin-RevId: 566975236 --- .../xla/third_party/tsl/tsl/platform/windows/subprocess.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/third_party/xla/third_party/tsl/tsl/platform/windows/subprocess.cc b/third_party/xla/third_party/tsl/tsl/platform/windows/subprocess.cc index a28a30c1cf7510..e31432a0047575 100644 --- a/third_party/xla/third_party/tsl/tsl/platform/windows/subprocess.cc +++ b/third_party/xla/third_party/tsl/tsl/platform/windows/subprocess.cc @@ -95,9 +95,9 @@ DWORD WINAPI OutputThreadFunction(LPVOID param) { SubProcess::SubProcess(int nfds) : running_(false), + win_pi_(nullptr), exec_path_(nullptr), - exec_argv_(nullptr), - win_pi_(nullptr) { + exec_argv_(nullptr) { // The input 'nfds' parameter is currently ignored and the internal constant // 'kNFds' is used to support the 3 channels (stdin, stdout, stderr). for (int i = 0; i < kNFds; i++) { From 95060833cffbc61eecea769d07bb05914f50d502 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 20 Sep 2023 08:38:21 -0700 Subject: [PATCH 034/567] Update Docker image tags and digests in Kokoro and Bazel This is changing the Kokoro job config back to its original state (using the latest-python3.X docker image tags) and is updating the Bazel remote toolchain config to the latest Docker containers. (These are the ones created in https://github.com/tensorflow/tensorflow/actions/runs/6239795702/job/16938502214, which the latest tags also refer to.)
PiperOrigin-RevId: 566975309 --- tensorflow/tools/toolchains/remote_config/configs.bzl | 8 ++++---- third_party/xla/.kokoro/linux/cpu/build_cpu.cfg | 4 +--- third_party/xla/.kokoro/linux/gpu/build_gpu.cfg | 3 +-- .../xla/third_party/tsl/.kokoro/linux/cpu/build_cpu.cfg | 4 +--- .../tsl/tools/toolchains/remote_config/configs.bzl | 8 ++++---- .../xla/tools/toolchains/remote_config/configs.bzl | 8 ++++---- 6 files changed, 15 insertions(+), 20 deletions(-) diff --git a/tensorflow/tools/toolchains/remote_config/configs.bzl b/tensorflow/tools/toolchains/remote_config/configs.bzl index a7db31e6276f2a..0ef444f89f17d5 100644 --- a/tensorflow/tools/toolchains/remote_config/configs.bzl +++ b/tensorflow/tools/toolchains/remote_config/configs.bzl @@ -659,10 +659,10 @@ def initialize_rbe_configs(): sigbuild_tf_configs( name_container_map = { - "sigbuild-r2.14-clang17": "docker://gcr.io/tensorflow-sigs/build@sha256:77b26125af4e2c6d7955a94d8d75f6dbb1e35a33db5bdaa915806110527ab927", - "sigbuild-r2.14-clang17-python3.9": "docker://gcr.io/tensorflow-sigs/build@sha256:77b26125af4e2c6d7955a94d8d75f6dbb1e35a33db5bdaa915806110527ab927", - "sigbuild-r2.14-clang17-python3.10": "docker://gcr.io/tensorflow-sigs/build@sha256:fa47f1bc501983fb57e7af0e04f3c45051e42129640ef4d4a10e829d255f11ac", - "sigbuild-r2.14-clang17-python3.11": "docker://gcr.io/tensorflow-sigs/build@sha256:6935af1dd34f2f1d663ce1a6c63b3e96595ac9fefdf1e587a9bc53f2bfbf0c47", + "sigbuild-r2.14-clang17": "docker://gcr.io/tensorflow-sigs/build@sha256:edf0324686ffbf488ed0a6b7d3a29ecc6f7b7ad9c04b2e1a527a6a47c0dc1b4b", + "sigbuild-r2.14-clang17-python3.9": "docker://gcr.io/tensorflow-sigs/build@sha256:edf0324686ffbf488ed0a6b7d3a29ecc6f7b7ad9c04b2e1a527a6a47c0dc1b4b", + "sigbuild-r2.14-clang17-python3.10": "docker://gcr.io/tensorflow-sigs/build@sha256:5b06acad335d5c24aa2f63d39d9de230affd04aa982dda0242eeb893b9dae363", + "sigbuild-r2.14-clang17-python3.11": 
"docker://gcr.io/tensorflow-sigs/build@sha256:80d991d5cf0ac710b568c71c7790270691749afa19de49d7c1a121ba3f92cd58", }, # Unclear why LIBC is set to 2.19 here, and yet manylinux2010 is 2.12 # and manylinux2014 is 2.17. diff --git a/third_party/xla/.kokoro/linux/cpu/build_cpu.cfg b/third_party/xla/.kokoro/linux/cpu/build_cpu.cfg index 6f74611c651f3b..a58a2f918320be 100644 --- a/third_party/xla/.kokoro/linux/cpu/build_cpu.cfg +++ b/third_party/xla/.kokoro/linux/cpu/build_cpu.cfg @@ -1,7 +1,5 @@ build_file: "xla/.kokoro/linux/build.sh" env_vars: { key: "DOCKER_IMAGE" - # TODO(b/296975791): Change the tag back to `latest-python3.9` after the LLVM-17 update - value: "gcr.io/tensorflow-sigs/build:565341047-python3.9" - + value: "gcr.io/tensorflow-sigs/build:latest-python3.9" } diff --git a/third_party/xla/.kokoro/linux/gpu/build_gpu.cfg b/third_party/xla/.kokoro/linux/gpu/build_gpu.cfg index 697e639d0659f1..a58a2f918320be 100644 --- a/third_party/xla/.kokoro/linux/gpu/build_gpu.cfg +++ b/third_party/xla/.kokoro/linux/gpu/build_gpu.cfg @@ -1,6 +1,5 @@ build_file: "xla/.kokoro/linux/build.sh" env_vars: { key: "DOCKER_IMAGE" - # TODO(b/296975791): Change the tag back to `latest-python3.9` after the LLVM-17 update - value: "gcr.io/tensorflow-sigs/build:565341047-python3.9" + value: "gcr.io/tensorflow-sigs/build:latest-python3.9" } diff --git a/third_party/xla/third_party/tsl/.kokoro/linux/cpu/build_cpu.cfg b/third_party/xla/third_party/tsl/.kokoro/linux/cpu/build_cpu.cfg index fe035133997d6c..8e105be39e67c0 100644 --- a/third_party/xla/third_party/tsl/.kokoro/linux/cpu/build_cpu.cfg +++ b/third_party/xla/third_party/tsl/.kokoro/linux/cpu/build_cpu.cfg @@ -1,7 +1,5 @@ build_file: "tsl/.kokoro/linux/build.sh" env_vars: { key: "DOCKER_IMAGE" - # TODO(b/296975791): Change the tag back to `latest-python3.9` after the LLVM-17 update - value: "gcr.io/tensorflow-sigs/build:565341047-python3.9" - + value: "gcr.io/tensorflow-sigs/build:latest-python3.9" } \ No newline at end of file diff 
--git a/third_party/xla/third_party/tsl/tools/toolchains/remote_config/configs.bzl b/third_party/xla/third_party/tsl/tools/toolchains/remote_config/configs.bzl index 4a6c6376453c76..d1e467c45c91b6 100644 --- a/third_party/xla/third_party/tsl/tools/toolchains/remote_config/configs.bzl +++ b/third_party/xla/third_party/tsl/tools/toolchains/remote_config/configs.bzl @@ -659,10 +659,10 @@ def initialize_rbe_configs(): sigbuild_tf_configs( name_container_map = { - "sigbuild-r2.14-clang17": "docker://gcr.io/tensorflow-sigs/build@sha256:77b26125af4e2c6d7955a94d8d75f6dbb1e35a33db5bdaa915806110527ab927", - "sigbuild-r2.14-clang17-python3.9": "docker://gcr.io/tensorflow-sigs/build@sha256:77b26125af4e2c6d7955a94d8d75f6dbb1e35a33db5bdaa915806110527ab927", - "sigbuild-r2.14-clang17-python3.10": "docker://gcr.io/tensorflow-sigs/build@sha256:fa47f1bc501983fb57e7af0e04f3c45051e42129640ef4d4a10e829d255f11ac", - "sigbuild-r2.14-clang17-python3.11": "docker://gcr.io/tensorflow-sigs/build@sha256:6935af1dd34f2f1d663ce1a6c63b3e96595ac9fefdf1e587a9bc53f2bfbf0c47", + "sigbuild-r2.14-clang17": "docker://gcr.io/tensorflow-sigs/build@sha256:edf0324686ffbf488ed0a6b7d3a29ecc6f7b7ad9c04b2e1a527a6a47c0dc1b4b", + "sigbuild-r2.14-clang17-python3.9": "docker://gcr.io/tensorflow-sigs/build@sha256:edf0324686ffbf488ed0a6b7d3a29ecc6f7b7ad9c04b2e1a527a6a47c0dc1b4b", + "sigbuild-r2.14-clang17-python3.10": "docker://gcr.io/tensorflow-sigs/build@sha256:5b06acad335d5c24aa2f63d39d9de230affd04aa982dda0242eeb893b9dae363", + "sigbuild-r2.14-clang17-python3.11": "docker://gcr.io/tensorflow-sigs/build@sha256:80d991d5cf0ac710b568c71c7790270691749afa19de49d7c1a121ba3f92cd58", }, # Unclear why LIBC is set to 2.19 here, and yet manylinux2010 is 2.12 # and manylinux2014 is 2.17. 
diff --git a/third_party/xla/tools/toolchains/remote_config/configs.bzl b/third_party/xla/tools/toolchains/remote_config/configs.bzl index 4a6c6376453c76..d1e467c45c91b6 100644 --- a/third_party/xla/tools/toolchains/remote_config/configs.bzl +++ b/third_party/xla/tools/toolchains/remote_config/configs.bzl @@ -659,10 +659,10 @@ def initialize_rbe_configs(): sigbuild_tf_configs( name_container_map = { - "sigbuild-r2.14-clang17": "docker://gcr.io/tensorflow-sigs/build@sha256:77b26125af4e2c6d7955a94d8d75f6dbb1e35a33db5bdaa915806110527ab927", - "sigbuild-r2.14-clang17-python3.9": "docker://gcr.io/tensorflow-sigs/build@sha256:77b26125af4e2c6d7955a94d8d75f6dbb1e35a33db5bdaa915806110527ab927", - "sigbuild-r2.14-clang17-python3.10": "docker://gcr.io/tensorflow-sigs/build@sha256:fa47f1bc501983fb57e7af0e04f3c45051e42129640ef4d4a10e829d255f11ac", - "sigbuild-r2.14-clang17-python3.11": "docker://gcr.io/tensorflow-sigs/build@sha256:6935af1dd34f2f1d663ce1a6c63b3e96595ac9fefdf1e587a9bc53f2bfbf0c47", + "sigbuild-r2.14-clang17": "docker://gcr.io/tensorflow-sigs/build@sha256:edf0324686ffbf488ed0a6b7d3a29ecc6f7b7ad9c04b2e1a527a6a47c0dc1b4b", + "sigbuild-r2.14-clang17-python3.9": "docker://gcr.io/tensorflow-sigs/build@sha256:edf0324686ffbf488ed0a6b7d3a29ecc6f7b7ad9c04b2e1a527a6a47c0dc1b4b", + "sigbuild-r2.14-clang17-python3.10": "docker://gcr.io/tensorflow-sigs/build@sha256:5b06acad335d5c24aa2f63d39d9de230affd04aa982dda0242eeb893b9dae363", + "sigbuild-r2.14-clang17-python3.11": "docker://gcr.io/tensorflow-sigs/build@sha256:80d991d5cf0ac710b568c71c7790270691749afa19de49d7c1a121ba3f92cd58", }, # Unclear why LIBC is set to 2.19 here, and yet manylinux2010 is 2.12 # and manylinux2014 is 2.17. 
From 1f9a25d2f55aeab6c57ece9173f27fb1af3ffda2 Mon Sep 17 00:00:00 2001 From: Jake Harmon Date: Wed, 20 Sep 2023 09:15:26 -0700 Subject: [PATCH 035/567] Restore TSL/XLA headers in tensorflow/include The headers were previously in tensorflow/include/tensorflow/tsl and tensorflow/include/tensorflow/compiler/xla, but they can now be found in tensorflow/include/{tsl,xla}. C projects built against the TF wheel should now build unless they directly reference one of these headers. PiperOrigin-RevId: 566984872 --- .../tools/pip_package/build_pip_package.sh | 9 +++++++-- tensorflow/tools/pip_package/setup.py | 16 +++++++++++----- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/tensorflow/tools/pip_package/build_pip_package.sh b/tensorflow/tools/pip_package/build_pip_package.sh index b202fe0ee5f83b..d83f2096f277a1 100755 --- a/tensorflow/tools/pip_package/build_pip_package.sh +++ b/tensorflow/tools/pip_package/build_pip_package.sh @@ -254,10 +254,15 @@ function prepare_src() { fi fi + # Move headers from TSL/XLA into tensorflow so that InstallHeaders can move + # them back into tensorflow/include + cp -rL bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/local_tsl/tsl/ ${TMPDIR}/tensorflow + cp -rL bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/local_xla/xla/ ${TMPDIR}/tensorflow/compiler # Move vendored files into proper locations # This is required because TSL/XLA don't publish their own wheels - cp -r bazel-bin/external/local_tsl/tsl/ ${TMPDIR}/tensorflow/tsl - cp -r bazel-bin/external/local_xla/xla/ ${TMPDIR}/tensorflow/compiler/xla + # TODO(jakeharmon): These two copy statements may no longer be necessary + cp -rL bazel-bin/external/local_tsl/tsl/ ${TMPDIR}/tensorflow + cp -rL bazel-bin/external/local_xla/xla/ ${TMPDIR}/tensorflow/compiler # Fix the proto stubs if is_macos; then find ${TMPDIR}/tensorflow/ -name "*.py" -type f -exec sed -i '' 's/from tsl\./from tensorflow.tsl./' {} \; diff --git 
a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index cfc65d92635a3c..755405fbf15eea 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -261,13 +261,19 @@ def mkdir_and_copy_file(self, header): # symlink within the directory hierarchy. # NOTE(keveman): Figure out how to customize bdist_wheel package so # we can do the symlink. - external_header_locations = [ - 'tensorflow/include/external/eigen_archive/', - 'tensorflow/include/external/com_google_absl/', - ] + # pylint: disable=line-too-long + external_header_locations = { + '/tensorflow/include/external/eigen_archive': '', + '/tensorflow/include/external/com_google_absl': '', + '/tensorflow/include/tensorflow/compiler/xla': '/tensorflow/include/xla', + '/tensorflow/include/tensorflow/tsl': '/tensorflow/include/tsl', + } + # pylint: enable=line-too-long + for location in external_header_locations: if location in install_dir: - extra_dir = install_dir.replace(location, '') + extra_dir = install_dir.replace(location, + external_header_locations[location]) if not os.path.exists(extra_dir): self.mkpath(extra_dir) self.copy_file(header, extra_dir) From 1e035a5f6055c0531f35b1ec8508a54fcf2737ca Mon Sep 17 00:00:00 2001 From: Mason Chang Date: Wed, 20 Sep 2023 09:25:25 -0700 Subject: [PATCH 036/567] Proposal to create a cluster Bridge API for TF: V2 - Used when the bridge is called via the Function API, which roughly aligns to TF2. V1 - Used when the bridge is called via the Session API. We're limited to not supporting infeed / outfeed ops. After this change, existing bridge.cc logic gets pulled out into here.
PiperOrigin-RevId: 566987673 --- tensorflow/compiler/mlir/tf2xla/api/v1/BUILD | 23 ++++++++++ .../compiler/mlir/tf2xla/api/v1/cluster_tf.cc | 34 +++++++++++++++ .../compiler/mlir/tf2xla/api/v1/cluster_tf.h | 43 +++++++++++++++++++ .../mlir/tf2xla/api/v1/cluster_tf_test.cc | 37 ++++++++++++++++ tensorflow/compiler/mlir/tf2xla/api/v2/BUILD | 24 +++++++++++ .../compiler/mlir/tf2xla/api/v2/cluster_tf.cc | 36 ++++++++++++++++ .../compiler/mlir/tf2xla/api/v2/cluster_tf.h | 43 +++++++++++++++++++ .../mlir/tf2xla/api/v2/cluster_tf_test.cc | 38 ++++++++++++++++ 8 files changed, 278 insertions(+) create mode 100644 tensorflow/compiler/mlir/tf2xla/api/v1/cluster_tf.cc create mode 100644 tensorflow/compiler/mlir/tf2xla/api/v1/cluster_tf.h create mode 100644 tensorflow/compiler/mlir/tf2xla/api/v1/cluster_tf_test.cc create mode 100644 tensorflow/compiler/mlir/tf2xla/api/v2/cluster_tf.cc create mode 100644 tensorflow/compiler/mlir/tf2xla/api/v2/cluster_tf.h create mode 100644 tensorflow/compiler/mlir/tf2xla/api/v2/cluster_tf_test.cc diff --git a/tensorflow/compiler/mlir/tf2xla/api/v1/BUILD b/tensorflow/compiler/mlir/tf2xla/api/v1/BUILD index 827fb33a7874f5..08b5e4d485a6e4 100644 --- a/tensorflow/compiler/mlir/tf2xla/api/v1/BUILD +++ b/tensorflow/compiler/mlir/tf2xla/api/v1/BUILD @@ -143,3 +143,26 @@ tf_cc_test( "@local_xla//xla/client:client_library", ], ) + +cc_library( + name = "cluster_tf", + srcs = ["cluster_tf.cc"], + hdrs = ["cluster_tf.h"], + deps = [ + "//tensorflow/core:lib_proto_parsing", + "//tensorflow/core/platform:status", + "@llvm-project//mlir:IR", + "@local_tsl//tsl/platform:status", + ], +) + +tf_cc_test( + name = "cluster_tf_test", + srcs = ["cluster_tf_test.cc"], + deps = [ + ":cluster_tf", + "@com_google_googletest//:gtest_main", + "@llvm-project//mlir:IR", + "@local_tsl//tsl/lib/core:status_test_util", + ], +) diff --git a/tensorflow/compiler/mlir/tf2xla/api/v1/cluster_tf.cc b/tensorflow/compiler/mlir/tf2xla/api/v1/cluster_tf.cc new file mode 100644 index 
00000000000000..2994242fb06730 --- /dev/null +++ b/tensorflow/compiler/mlir/tf2xla/api/v1/cluster_tf.cc @@ -0,0 +1,34 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/mlir/tf2xla/api/v1/cluster_tf.h" + +#include "mlir/IR/BuiltinOps.h" // from @llvm-project +#include "tensorflow/core/platform/status.h" +#include "tsl/platform/status.h" + +namespace tensorflow { +namespace tf2xla { +namespace v1 { + +using mlir::ModuleOp; + +tensorflow::Status RunSessionTf2xlaClusteringBridge(ModuleOp module) { + return tsl::OkStatus(); +} + +} // namespace v1 +} // namespace tf2xla +} // namespace tensorflow diff --git a/tensorflow/compiler/mlir/tf2xla/api/v1/cluster_tf.h b/tensorflow/compiler/mlir/tf2xla/api/v1/cluster_tf.h new file mode 100644 index 00000000000000..02a68e741bab10 --- /dev/null +++ b/tensorflow/compiler/mlir/tf2xla/api/v1/cluster_tf.h @@ -0,0 +1,43 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_MLIR_TF2XLA_API_V1_CLUSTER_TF_H_ +#define TENSORFLOW_COMPILER_MLIR_TF2XLA_API_V1_CLUSTER_TF_H_ + +#include "mlir/IR/BuiltinOps.h" // from @llvm-project +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { +namespace tf2xla { +namespace v1 { + +// Run all the passes involved in transforming the graph before execution so +// that it is suitable for targeting devices when called via the TF1 Session +// API. +// These transformations take as input a Tensorflow Graph as an MLIR Module +// and transforms the module in place to cluster the given ops for compilation +// that is compatible with the given device_type. The MLIR should be in the TF +// Executor Dialect for graph nodes and edges. Individual Op inside a node +// should be the Tensorflow Dialect. The output MLIR is in the TF Executor +// Dialect. The input MLIR should not have infeed and outfeed ops, which are +// unsupported via this API. +// Returns OkStatus if passed, otherwise an error. 
+tensorflow::Status RunSessionTf2xlaClusteringBridge(mlir::ModuleOp module); + +} // namespace v1 +} // namespace tf2xla +} // namespace tensorflow + +#endif // TENSORFLOW_COMPILER_MLIR_TF2XLA_API_V1_CLUSTER_TF_H_ diff --git a/tensorflow/compiler/mlir/tf2xla/api/v1/cluster_tf_test.cc b/tensorflow/compiler/mlir/tf2xla/api/v1/cluster_tf_test.cc new file mode 100644 index 00000000000000..f5d2e9a9e947fd --- /dev/null +++ b/tensorflow/compiler/mlir/tf2xla/api/v1/cluster_tf_test.cc @@ -0,0 +1,37 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/mlir/tf2xla/api/v1/cluster_tf.h" + +#include +#include "mlir/IR/BuiltinOps.h" // from @llvm-project +#include "tsl/lib/core/status_test_util.h" + +namespace tensorflow { +namespace tf2xla { +namespace v1 { +namespace { + +using mlir::ModuleOp; + +TEST(SessionTf2xlaClusteringBridge, ClustersTf) { + ModuleOp module; + TF_ASSERT_OK(RunSessionTf2xlaClusteringBridge(module)); +} + +} // namespace +} // namespace v1 +} // namespace tf2xla +} // namespace tensorflow diff --git a/tensorflow/compiler/mlir/tf2xla/api/v2/BUILD b/tensorflow/compiler/mlir/tf2xla/api/v2/BUILD index e71842b3ff866c..acd65e27f78a34 100644 --- a/tensorflow/compiler/mlir/tf2xla/api/v2/BUILD +++ b/tensorflow/compiler/mlir/tf2xla/api/v2/BUILD @@ -89,3 +89,27 @@ tf_proto_library( srcs = ["device_type.proto"], cc_api_version = 2, ) + +cc_library( + name = "cluster_tf", + srcs = ["cluster_tf.cc"], + hdrs = ["cluster_tf.h"], + deps = [ + ":device_type_proto_cc", + "//tensorflow/core:lib_proto_parsing", + "//tensorflow/core/platform:status", + "@llvm-project//mlir:IR", + "@local_tsl//tsl/platform:status", + ], +) + +tf_cc_test( + name = "cluster_tf_test", + srcs = ["cluster_tf_test.cc"], + deps = [ + ":cluster_tf", + "@com_google_googletest//:gtest_main", + "@llvm-project//mlir:IR", + "@local_tsl//tsl/lib/core:status_test_util", + ], +) diff --git a/tensorflow/compiler/mlir/tf2xla/api/v2/cluster_tf.cc b/tensorflow/compiler/mlir/tf2xla/api/v2/cluster_tf.cc new file mode 100644 index 00000000000000..4e0c220a47589d --- /dev/null +++ b/tensorflow/compiler/mlir/tf2xla/api/v2/cluster_tf.cc @@ -0,0 +1,36 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/mlir/tf2xla/api/v2/cluster_tf.h" + +#include "mlir/IR/BuiltinOps.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tf2xla/api/v2/device_type.pb.h" +#include "tensorflow/core/platform/status.h" +#include "tsl/platform/status.h" + +namespace tensorflow { +namespace tf2xla { +namespace v2 { + +using mlir::ModuleOp; + +tensorflow::Status RunFunctionTf2xlaClusteringBridge(ModuleOp module, + DeviceType device_type) { + return tsl::OkStatus(); +} + +} // namespace v2 +} // namespace tf2xla +} // namespace tensorflow diff --git a/tensorflow/compiler/mlir/tf2xla/api/v2/cluster_tf.h b/tensorflow/compiler/mlir/tf2xla/api/v2/cluster_tf.h new file mode 100644 index 00000000000000..e25ae4ed973ce0 --- /dev/null +++ b/tensorflow/compiler/mlir/tf2xla/api/v2/cluster_tf.h @@ -0,0 +1,43 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_MLIR_TF2XLA_API_V2_CLUSTER_TF_H_ +#define TENSORFLOW_COMPILER_MLIR_TF2XLA_API_V2_CLUSTER_TF_H_ + +#include "mlir/IR/BuiltinOps.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tf2xla/api/v2/device_type.pb.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { +namespace tf2xla { +namespace v2 { + +// Run all the passes involved in transforming the graph before execution so +// that it is suitable for targeting devices when called with the TF 2 Function +// API. Users that need clustering with the Session API should use the v1 Bridge +// API. These transformations take as input a Tensorflow Graph as an MLIR Module +// and transforms the module in place to cluster the given ops for compilation +// that is compatible with the given device_type. The MLIR should be in the TF +// Executor Dialect for graph nodes and edges. Individual Op inside a node +// should be the Tensorflow Dialect. The output MLIR is in the TF Executor +// Dialect. Returns OkStatus if passed, otherwise an error. +tensorflow::Status RunFunctionTf2xlaClusteringBridge(mlir::ModuleOp module, + DeviceType device_type); + +} // namespace v2 +} // namespace tf2xla +} // namespace tensorflow + +#endif // TENSORFLOW_COMPILER_MLIR_TF2XLA_API_V2_CLUSTER_TF_H_ diff --git a/tensorflow/compiler/mlir/tf2xla/api/v2/cluster_tf_test.cc b/tensorflow/compiler/mlir/tf2xla/api/v2/cluster_tf_test.cc new file mode 100644 index 00000000000000..994bf2bd69e470 --- /dev/null +++ b/tensorflow/compiler/mlir/tf2xla/api/v2/cluster_tf_test.cc @@ -0,0 +1,38 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/mlir/tf2xla/api/v2/cluster_tf.h" + +#include +#include "mlir/IR/BuiltinOps.h" // from @llvm-project +#include "tsl/lib/core/status_test_util.h" + +namespace tensorflow { +namespace tf2xla { +namespace v2 { +namespace { + +using mlir::ModuleOp; + +TEST(FunctionTf2xlaClusteringBridgeTest, ClustersTf) { + ModuleOp module; + TF_ASSERT_OK( + RunFunctionTf2xlaClusteringBridge(module, DeviceType::XLA_TPU_JIT)); +} + +} // namespace +} // namespace v2 +} // namespace tf2xla +} // namespace tensorflow From ab54126289a5ee3976bc4de6d93032cf6b750ffa Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Wed, 20 Sep 2023 09:28:34 -0700 Subject: [PATCH 037/567] [NFC] Log NCCL calls for AllToAll PiperOrigin-RevId: 566988443 --- .../xla/service/gpu/nccl_all_to_all_thunk.cc | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/third_party/xla/xla/service/gpu/nccl_all_to_all_thunk.cc b/third_party/xla/xla/service/gpu/nccl_all_to_all_thunk.cc index abaa47a29fd1fa..2dcf4ac8fe79f2 100644 --- a/third_party/xla/xla/service/gpu/nccl_all_to_all_thunk.cc +++ b/third_party/xla/xla/service/gpu/nccl_all_to_all_thunk.cc @@ -20,6 +20,7 @@ limitations under the License. 
#include #include +#include "absl/strings/str_format.h" #include "xla/service/gpu/ir_emission_utils.h" #include "xla/service/gpu/nccl_collective_thunk.h" #include "xla/shape_util.h" @@ -138,9 +139,21 @@ Status RunAllToAll(bool has_split_dimension, buffer.element_type); for (int rank = 0; rank < num_participants; ++rank) { + VLOG(3) << absl::StreamFormat( + "Calling ncclSend(sendbuff=%p, count=%d, peer=%d " + "comm=%p, stream=%p)", + send_buffer + rank * chunk_bytes, chunk_elements, rank, + static_cast(comm), gpu_stream); XLA_CUDA_RETURN_IF_ERROR(ncclSend(send_buffer + rank * chunk_bytes, chunk_elements, dtype, rank, comm, gpu_stream)); + + VLOG(3) << absl::StreamFormat( + "Calling ncclRecv(recvbuff=%p, count=%d, peer=%d " + "comm=%p, stream=%p)", + recv_buffer + rank * chunk_bytes, chunk_elements, rank, + static_cast(comm), gpu_stream); + XLA_CUDA_RETURN_IF_ERROR(ncclRecv(recv_buffer + rank * chunk_bytes, chunk_elements, dtype, rank, comm, gpu_stream)); @@ -164,8 +177,21 @@ Status RunAllToAll(bool has_split_dimension, int64_t element_count = buffer.element_count * dtype_and_multiplier.second; + VLOG(3) << absl::StreamFormat( + "Calling ncclSend(sendbuff=%p, count=%d, peer=%d " + "comm=%p, stream=%p)", + send_buffer, element_count, i, static_cast(comm), + gpu_stream); + XLA_CUDA_RETURN_IF_ERROR(ncclSend(send_buffer, element_count, dtype, /*rank=*/i, comm, gpu_stream)); + + VLOG(3) << absl::StreamFormat( + "Calling ncclRecv(recvbuff=%p, count=%d, peer=%d " + "comm=%p, stream=%p)", + recv_buffer, element_count, i, static_cast(comm), + gpu_stream); + XLA_CUDA_RETURN_IF_ERROR(ncclRecv(recv_buffer, element_count, dtype, /*rank=*/i, comm, gpu_stream)); } From 105a1a38382f2574222072d86bf4dbc8ae2fb7ae Mon Sep 17 00:00:00 2001 From: Kuangyuan Chen Date: Wed, 20 Sep 2023 09:43:39 -0700 Subject: [PATCH 038/567] Add tf_mlrt.await_all_control op if there is any unused future. 
PiperOrigin-RevId: 566992749 --- .../mlir/tfrt/tests/mlrt/tf_to_mlrt.mlir | 19 +++++++++++ .../mlir/tfrt/transforms/mlrt/tf_to_mlrt.cc | 34 +++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/tensorflow/compiler/mlir/tfrt/tests/mlrt/tf_to_mlrt.mlir b/tensorflow/compiler/mlir/tfrt/tests/mlrt/tf_to_mlrt.mlir index 12a562bf6b6962..b468f8f88f9b1f 100644 --- a/tensorflow/compiler/mlir/tfrt/tests/mlrt/tf_to_mlrt.mlir +++ b/tensorflow/compiler/mlir/tfrt/tests/mlrt/tf_to_mlrt.mlir @@ -417,3 +417,22 @@ func.func @case_test(%arg0: tensor, %arg1: tensor, %arg2: tensor %0 = "tf.Case"(%arg0, %arg1, %arg2) {_lower_using_switch_merge = true, branches = [@branch0, @branch1], is_stateless = true} : (tensor, tensor, tensor) -> tensor func.return %0 : tensor } + +// ----- + +// Test await is added for unused futures + +// CHECK-LABEL: func @unused_future_arg +// CHECK-SAME: ({{%.*}}: !tf_mlrt.tensor, [[unused:%.*]]: !mlrt.future) +func.func @unused_future_arg(%x: tensor, %unused: !mlrt.future) -> tensor { + // CHECK: mlrt.await_all_control [[unused]] + return %x : tensor +} + +// CHECK-LABEL: func @unused_future +func.func @unused_future(%x: tensor) -> tensor { + // CHECK: [[unused:%.*]] = tf_mlrt.async_executeop + %unused = "tf.TestAsyncIdentity"(%x) {__op_key = 0: i32, T = i32} : (tensor) -> tensor + // CHECK: mlrt.await_all_control [[unused]] + return %x : tensor +} diff --git a/tensorflow/compiler/mlir/tfrt/transforms/mlrt/tf_to_mlrt.cc b/tensorflow/compiler/mlir/tfrt/transforms/mlrt/tf_to_mlrt.cc index 71b0d7d8403d12..603167cb0f5ef8 100644 --- a/tensorflow/compiler/mlir/tfrt/transforms/mlrt/tf_to_mlrt.cc +++ b/tensorflow/compiler/mlir/tfrt/transforms/mlrt/tf_to_mlrt.cc @@ -1015,6 +1015,40 @@ class TfToMlrtConversionPass op->replaceAllUsesWith(op->getOperands()); op->erase(); }); + + AddAwaitOpToUnusedFutures(module); + } + + void AddAwaitOpToUnusedFutures(mlir::ModuleOp module) { + for (auto func : module.getOps()) { + llvm::SmallVector unused_futures; + + auto 
is_unused_future = [](mlir::Value result) { + return llvm::isa<::mlrt::compiler::FutureType>(result.getType()) && + result.use_empty(); + }; + + for (auto arg : func.getArguments()) { + if (is_unused_future(arg)) { + unused_futures.push_back(arg); + } + } + + for (auto &op : func.getBody().front()) { + for (mlir::Value result : op.getResults()) { + if (is_unused_future(result)) { + unused_futures.push_back(result); + } + } + } + + if (!unused_futures.empty()) { + auto builder = + mlir::OpBuilder::atBlockTerminator(&func.getBody().front()); + builder.create<::mlrt::compiler::AwaitAllControlOp>(func.getLoc(), + unused_futures); + } + } } mlir::LogicalResult PostProcessFunctionSignature( From d03c477d727b93b71ac1710885c6c918d7754361 Mon Sep 17 00:00:00 2001 From: Raviteja Gorijala Date: Wed, 20 Sep 2023 09:51:53 -0700 Subject: [PATCH 039/567] Add licenses and notices for third party libraries PiperOrigin-RevId: 566995194 --- .../tools/pip_package/THIRD_PARTY_NOTICES.txt | 9346 +++++++++++++++++ 1 file changed, 9346 insertions(+) create mode 100644 tensorflow/tools/pip_package/THIRD_PARTY_NOTICES.txt diff --git a/tensorflow/tools/pip_package/THIRD_PARTY_NOTICES.txt b/tensorflow/tools/pip_package/THIRD_PARTY_NOTICES.txt new file mode 100644 index 00000000000000..c0ecfe99bcefff --- /dev/null +++ b/tensorflow/tools/pip_package/THIRD_PARTY_NOTICES.txt @@ -0,0 +1,9346 @@ +-------------------------------------------------------------------------------- +== Astunparse + + +LICENSE +======= + +Copyright (c) 2014, Simon Percivall +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
+ +* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +* Neither the name of AST Unparser nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 +-------------------------------------------- + +1. This LICENSE AGREEMENT is between the Python Software Foundation +("PSF"), and the Individual or Organization ("Licensee") accessing and +otherwise using this software ("Python") in source or binary form and +its associated documentation. + +2. 
Subject to the terms and conditions of this License Agreement, PSF hereby +grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, +analyze, test, perform and/or display publicly, prepare derivative works, +distribute, and otherwise use Python alone or in any derivative version, +provided, however, that PSF's License Agreement and PSF's notice of copyright, +i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +2011, 2012, 2013, 2014 Python Software Foundation; All Rights Reserved" are retained +in Python alone or in any derivative version prepared by Licensee. + +3. In the event Licensee prepares a derivative work that is based on +or incorporates Python or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python. + +4. PSF is making Python available to Licensee on an "AS IS" +basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. Nothing in this License Agreement shall be deemed to create any +relationship of agency, partnership, or joint venture between PSF and +Licensee. 
This License Agreement does not grant permission to use PSF +trademarks or trade name in a trademark sense to endorse or promote +products or services of Licensee, or any third party. + +8. By copying, installing or otherwise using Python, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== AbslPy + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== Boring SSL + +BoringSSL is a fork of OpenSSL. As such, large parts of it fall under OpenSSL +licensing. Files that are completely new have a Google copyright and an ISC +license. This license is reproduced at the bottom of this file. + +Contributors to BoringSSL are required to follow the CLA rules for Chromium: +https://cla.developers.google.com/clas + +Files in third_party/ have their own licenses, as described therein. The MIT +license, for third_party/fiat, which, unlike other third_party directories, is +compiled into non-test libraries, is included below. + +The OpenSSL toolkit stays under a dual license, i.e. both the conditions of the +OpenSSL License and the original SSLeay license apply to the toolkit. See below +for the actual license texts. Actually both licenses are BSD-style Open Source +licenses. In case of any license issues related to OpenSSL please contact +openssl-core@openssl.org. + +The following are Google-internal bug numbers where explicit permission from +some authors is recorded for use of their work. (This is purely for our own +record keeping.) 
+ 27287199 + 27287880 + 27287883 + 263291445 + + OpenSSL License + --------------- + +/* ==================================================================== + * Copyright (c) 1998-2011 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ + + Original SSLeay License + ----------------------- + +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) + * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). + * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * + * Copyright remains Eric Young's, and as such any Copyright notices in + * the code are not to be removed. + * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. 
this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] + */ + + +ISC license used for completely new code in BoringSSL: + +/* Copyright (c) 2015, Google Inc. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ + + +The code in third_party/fiat carries the MIT license: + +Copyright (c) 2015-2016 the fiat-crypto authors (see +https://github.com/mit-plv/fiat-crypto/blob/master/AUTHORS). + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + + +Licenses for support code +------------------------- + +Parts of the TLS test suite are under the Go license. This code is not included +in BoringSSL (i.e. libcrypto and libssl) when compiled, however, so +distributing code linked against BoringSSL does not trigger this license: + +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +BoringSSL uses the Chromium test infrastructure to run a continuous build, +trybots etc. The scripts which manage this, and the script for generating build +metadata, are under the Chromium license. Distributing code linked against +BoringSSL does not trigger this license. + +Copyright 2015 The Chromium Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== cudnn-frontend + +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== Curl + +Files: src/... + +COPYRIGHT AND PERMISSION NOTICE + +Copyright (c) 1996 - 2014, Daniel Stenberg, <daniel@haxx.se>. + +All rights reserved. + +Permission to use, copy, modify, and distribute this software for any purpose +with or without fee is hereby granted, provided that the above copyright +notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN +NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +OR OTHER DEALINGS IN THE SOFTWARE. + +Except as contained in this notice, the name of a copyright holder shall not +be used in advertising or otherwise to promote the sale, use or other dealings +in this Software without prior written authorization of the copyright holder. + +------------------ + +Files: src/lib/inet_ntop.c, src/lib/inet_pton.c + +Copyright (C) 1996-2019 Internet Software Consortium. + +Permission to use, copy, modify, and distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM +DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL +INTERNET SOFTWARE CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, +INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING +FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, +NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION +WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +------------------ + +Files: src/lib/security.c, src/lib/krb5.c: unused in Google + +This source code was modified by Martin Hedenfalk <mhe@stacken.kth.se> for +use in Curl. His latest changes were done 2000-09-18. + +It has since been patched and modified a lot by Daniel Stenberg +<daniel@haxx.se> to make it better applied to curl conditions, and to make +it not use globals, pollute name space and more. This source code awaits a +rewrite to work around the paragraph 2 in the BSD licenses as explained +below. + +Copyright (c) 1998, 1999, 2017 Kungliga Tekniska Högskolan +(Royal Institute of Technology, Stockholm, Sweden). + +Copyright (C) 2001 - 2019, Daniel Stenberg, <daniel@haxx.se>, et al. + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the name of the Institute nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED.
IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. + +------------------ + +Files: src/LICENSES/... + +BSD-4-Clause (University of California-Specific) + +Copyright [various years] The Regents of the University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. All advertising materials mentioning features or use of this software must display the following acknowledgement: This product includes software developed by the University of California, Berkeley and its contributors. + +4. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== Dlpack + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2017 by Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== double_conversion + +Copyright 2006-2011, the V8 project authors. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of Google Inc. nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== Eigen + +Eigen 3.3.90 +The corresponding source for this library is available at +https://third-party-mirror.googlesource.com/eigen/+/20f4d829087ef6ed6a502859c6fd093065e806d0 + +Eigen is primarily MPL2 licensed. See COPYING.MPL2 and these links: + http://www.mozilla.org/MPL/2.0/ + http://www.mozilla.org/MPL/2.0/FAQ.html + +Some files contain third-party code under BSD, whence +the other COPYING.* files here. + +If you want to guarantee that the Eigen code that you are #including +is licensed under the MPL2 and possibly more permissive licenses (like +BSD), #define this preprocessor symbol: EIGEN_MPL2_ONLY +For example, with most compilers, you could add this to your project + CXXFLAGS: -DEIGEN_MPL2_ONLY +This will cause a compilation error to be generated if you #include +any code that is covered by more restrictive licences than MPL2. 
+ +---------------------------------------------------------------------- +Following applies to: +./test/sparseqr.cpp +./test/half_float.cpp +./test/zerosized.cpp +./test/nesting_ops.cpp +./test/sizeoverflow.cpp +./test/swap.cpp +./test/product_mmtr.cpp +./test/stdvector_overload.cpp +./test/product_symm.cpp +./test/sparse_block.cpp +./test/eigen2support.cpp +./test/upperbidiagonalization.cpp +./test/numext.cpp +./test/adjoint.cpp +./test/AnnoyingScalar.h +./test/mpl2only.cpp +./test/stddeque.cpp +./test/householder.cpp +./test/product_small.cpp +./test/product_syrk.cpp +./test/inplace_decomposition.cpp +./test/vectorwiseop.cpp +./test/meta.cpp +./test/stdvector.cpp +./test/sparseLM.cpp +./test/diagonalmatrices.cpp +./test/stdlist_overload.cpp +./test/block.cpp +./test/cholmod_support.cpp +./test/basicstuff.cpp +./test/triangular.cpp +./test/product.h +./test/vectorization_logic.cpp +./test/dontalign.cpp +./test/first_aligned.cpp +./test/mapped_matrix.cpp +./test/umfpack_support.cpp +./test/product_selfadjoint.cpp +./test/smallvectors.cpp +./test/corners.cpp +./test/product_trsolve.cpp +./test/determinant.cpp +./test/stdlist.cpp +./test/unalignedcount.cpp +./test/qr.cpp +./test/svd_common.h +./test/ref.cpp +./test/symbolic_index.cpp +./test/geo_transformations.cpp +./test/geo_eulerangles.cpp +./test/eigensolver_selfadjoint.cpp +./test/stddeque_overload.cpp +./test/jacobisvd.cpp +./test/nullary.cpp +./test/inverse.cpp +./test/integer_types.cpp +./test/metis_support.cpp +./test/exceptions.cpp +./test/packetmath.cpp +./test/schur_complex.cpp +./test/type_alias.cpp +./test/unalignedassert.cpp +./test/geo_quaternion.cpp +./test/lu.cpp +./test/qr_fullpivoting.cpp +./test/denseLM.cpp +./test/linearstructure.cpp +./test/rand.cpp +./test/conservative_resize.cpp +./test/eigensolver_generalized_real.cpp +./test/pastix_support.cpp +./test/sparse_solver.h +./test/num_dimensions.cpp +./test/simplicial_cholesky.cpp +./test/hessenberg.cpp +./test/array_reverse.cpp 
+./test/special_numbers.cpp +./test/array_for_matrix.cpp +./test/product_large.cpp +./test/resize.cpp +./test/sparse_solvers.cpp +./test/selfadjoint.cpp +./test/schur_real.cpp +./test/sparse_basic.cpp +./test/conjugate_gradient.cpp +./test/real_qz.cpp +./test/bandmatrix.cpp +./test/dense_storage.cpp +./test/permutationmatrices.cpp +./test/array_cwise.cpp +./test/qr_colpivoting.cpp +./test/array_replicate.cpp +./test/rvalue_types.cpp +./test/stable_norm.cpp +./test/geo_homogeneous.cpp +./test/main.h +./test/eigensolver_complex.cpp +./test/product_trmm.cpp +./test/bicgstab.cpp +./test/redux.cpp +./test/klu_support.cpp +./test/geo_alignedbox.cpp +./test/is_same_dense.cpp +./test/sparse_permutations.cpp +./test/sparse_vector.cpp +./test/diagonal.cpp +./test/sparse.h +./test/mapstride.cpp +./test/visitor.cpp +./test/geo_hyperplane.cpp +./test/bdcsvd.cpp +./test/product_trmv.cpp +./test/nestbyvalue.cpp +./test/array_of_string.cpp +./test/superlu_support.cpp +./test/sizeof.cpp +./test/boostmultiprec.cpp +./test/commainitializer.cpp +./test/constructor.cpp +./test/mixingtypes.cpp +./test/miscmatrices.cpp +./test/mapstaticmethods.cpp +./test/product_notemporary.cpp +./test/initializer_list_construction.cpp +./test/incomplete_cholesky.cpp +./test/geo_parametrizedline.cpp +./test/indexed_view.cpp +./test/qtvector.cpp +./test/sparselu.cpp +./test/sparse_product.cpp +./test/dynalloc.cpp +./test/fastmath.cpp +./test/prec_inverse_4x4.cpp +./test/umeyama.cpp +./test/reshape.cpp +./test/product_extra.cpp +./test/jacobi.cpp +./test/sparse_ref.cpp +./test/nomalloc.cpp +./test/spqr_support.cpp +./test/lscg.cpp +./test/cholesky.cpp +./test/eigensolver_generic.cpp +./test/geo_orthomethods.cpp +./test/svd_fill.h +./test/stl_iterators.cpp +./Eigen/src/MetisSupport/MetisSupport.h +./Eigen/src/CholmodSupport/CholmodSupport.h +./Eigen/src/QR/CompleteOrthogonalDecomposition.h +./Eigen/src/QR/FullPivHouseholderQR.h +./Eigen/src/QR/HouseholderQR.h +./Eigen/src/QR/ColPivHouseholderQR.h 
+./Eigen/src/plugins/CommonCwiseUnaryOps.h +./Eigen/src/plugins/BlockMethods.h +./Eigen/src/plugins/CommonCwiseBinaryOps.h +./Eigen/src/plugins/MatrixCwiseUnaryOps.h +./Eigen/src/plugins/IndexedViewMethods.h +./Eigen/src/plugins/MatrixCwiseBinaryOps.h +./Eigen/src/SVD/UpperBidiagonalization.h +./Eigen/src/SVD/SVDBase.h +./Eigen/src/SVD/BDCSVD.h +./Eigen/src/SVD/JacobiSVD.h +./Eigen/src/SparseLU/SparseLU_relax_snode.h +./Eigen/src/SparseLU/SparseLU_column_dfs.h +./Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +./Eigen/src/SparseLU/SparseLU_pivotL.h +./Eigen/src/SparseLU/SparseLU.h +./Eigen/src/SparseLU/SparseLU_pruneL.h +./Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +./Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +./Eigen/src/SparseLU/SparseLU_kernel_bmod.h +./Eigen/src/SparseLU/SparseLU_panel_dfs.h +./Eigen/src/SparseLU/SparseLU_panel_bmod.h +./Eigen/src/SparseLU/SparseLU_Structs.h +./Eigen/src/SparseLU/SparseLUImpl.h +./Eigen/src/SparseLU/SparseLU_Memory.h +./Eigen/src/SparseLU/SparseLU_column_bmod.h +./Eigen/src/SparseLU/SparseLU_gemm_kernel.h +./Eigen/src/SparseLU/SparseLU_Utils.h +./Eigen/src/OrderingMethods/Eigen_Colamd.h +./Eigen/src/OrderingMethods/Ordering.h +./Eigen/src/OrderingMethods/Amd.h +./Eigen/src/UmfPackSupport/UmfPackSupport.h +./Eigen/src/Geometry/Umeyama.h +./Eigen/src/Geometry/Transform.h +./Eigen/src/Geometry/OrthoMethods.h +./Eigen/src/Geometry/Hyperplane.h +./Eigen/src/Geometry/Homogeneous.h +./Eigen/src/Geometry/RotationBase.h +./Eigen/src/Geometry/EulerAngles.h +./Eigen/src/Geometry/Translation.h +./Eigen/src/Geometry/Rotation2D.h +./Eigen/src/Geometry/Scaling.h +./Eigen/src/Geometry/AlignedBox.h +./Eigen/src/Geometry/ParametrizedLine.h +./Eigen/src/Geometry/Quaternion.h +./Eigen/src/Geometry/AngleAxis.h +./Eigen/src/Geometry/arch/Geometry_SSE.h +./Eigen/src/KLUSupport/KLUSupport.h +./Eigen/src/misc/Kernel.h +./Eigen/src/misc/RealSvd2x2.h +./Eigen/src/misc/Image.h +./Eigen/src/StlSupport/details.h +./Eigen/src/StlSupport/StdList.h 
+./Eigen/src/StlSupport/StdDeque.h +./Eigen/src/StlSupport/StdVector.h +./Eigen/src/SparseQR/SparseQR.h +./Eigen/src/SuperLUSupport/SuperLUSupport.h +./Eigen/src/Householder/Householder.h +./Eigen/src/Householder/HouseholderSequence.h +./Eigen/src/Householder/BlockHouseholder.h +./Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +./Eigen/src/Eigenvalues/EigenSolver.h +./Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +./Eigen/src/Eigenvalues/Tridiagonalization.h +./Eigen/src/Eigenvalues/HessenbergDecomposition.h +./Eigen/src/Eigenvalues/RealQZ.h +./Eigen/src/Eigenvalues/RealSchur.h +./Eigen/src/Eigenvalues/ComplexSchur.h +./Eigen/src/Eigenvalues/ComplexEigenSolver.h +./Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +./Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +./Eigen/src/SparseCholesky/SimplicialCholesky.h +./Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +./Eigen/src/Cholesky/LLT.h +./Eigen/src/Cholesky/LDLT.h +./Eigen/src/Jacobi/Jacobi.h +./Eigen/src/PaStiXSupport/PaStiXSupport.h +./Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +./Eigen/src/LU/Determinant.h +./Eigen/src/LU/InverseImpl.h +./Eigen/src/LU/PartialPivLU.h +./Eigen/src/LU/arch/Inverse_SSE.h +./Eigen/src/LU/FullPivLU.h +./Eigen/src/Core/Map.h +./Eigen/src/Core/VectorwiseOp.h +./Eigen/src/Core/VectorBlock.h +./Eigen/src/Core/Array.h +./Eigen/src/Core/Assign.h +./Eigen/src/Core/Dot.h +./Eigen/src/Core/NestByValue.h +./Eigen/src/Core/CoreEvaluators.h +./Eigen/src/Core/ReturnByValue.h +./Eigen/src/Core/SelfCwiseBinaryOp.h +./Eigen/src/Core/GlobalFunctions.h +./Eigen/src/Core/Transpositions.h +./Eigen/src/Core/Fuzzy.h +./Eigen/src/Core/NoAlias.h +./Eigen/src/Core/CwiseNullaryOp.h +./Eigen/src/Core/NumTraits.h +./Eigen/src/Core/IndexedView.h +./Eigen/src/Core/ArrayWrapper.h +./Eigen/src/Core/util/SymbolicIndex.h +./Eigen/src/Core/util/BlasUtil.h +./Eigen/src/Core/util/Constants.h +./Eigen/src/Core/util/IntegralConstant.h +./Eigen/src/Core/util/ReshapedHelper.h 
+./Eigen/src/Core/util/StaticAssert.h +./Eigen/src/Core/util/IndexedViewHelper.h +./Eigen/src/Core/util/ConfigureVectorization.h +./Eigen/src/Core/util/ForwardDeclarations.h +./Eigen/src/Core/util/Meta.h +./Eigen/src/Core/util/XprHelper.h +./Eigen/src/Core/util/Macros.h +./Eigen/src/Core/util/Memory.h +./Eigen/src/Core/Product.h +./Eigen/src/Core/Replicate.h +./Eigen/src/Core/ArrayBase.h +./Eigen/src/Core/functors/NullaryFunctors.h +./Eigen/src/Core/functors/StlFunctors.h +./Eigen/src/Core/functors/AssignmentFunctors.h +./Eigen/src/Core/functors/UnaryFunctors.h +./Eigen/src/Core/functors/TernaryFunctors.h +./Eigen/src/Core/functors/BinaryFunctors.h +./Eigen/src/Core/Redux.h +./Eigen/src/Core/EigenBase.h +./Eigen/src/Core/SolverBase.h +./Eigen/src/Core/ProductEvaluators.h +./Eigen/src/Core/Block.h +./Eigen/src/Core/SolveTriangular.h +./Eigen/src/Core/ArithmeticSequence.h +./Eigen/src/Core/MatrixBase.h +./Eigen/src/Core/PlainObjectBase.h +./Eigen/src/Core/Transpose.h +./Eigen/src/Core/IO.h +./Eigen/src/Core/MathFunctions.h +./Eigen/src/Core/Stride.h +./Eigen/src/Core/MathFunctionsImpl.h +./Eigen/src/Core/StableNorm.h +./Eigen/src/Core/DiagonalProduct.h +./Eigen/src/Core/products/GeneralMatrixMatrix.h +./Eigen/src/Core/products/GeneralMatrixVector.h +./Eigen/src/Core/products/SelfadjointMatrixVector.h +./Eigen/src/Core/products/GeneralBlockPanelKernel.h +./Eigen/src/Core/products/TriangularSolverMatrix.h +./Eigen/src/Core/products/SelfadjointMatrixMatrix.h +./Eigen/src/Core/products/Parallelizer.h +./Eigen/src/Core/products/SelfadjointRank2Update.h +./Eigen/src/Core/products/TriangularMatrixMatrix.h +./Eigen/src/Core/products/TriangularMatrixVector.h +./Eigen/src/Core/products/SelfadjointProduct.h +./Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +./Eigen/src/Core/products/TriangularSolverVector.h +./Eigen/src/Core/CwiseUnaryView.h +./Eigen/src/Core/CommaInitializer.h +./Eigen/src/Core/DenseStorage.h +./Eigen/src/Core/DenseBase.h 
+./Eigen/src/Core/PartialReduxEvaluator.h +./Eigen/src/Core/CoreIterators.h +./Eigen/src/Core/PermutationMatrix.h +./Eigen/src/Core/CwiseTernaryOp.h +./Eigen/src/Core/Reverse.h +./Eigen/src/Core/Reshaped.h +./Eigen/src/Core/Inverse.h +./Eigen/src/Core/TriangularMatrix.h +./Eigen/src/Core/BooleanRedux.h +./Eigen/src/Core/ForceAlignedAccess.h +./Eigen/src/Core/Ref.h +./Eigen/src/Core/StlIterators.h +./Eigen/src/Core/BandMatrix.h +./Eigen/src/Core/ConditionEstimator.h +./Eigen/src/Core/Diagonal.h +./Eigen/src/Core/DiagonalMatrix.h +./Eigen/src/Core/AssignEvaluator.h +./Eigen/src/Core/CwiseBinaryOp.h +./Eigen/src/Core/Visitor.h +./Eigen/src/Core/GenericPacketMath.h +./Eigen/src/Core/SelfAdjointView.h +./Eigen/src/Core/Random.h +./Eigen/src/Core/Solve.h +./Eigen/src/Core/arch/AltiVec/MathFunctions.h +./Eigen/src/Core/arch/AltiVec/PacketMath.h +./Eigen/src/Core/arch/AltiVec/Complex.h +./Eigen/src/Core/arch/MSA/MathFunctions.h +./Eigen/src/Core/arch/MSA/Complex.h +./Eigen/src/Core/arch/MSA/PacketMath.h +./Eigen/src/Core/arch/GPU/Half.h +./Eigen/src/Core/arch/GPU/PacketMathHalf.h +./Eigen/src/Core/arch/GPU/MathFunctions.h +./Eigen/src/Core/arch/GPU/PacketMath.h +./Eigen/src/Core/arch/GPU/TypeCasting.h +./Eigen/src/Core/arch/NEON/MathFunctions.h +./Eigen/src/Core/arch/NEON/Complex.h +./Eigen/src/Core/arch/NEON/PacketMath.h +./Eigen/src/Core/arch/NEON/TypeCasting.h +./Eigen/src/Core/arch/AVX/MathFunctions.h +./Eigen/src/Core/arch/AVX/TypeCasting.h +./Eigen/src/Core/arch/AVX/Complex.h +./Eigen/src/Core/arch/AVX/PacketMath.h +./Eigen/src/Core/arch/SYCL/InteropHeaders.h +./Eigen/src/Core/arch/SYCL/PacketMath.h +./Eigen/src/Core/arch/SYCL/TypeCasting.h +./Eigen/src/Core/arch/SYCL/MathFunctions.h +./Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +./Eigen/src/Core/arch/Default/ConjHelper.h +./Eigen/src/Core/arch/Default/Settings.h +./Eigen/src/Core/arch/AVX512/MathFunctions.h +./Eigen/src/Core/arch/AVX512/PacketMath.h +./Eigen/src/Core/arch/AVX512/Complex.h 
+./Eigen/src/Core/arch/SSE/PacketMath.h +./Eigen/src/Core/arch/SSE/Complex.h +./Eigen/src/Core/arch/SSE/TypeCasting.h +./Eigen/src/Core/arch/SSE/MathFunctions.h +./Eigen/src/Core/arch/ZVector/MathFunctions.h +./Eigen/src/Core/arch/ZVector/PacketMath.h +./Eigen/src/Core/arch/ZVector/Complex.h +./Eigen/src/Core/arch/CUDA/Complex.h +./Eigen/src/Core/Swap.h +./Eigen/src/Core/MapBase.h +./Eigen/src/Core/GeneralProduct.h +./Eigen/src/Core/Matrix.h +./Eigen/src/Core/Select.h +./Eigen/src/Core/CwiseUnaryOp.h +./Eigen/src/Core/DenseCoeffsBase.h +./Eigen/src/SparseCore/SparseCwiseUnaryOp.h +./Eigen/src/SparseCore/TriangularSolver.h +./Eigen/src/SparseCore/SparseView.h +./Eigen/src/SparseCore/SparseSolverBase.h +./Eigen/src/SparseCore/SparseTranspose.h +./Eigen/src/SparseCore/SparseDenseProduct.h +./Eigen/src/SparseCore/SparseMap.h +./Eigen/src/SparseCore/SparseProduct.h +./Eigen/src/SparseCore/SparseUtil.h +./Eigen/src/SparseCore/SparsePermutation.h +./Eigen/src/SparseCore/SparseTriangularView.h +./Eigen/src/SparseCore/SparseSelfAdjointView.h +./Eigen/src/SparseCore/SparseMatrixBase.h +./Eigen/src/SparseCore/AmbiVector.h +./Eigen/src/SparseCore/SparseAssign.h +./Eigen/src/SparseCore/SparseRedux.h +./Eigen/src/SparseCore/SparseDot.h +./Eigen/src/SparseCore/SparseCwiseBinaryOp.h +./Eigen/src/SparseCore/SparseCompressedBase.h +./Eigen/src/SparseCore/SparseSparseProductWithPruning.h +./Eigen/src/SparseCore/SparseColEtree.h +./Eigen/src/SparseCore/SparseRef.h +./Eigen/src/SparseCore/CompressedStorage.h +./Eigen/src/SparseCore/MappedSparseMatrix.h +./Eigen/src/SparseCore/SparseDiagonalProduct.h +./Eigen/src/SparseCore/SparseFuzzy.h +./Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +./Eigen/src/SparseCore/SparseMatrix.h +./Eigen/src/SparseCore/SparseVector.h +./Eigen/src/SparseCore/SparseBlock.h +./Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +./Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +./Eigen/src/IterativeLinearSolvers/BiCGSTAB.h 
+./Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +./Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +./Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +./Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +./Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +./unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h +./unsupported/Eigen/src/SpecialFunctions/arch/GPU/GpuSpecialFunctions.h +./unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h +./unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h +./unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h +./unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h +./unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h +./unsupported/Eigen/src/Polynomials/Companion.h +./unsupported/Eigen/src/Polynomials/PolynomialUtils.h +./unsupported/Eigen/src/Polynomials/PolynomialSolver.h +./unsupported/Eigen/src/Splines/Spline.h +./unsupported/Eigen/src/Splines/SplineFwd.h +./unsupported/Eigen/src/Splines/SplineFitting.h +./unsupported/Eigen/src/BVH/KdBVH.h +./unsupported/Eigen/src/BVH/BVAlgorithms.h +./unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h +./unsupported/Eigen/src/AutoDiff/AutoDiffVector.h +./unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +./unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h +./unsupported/Eigen/src/MatrixFunctions/MatrixPower.h +./unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h +./unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h +./unsupported/Eigen/src/MatrixFunctions/StemFunction.h +./unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h +./unsupported/Eigen/src/Skyline/SkylineStorage.h +./unsupported/Eigen/src/Skyline/SkylineMatrixBase.h +./unsupported/Eigen/src/Skyline/SkylineMatrix.h +./unsupported/Eigen/src/Skyline/SkylineInplaceLU.h +./unsupported/Eigen/src/Skyline/SkylineProduct.h +./unsupported/Eigen/src/Skyline/SkylineUtil.h +./unsupported/Eigen/src/FFT/ei_kissfft_impl.h 
+./unsupported/Eigen/src/FFT/ei_fftw_impl.h +./unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h +./unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h +./unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h +./unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h +./unsupported/Eigen/src/NumericalDiff/NumericalDiff.h +./unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h +./unsupported/Eigen/src/IterativeSolvers/MINRES.h +./unsupported/Eigen/src/IterativeSolvers/DGMRES.h +./unsupported/Eigen/src/IterativeSolvers/Scaling.h +./unsupported/Eigen/src/IterativeSolvers/GMRES.h +./unsupported/Eigen/src/MoreVectorization/MathFunctions.h +./unsupported/Eigen/src/EulerAngles/EulerAngles.h +./unsupported/Eigen/src/EulerAngles/EulerSystem.h +./unsupported/Eigen/src/SparseExtra/BlockOfDynamicSparseMatrix.h +./unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h +./unsupported/Eigen/src/SparseExtra/BlockSparseMatrix.h +./unsupported/Eigen/src/SparseExtra/RandomSetter.h +./unsupported/Eigen/src/SparseExtra/MatrixMarketIterator.h +./unsupported/Eigen/src/SparseExtra/MarketIO.h +./unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h +./unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h +./unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h +./unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h +./unsupported/Eigen/CXX11/src/util/EmulateCXX11Meta.h +./unsupported/Eigen/CXX11/src/util/CXX11Meta.h +./unsupported/Eigen/CXX11/src/util/MaxSizeVector.h +./unsupported/Eigen/CXX11/src/util/EmulateArray.h +./unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h +./unsupported/Eigen/CXX11/src/ThreadPool/ThreadYield.h +./unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h +./unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h +./unsupported/Eigen/CXX11/src/ThreadPool/ThreadCancel.h +./unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h 
+./unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h +./unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h +./unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h +./unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h +./unsupported/Eigen/CXX11/src/Tensor/TensorRef.h +./unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h +./unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h +./unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h +./unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +./unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h +./unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h +./unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +./unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h +./unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +./unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h +./unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h +./unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +./unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +./unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h +./unsupported/Eigen/CXX11/src/Tensor/Tensor.h +./unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h +./unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h +./unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +./unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h +./unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h +./unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +./unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +./unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h +./unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h +./unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h +./unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h +./unsupported/Eigen/CXX11/src/Tensor/TensorArgMaxSycl.h +./unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +./unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +./unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h 
+./unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +./unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +./unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h +./unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h +./unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +./unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h +./unsupported/Eigen/CXX11/src/Tensor/TensorIO.h +./unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +./unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h +./unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h +./unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +./unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h +./unsupported/Eigen/CXX11/src/Tensor/TensorSyclFunctors.h +./unsupported/Eigen/CXX11/src/Tensor/TensorMap.h +./unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h +./unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h +./unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h +./unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +./unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h +./unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h +./unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h +./unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h +./unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h +./unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaUndefines.h +./unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h +./unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h +./unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +./unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h +./unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h +./unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h +./unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h +./unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h +./unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h +./unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h 
+./unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h +./unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h +./unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +./unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +./unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h +./unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +./unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h +./unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +./unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +./unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +./unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h +./unsupported/Eigen/CXX11/src/FixedPoint/MatMatProduct.h +./unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductNEON.h +./unsupported/Eigen/CXX11/src/FixedPoint/MatVecProduct.h +./unsupported/Eigen/CXX11/src/FixedPoint/FixedPointTypes.h +./unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductAVX2.h +./unsupported/bench/bench_svd.cpp +./unsupported/test/cxx11_tensor_image_patch_sycl.cpp +./unsupported/test/cxx11_tensor_expr.cpp +./unsupported/test/FFTW.cpp +./unsupported/test/cxx11_tensor_reverse_sycl.cpp +./unsupported/test/cxx11_tensor_comparisons.cpp +./unsupported/test/cxx11_tensor_intdiv.cpp +./unsupported/test/autodiff.cpp +./unsupported/test/cxx11_tensor_executor.cpp +./unsupported/test/cxx11_tensor_reduction.cpp +./unsupported/test/cxx11_tensor_device_sycl.cpp +./unsupported/test/minres.cpp +./unsupported/test/cxx11_tensor_striding.cpp +./unsupported/test/cxx11_tensor_chipping.cpp +./unsupported/test/cxx11_tensor_convolution_sycl.cpp +./unsupported/test/openglsupport.cpp +./unsupported/test/cxx11_tensor_ifft.cpp +./unsupported/test/polynomialutils.cpp +./unsupported/test/cxx11_tensor_block_access.cpp +./unsupported/test/cxx11_tensor_block_eval.cpp +./unsupported/test/cxx11_tensor_block_io.cpp +./unsupported/test/cxx11_tensor_morphing.cpp +./unsupported/test/cxx11_tensor_casts.cpp +./unsupported/test/cxx11_tensor_shuffling_sycl.cpp 
+./unsupported/test/cxx11_tensor_morphing_sycl.cpp +./unsupported/test/forward_adolc.cpp +./unsupported/test/cxx11_tensor_layout_swap.cpp +./unsupported/test/cxx11_tensor_move.cpp +./unsupported/test/EulerAngles.cpp +./unsupported/test/cxx11_tensor_trace.cpp +./unsupported/test/alignedvector3.cpp +./unsupported/test/cxx11_tensor_lvalue.cpp +./unsupported/test/cxx11_tensor_argmax.cpp +./unsupported/test/cxx11_tensor_broadcast_sycl.cpp +./unsupported/test/autodiff_scalar.cpp +./unsupported/test/sparse_extra.cpp +./unsupported/test/cxx11_tensor_of_strings.cpp +./unsupported/test/cxx11_tensor_empty.cpp +./unsupported/test/cxx11_tensor_patch.cpp +./unsupported/test/cxx11_tensor_sycl.cpp +./unsupported/test/cxx11_tensor_forced_eval_sycl.cpp +./unsupported/test/cxx11_tensor_inflation_sycl.cpp +./unsupported/test/BVH.cpp +./unsupported/test/cxx11_tensor_generator.cpp +./unsupported/test/cxx11_meta.cpp +./unsupported/test/matrix_functions.h +./unsupported/test/kronecker_product.cpp +./unsupported/test/matrix_function.cpp +./unsupported/test/cxx11_tensor_thread_pool.cpp +./unsupported/test/cxx11_non_blocking_thread_pool.cpp +./unsupported/test/cxx11_tensor_fft.cpp +./unsupported/test/cxx11_tensor_assign.cpp +./unsupported/test/cxx11_tensor_simple.cpp +./unsupported/test/cxx11_tensor_of_complex.cpp +./unsupported/test/cxx11_tensor_inflation.cpp +./unsupported/test/cxx11_tensor_map.cpp +./unsupported/test/cxx11_tensor_shuffling.cpp +./unsupported/test/cxx11_tensor_padding.cpp +./unsupported/test/cxx11_tensor_argmax_sycl.cpp +./unsupported/test/matrix_square_root.cpp +./unsupported/test/dgmres.cpp +./unsupported/test/cxx11_tensor_custom_op_sycl.cpp +./unsupported/test/cxx11_tensor_reduction_sycl.cpp +./unsupported/test/cxx11_runqueue.cpp +./unsupported/test/cxx11_tensor_const.cpp +./unsupported/test/matrix_power.cpp +./unsupported/test/cxx11_tensor_contraction.cpp +./unsupported/test/cxx11_tensor_random.cpp +./unsupported/test/cxx11_tensor_volume_patch_sycl.cpp 
+./unsupported/test/cxx11_tensor_contract_sycl.cpp +./unsupported/test/cxx11_tensor_math.cpp +./unsupported/test/splines.cpp +./unsupported/test/cxx11_tensor_ref.cpp +./unsupported/test/cxx11_tensor_concatenation_sycl.cpp +./unsupported/test/gmres.cpp +./unsupported/test/cxx11_tensor_fixed_size.cpp +./unsupported/test/cxx11_tensor_custom_op.cpp +./unsupported/test/cxx11_tensor_generator_sycl.cpp +./unsupported/test/cxx11_tensor_uint128.cpp +./unsupported/test/cxx11_tensor_builtins_sycl.cpp +./unsupported/test/polynomialsolver.cpp +./unsupported/test/cxx11_tensor_concatenation.cpp +./unsupported/test/cxx11_tensor_broadcasting.cpp +./unsupported/test/cxx11_tensor_convolution.cpp +./unsupported/test/cxx11_tensor_forced_eval.cpp +./unsupported/test/levenberg_marquardt.cpp +./unsupported/test/cxx11_tensor_reverse.cpp +./unsupported/test/cxx11_tensor_notification.cpp +./unsupported/test/cxx11_tensor_patch_sycl.cpp +./unsupported/test/cxx11_tensor_image_patch.cpp +./unsupported/test/cxx11_tensor_scan.cpp +./unsupported/test/cxx11_tensor_padding_sycl.cpp +./unsupported/test/cxx11_tensor_index_list.cpp +./unsupported/test/cxx11_tensor_io.cpp +./unsupported/test/cxx11_tensor_mixed_indices.cpp +./unsupported/test/cxx11_tensor_striding_sycl.cpp +./unsupported/test/cxx11_tensor_of_const_values.cpp +./unsupported/test/cxx11_tensor_symmetry.cpp +./unsupported/test/cxx11_tensor_custom_index.cpp +./unsupported/test/cxx11_tensor_chipping_sycl.cpp +./unsupported/test/cxx11_tensor_roundings.cpp +./unsupported/test/matrix_exponential.cpp +./unsupported/test/cxx11_eventcount.cpp +./unsupported/test/special_functions.cpp +./unsupported/test/cxx11_tensor_dimension.cpp +./unsupported/test/cxx11_tensor_layout_swap_sycl.cpp +./lapack/eigenvalues.cpp +./lapack/single.cpp +./lapack/svd.cpp +./lapack/complex_single.cpp +./lapack/lu.cpp +./lapack/double.cpp +./lapack/complex_double.cpp +./lapack/cholesky.cpp +./lapack/lapack_common.h +./blas/level2_impl.h +./blas/PackedTriangularMatrixVector.h 
+./blas/level3_impl.h +./blas/complex_double.cpp +./blas/common.h +./blas/GeneralRank1Update.h +./blas/double.cpp +./blas/complex_single.cpp +./blas/Rank2Update.h +./blas/level1_impl.h +./blas/level2_real_impl.h +./blas/level1_real_impl.h +./blas/single.cpp +./blas/PackedSelfadjointProduct.h +./blas/BandTriangularSolver.h +./blas/level2_cplx_impl.h +./blas/PackedTriangularSolverVector.h +./blas/level1_cplx_impl.h +./bench/analyze-blocking-sizes.cpp +./bench/BenchTimer.h +./bench/spbench/spbenchsolver.h +./bench/spbench/spbenchstyle.h +./bench/benchFFT.cpp +./bench/eig33.cpp +./bench/benchmark-blocking-sizes.cpp +./demos/opengl/quaternion_demo.cpp +./demos/opengl/camera.h +./demos/opengl/gpuhelper.cpp +./demos/opengl/gpuhelper.h +./demos/opengl/icosphere.cpp +./demos/opengl/quaternion_demo.h +./demos/opengl/trackball.h +./demos/opengl/icosphere.h +./demos/opengl/camera.cpp +./demos/opengl/trackball.cpp +./demos/mix_eigen_and_c/binary_library.h +./demos/mix_eigen_and_c/binary_library.cpp +./demos/mandelbrot/mandelbrot.cpp +./demos/mandelbrot/mandelbrot.h + +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. 
"Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. "Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. "Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. 
For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. 
+Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. 
Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. 
However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. 
Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. * +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. 
* +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. 
Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. 
+ +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. + +---------------------------------------------------------------------- +Following applies to: +./doc/UsingIntelMKL.dox +./doc/UsingIntelMKL.dox +./Eigen/src/Eigenvalues/ComplexSchur_MKL.h +./Eigen/src/Eigenvalues/ComplexSchur_MKL.h +./Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h +./Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h +./Eigen/src/Eigenvalues/RealSchur_MKL.h +./Eigen/src/Eigenvalues/RealSchur_MKL.h +./Eigen/src/LU/arch/Inverse_SSE.h +./Eigen/src/LU/arch/Inverse_SSE.h +./Eigen/src/LU/PartialPivLU_MKL.h +./Eigen/src/LU/PartialPivLU_MKL.h +./Eigen/src/QR/HouseholderQR_MKL.h +./Eigen/src/QR/HouseholderQR_MKL.h +./Eigen/src/QR/ColPivHouseholderQR_MKL.h +./Eigen/src/QR/ColPivHouseholderQR_MKL.h +./Eigen/src/SVD/JacobiSVD_MKL.h +./Eigen/src/SVD/JacobiSVD_MKL.h +./Eigen/src/PardisoSupport/PardisoSupport.h +./Eigen/src/PardisoSupport/PardisoSupport.h +./Eigen/src/Core/Assign_MKL.h +./Eigen/src/Core/Assign_MKL.h +./Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h +./Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h +./Eigen/src/Core/products/GeneralMatrixVector_MKL.h +./Eigen/src/Core/products/GeneralMatrixVector_MKL.h +./Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h +./Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h +./Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h +./Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h +./Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h +./Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h +./Eigen/src/Core/products/TriangularMatrixVector_MKL.h +./Eigen/src/Core/products/TriangularMatrixVector_MKL.h +./Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h 
+./Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h +./Eigen/src/Core/products/TriangularSolverMatrix_MKL.h +./Eigen/src/Core/products/TriangularSolverMatrix_MKL.h +./Eigen/src/Core/util/MKL_support.h +./Eigen/src/Core/util/MKL_support.h +./Eigen/src/Cholesky/LLT_MKL.h +./Eigen/src/Cholesky/LLT_MKL.h + +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. * + Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the + distribution. * Neither the name of Intel Corporation nor the + names of its contributors may be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +---------------------------------------------------------------------- +Following applies to: +./unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h +./unsupported/Eigen/src/LevenbergMarquardt/LMcovar.h +./unsupported/Eigen/src/LevenbergMarquardt/LMonestep.h +./unsupported/Eigen/src/LevenbergMarquardt/LMpar.h +./unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h + +Minpack Copyright Notice (1999) University of Chicago. All rights +reserved + +Redistribution and use in source and binary forms, with or +without modification, are permitted provided that the +following conditions are met: + +1. Redistributions of source code must retain the above +copyright notice, this list of conditions and the following +disclaimer. + +2. Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following +disclaimer in the documentation and/or other materials +provided with the distribution. + +3. The end-user documentation included with the +redistribution, if any, must include the following +acknowledgment: + + "This product includes software developed by the + University of Chicago, as Operator of Argonne National + Laboratory." + +Alternately, this acknowledgment may appear in the software +itself, if and wherever such third-party acknowledgments +normally appear. + +4. WARRANTY DISCLAIMER. THE SOFTWARE IS SUPPLIED "AS IS" +WITHOUT WARRANTY OF ANY KIND.
THE COPYRIGHT HOLDER, THE +UNITED STATES, THE UNITED STATES DEPARTMENT OF ENERGY, AND +THEIR EMPLOYEES: (1) DISCLAIM ANY WARRANTIES, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO ANY IMPLIED WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE +OR NON-INFRINGEMENT, (2) DO NOT ASSUME ANY LEGAL LIABILITY +OR RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR +USEFULNESS OF THE SOFTWARE, (3) DO NOT REPRESENT THAT USE OF +THE SOFTWARE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS, (4) +DO NOT WARRANT THAT THE SOFTWARE WILL FUNCTION +UNINTERRUPTED, THAT IT IS ERROR-FREE OR THAT ANY ERRORS WILL +BE CORRECTED. + +5. LIMITATION OF LIABILITY. IN NO EVENT WILL THE COPYRIGHT +HOLDER, THE UNITED STATES, THE UNITED STATES DEPARTMENT OF +ENERGY, OR THEIR EMPLOYEES: BE LIABLE FOR ANY INDIRECT, +INCIDENTAL, CONSEQUENTIAL, SPECIAL OR PUNITIVE DAMAGES OF +ANY KIND OR NATURE, INCLUDING BUT NOT LIMITED TO LOSS OF +PROFITS OR LOSS OF DATA, FOR ANY REASON WHATSOEVER, WHETHER +SUCH LIABILITY IS ASSERTED ON THE BASIS OF CONTRACT, TORT +(INCLUDING NEGLIGENCE OR STRICT LIABILITY), OR OTHERWISE, +EVEN IF ANY OF SAID PARTIES HAS BEEN WARNED OF THE +POSSIBILITY OF SUCH LOSS OR DAMAGES. + + +Copyright (c) 1992-2013 The University of Tennessee and The University + of Tennessee Research Foundation. All rights + reserved. +Copyright (c) 2000-2013 The University of California Berkeley. All + rights reserved. +Copyright (c) 2006-2013 The University of Colorado Denver. All rights + reserved. + +Following applies to: +./lapack/*.c + +$COPYRIGHT$ + +Additional copyrights may follow + +$HEADER$ + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +- Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ +- Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer listed + in this license in the documentation and/or other materials + provided with the distribution. + +- Neither the name of the copyright holders nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +The copyright holders provide no reassurances that the source code +provided does not infringe any patent, copyright, or any other +intellectual property rights of third parties. The copyright holders +disclaim any liability to any recipient for claims brought against +recipient by any third party for infringement of that parties +intellectual property rights. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +---------------------------------------------------------------------- +Following applies to: + +./cmake/FindComputeCpp.cmake + + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== Farmhash + +// Copyright (c) 2014 Google, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== Flatbuffers + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== Gast + +Copyright (c) 2016, Serge Guelton +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + Neither the name of HPCProject, Serge Guelton nor the names of its + contributors may be used to endorse or promote products derived from this + software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== Gemmlowp + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== google-pasta + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== grpc + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== grpcio + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +----------------------------------------------------------- + +BSD 3-Clause License + +Copyright 2016, Google Inc. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from this +software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +THE POSSIBILITY OF SUCH DAMAGE. + +----------------------------------------------------------- + +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. 
"Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. "Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. 
Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. +Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. 
Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. 
Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. 
However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. 
Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. * +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. 
* +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. 
Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. 
+ +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== H5py + +Copyright Notice and Statement for the h5py Project +=================================================== + + Copyright (c) 2008-2013 Andrew Collette and contributors + http://www.h5py.org + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + a. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + b. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the + distribution. + + c. Neither the name of the author nor the names of contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +------------------------------------------------------------------------------- + +HDF5 (Hierarchical Data Format 5) Software Library and Utilities +Copyright 2006-2007 by The HDF Group (THG). + +NCSA HDF5 (Hierarchical Data Format 5) Software Library and Utilities +Copyright 1998-2006 by the Board of Trustees of the University of Illinois. + +All rights reserved. + +Contributors: National Center for Supercomputing Applications (NCSA) +at the University of Illinois, Fortner Software, Unidata Program +Center (netCDF), The Independent JPEG Group (JPEG), Jean-loup Gailly +and Mark Adler (gzip), and Digital Equipment Corporation (DEC). + +Redistribution and use in source and binary forms, with or without +modification, are permitted for any purpose (including commercial +purposes) provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright +notice, this list of conditions, and the following disclaimer. + 2. Redistributions in binary form must reproduce the above +copyright notice, this list of conditions, and the following +disclaimer in the documentation and/or materials provided with the +distribution. + 3. In addition, redistributions of modified forms of the source or +binary code must carry prominent notices stating that the original +code was changed and the date of the change. + 4. 
All publications or advertising materials mentioning features or +use of this software are asked, but not required, to acknowledge that +it was developed by The HDF Group and by the National Center for +Supercomputing Applications at the University of Illinois at +Urbana-Champaign and credit the contributors. + 5. Neither the name of The HDF Group, the name of the University, +nor the name of any Contributor may be used to endorse or promote +products derived from this software without specific prior written +permission from THG, the University, or the Contributor, respectively. + +DISCLAIMER: THIS SOFTWARE IS PROVIDED BY THE HDF GROUP (THG) AND THE +CONTRIBUTORS "AS IS" WITH NO WARRANTY OF ANY KIND, EITHER EXPRESSED OR +IMPLIED. In no event shall THG or the Contributors be liable for any +damages suffered by the users arising out of the use of this software, +even if advised of the possibility of such damage. + +Portions of HDF5 were developed with support from the University of +California, Lawrence Livermore National Laboratory (UC LLNL). The +following statement applies to those portions of the product and must +be retained in any redistribution of source code, binaries, +documentation, and/or accompanying materials: + +This work was partially produced at the University of California, +Lawrence Livermore National Laboratory (UC LLNL) under contract +no. W-7405-ENG-48 (Contract 48) between the U.S. Department of Energy +(DOE) and The Regents of the University of California (University) for +the operation of UC LLNL. + +DISCLAIMER: This work was prepared as an account of work sponsored by +an agency of the United States Government. 
Neither the United States +Government nor the University of California nor any of their +employees, makes any warranty, express or implied, or assumes any +liability or responsibility for the accuracy, completeness, or +usefulness of any information, apparatus, product, or process +disclosed, or represents that its use would not infringe privately- +owned rights. Reference herein to any specific commercial products, +process, or service by trade name, trademark, manufacturer, or +otherwise, does not necessarily constitute or imply its endorsement, +recommendation, or favoring by the United States Government or the +University of California. The views and opinions of authors expressed +herein do not necessarily state or reflect those of the United States +Government or the University of California, and shall not be used for +advertising or product endorsement purposes. + +------------------------------------------------------------------------------- + +Copyright Notice and Statement for PyTables Software Library and Utilities: + +Copyright (c) 2002, 2003, 2004 Francesc Altet +Copyright (c) 2005, 2006, 2007 Carabos Coop. V. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +a. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +b. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the + distribution. + +c. Neither the name of the Carabos Coop. V. nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== Highwayhash + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== icu + +UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE + +See Terms of Use <https://www.unicode.org/copyright.html> +for definitions of Unicode Inc.’s Data Files and Software. + +NOTICE TO USER: Carefully read the following legal agreement. +BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S +DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"), +YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE +TERMS AND CONDITIONS OF THIS AGREEMENT. +IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE +THE DATA FILES OR SOFTWARE. + +COPYRIGHT AND PERMISSION NOTICE + +Copyright © 1991-2023 Unicode, Inc. All rights reserved. +Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
+ +Permission is hereby granted, free of charge, to any person obtaining +a copy of the Unicode data files and any associated documentation +(the "Data Files") or Unicode software and any associated documentation +(the "Software") to deal in the Data Files or Software +without restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, and/or sell copies of +the Data Files or Software, and to permit persons to whom the Data Files +or Software are furnished to do so, provided that either +(a) this copyright and permission notice appear with all copies +of the Data Files or Software, or +(b) this copyright and permission notice appear in associated +Documentation. + +THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT OF THIRD PARTY RIGHTS. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS +NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL +DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, +DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THE DATA FILES OR SOFTWARE. + +Except as contained in this notice, the name of a copyright holder +shall not be used in advertising or otherwise to promote the sale, +use or other dealings in these Data Files or Software without prior +written authorization of the copyright holder. + +---------------------------------------------------------------------- + +Third-Party Software Licenses + +This section contains third-party software notices and/or additional +terms for licensed third-party software components included within ICU +libraries. 
+ +---------------------------------------------------------------------- + +ICU License - ICU 1.8.1 to ICU 57.1 + +COPYRIGHT AND PERMISSION NOTICE + +Copyright (c) 1995-2016 International Business Machines Corporation and others +All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, and/or sell copies of the Software, and to permit persons +to whom the Software is furnished to do so, provided that the above +copyright notice(s) and this permission notice appear in all copies of +the Software and that both the above copyright notice(s) and this +permission notice appear in supporting documentation. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT +OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY +SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER +RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +Except as contained in this notice, the name of a copyright holder +shall not be used in advertising or otherwise to promote the sale, use +or other dealings in this Software without prior written authorization +of the copyright holder. + +All trademarks and registered trademarks mentioned herein are the +property of their respective owners. 
+ +---------------------------------------------------------------------- + +Chinese/Japanese Word Break Dictionary Data (cjdict.txt) + + # The Google Chrome software developed by Google is licensed under + # the BSD license. Other software included in this distribution is + # provided under other licenses, as set forth below. + # + # The BSD License + # http://opensource.org/licenses/bsd-license.php + # Copyright (C) 2006-2008, Google Inc. + # + # All rights reserved. + # + # Redistribution and use in source and binary forms, with or without + # modification, are permitted provided that the following conditions are met: + # + # Redistributions of source code must retain the above copyright notice, + # this list of conditions and the following disclaimer. + # Redistributions in binary form must reproduce the above + # copyright notice, this list of conditions and the following + # disclaimer in the documentation and/or other materials provided with + # the distribution. + # Neither the name of Google Inc. nor the names of its + # contributors may be used to endorse or promote products derived from + # this software without specific prior written permission. + # + # + # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ # + # + # The word list in cjdict.txt are generated by combining three word lists + # listed below with further processing for compound word breaking. The + # frequency is generated with an iterative training against Google web + # corpora. + # + # * Libtabe (Chinese) + # - https://sourceforge.net/project/?group_id=1519 + # - Its license terms and conditions are shown below. + # + # * IPADIC (Japanese) + # - http://chasen.aist-nara.ac.jp/chasen/distribution.html + # - Its license terms and conditions are shown below. + # + # ---------COPYING.libtabe ---- BEGIN-------------------- + # + # /* + # * Copyright (c) 1999 TaBE Project. + # * Copyright (c) 1999 Pai-Hsiang Hsiao. + # * All rights reserved. + # * + # * Redistribution and use in source and binary forms, with or without + # * modification, are permitted provided that the following conditions + # * are met: + # * + # * . Redistributions of source code must retain the above copyright + # * notice, this list of conditions and the following disclaimer. + # * . Redistributions in binary form must reproduce the above copyright + # * notice, this list of conditions and the following disclaimer in + # * the documentation and/or other materials provided with the + # * distribution. + # * . Neither the name of the TaBE Project nor the names of its + # * contributors may be used to endorse or promote products derived + # * from this software without specific prior written permission. + # * + # * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + # * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + # * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + # * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + # * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + # * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + # * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + # * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + # * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + # * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + # * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + # * OF THE POSSIBILITY OF SUCH DAMAGE. + # */ + # + # /* + # * Copyright (c) 1999 Computer Systems and Communication Lab, + # * Institute of Information Science, Academia + # * Sinica. All rights reserved. + # * + # * Redistribution and use in source and binary forms, with or without + # * modification, are permitted provided that the following conditions + # * are met: + # * + # * . Redistributions of source code must retain the above copyright + # * notice, this list of conditions and the following disclaimer. + # * . Redistributions in binary form must reproduce the above copyright + # * notice, this list of conditions and the following disclaimer in + # * the documentation and/or other materials provided with the + # * distribution. + # * . Neither the name of the Computer Systems and Communication Lab + # * nor the names of its contributors may be used to endorse or + # * promote products derived from this software without specific + # * prior written permission. + # * + # * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + # * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + # * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + # * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + # * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + # * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + # * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + # * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + # * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + # * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + # * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + # * OF THE POSSIBILITY OF SUCH DAMAGE. + # */ + # + # Copyright 1996 Chih-Hao Tsai @ Beckman Institute, + # University of Illinois + # c-tsai4@uiuc.edu http://casper.beckman.uiuc.edu/~c-tsai4 + # + # ---------------COPYING.libtabe-----END-------------------------------- + # + # + # ---------------COPYING.ipadic-----BEGIN------------------------------- + # + # Copyright 2000, 2001, 2002, 2003 Nara Institute of Science + # and Technology. All Rights Reserved. + # + # Use, reproduction, and distribution of this software is permitted. + # Any copy of this software, whether in its original form or modified, + # must include both the above copyright notice and the following + # paragraphs. + # + # Nara Institute of Science and Technology (NAIST), + # the copyright holders, disclaims all warranties with regard to this + # software, including all implied warranties of merchantability and + # fitness, in no event shall NAIST be liable for + # any special, indirect or consequential damages or any damages + # whatsoever resulting from loss of use, data or profits, whether in an + # action of contract, negligence or other tortuous action, arising out + # of or in connection with the use or performance of this software. + # + # A large portion of the dictionary entries + # originate from ICOT Free Software. The following conditions for ICOT + # Free Software applies to the current dictionary as well. 
+ # + # Each User may also freely distribute the Program, whether in its + # original form or modified, to any third party or parties, PROVIDED + # that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear + # on, or be attached to, the Program, which is distributed substantially + # in the same form as set out herein and that such intended + # distribution, if actually made, will neither violate or otherwise + # contravene any of the laws and regulations of the countries having + # jurisdiction over the User or the intended distribution itself. + # + # NO WARRANTY + # + # The program was produced on an experimental basis in the course of the + # research and development conducted during the project and is provided + # to users as so produced on an experimental basis. Accordingly, the + # program is provided without any warranty whatsoever, whether express, + # implied, statutory or otherwise. The term "warranty" used herein + # includes, but is not limited to, any warranty of the quality, + # performance, merchantability and fitness for a particular purpose of + # the program and the nonexistence of any infringement or violation of + # any right of any third party. + # + # Each user of the program will agree and understand, and be deemed to + # have agreed and understood, that there is no warranty whatsoever for + # the program and, accordingly, the entire risk arising from or + # otherwise connected with the program is assumed by the user. 
+ # + # Therefore, neither ICOT, the copyright holder, or any other + # organization that participated in or was otherwise related to the + # development of the program and their respective officials, directors, + # officers and other employees shall be held liable for any and all + # damages, including, without limitation, general, special, incidental + # and consequential damages, arising out of or otherwise in connection + # with the use or inability to use the program or any product, material + # or result produced or otherwise obtained by using the program, + # regardless of whether they have been advised of, or otherwise had + # knowledge of, the possibility of such damages at any time during the + # project or thereafter. Each user will be deemed to have agreed to the + # foregoing by his or her commencement of use of the program. The term + # "use" as used herein includes, but is not limited to, the use, + # modification, copying and distribution of the program and the + # production of secondary products from the program. + # + # In the case where the program, whether in its original form or + # modified, was distributed or delivered to or received by a user from + # any person, organization or entity other than ICOT, unless it makes or + # grants independently of ICOT any specific warranty to the user in + # writing, such person, organization or entity, will also be exempted + # from and not be held liable to the user for any such damages as noted + # above as far as the program is concerned. + # + # ---------------COPYING.ipadic-----END---------------------------------- + +---------------------------------------------------------------------- + +Lao Word Break Dictionary Data (laodict.txt) + + # Copyright (C) 2016 and later: Unicode, Inc. and others. + # License & terms of use: http://www.unicode.org/copyright.html + # Copyright (c) 2015 International Business Machines Corporation + # and others. All Rights Reserved. 
+ # + # Project: https://github.com/rober42539/lao-dictionary + # Dictionary: https://github.com/rober42539/lao-dictionary/laodict.txt + # License: https://github.com/rober42539/lao-dictionary/LICENSE.txt + # (copied below) + # + # This file is derived from the above dictionary version of Nov 22, 2020 + # ---------------------------------------------------------------------- + # Copyright (C) 2013 Brian Eugene Wilson, Robert Martin Campbell. + # All rights reserved. + # + # Redistribution and use in source and binary forms, with or without + # modification, are permitted provided that the following conditions are met: + # + # Redistributions of source code must retain the above copyright notice, this + # list of conditions and the following disclaimer. Redistributions in binary + # form must reproduce the above copyright notice, this list of conditions and + # the following disclaimer in the documentation and/or other materials + # provided with the distribution. + # + # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + # OF THE POSSIBILITY OF SUCH DAMAGE. 
+ # -------------------------------------------------------------------------- + +---------------------------------------------------------------------- + +Burmese Word Break Dictionary Data (burmesedict.txt) + + # Copyright (c) 2014 International Business Machines Corporation + # and others. All Rights Reserved. + # + # This list is part of a project hosted at: + # github.com/kanyawtech/myanmar-karen-word-lists + # + # -------------------------------------------------------------------------- + # Copyright (c) 2013, LeRoy Benjamin Sharon + # All rights reserved. + # + # Redistribution and use in source and binary forms, with or without + # modification, are permitted provided that the following conditions + # are met: Redistributions of source code must retain the above + # copyright notice, this list of conditions and the following + # disclaimer. Redistributions in binary form must reproduce the + # above copyright notice, this list of conditions and the following + # disclaimer in the documentation and/or other materials provided + # with the distribution. + # + # Neither the name Myanmar Karen Word Lists, nor the names of its + # contributors may be used to endorse or promote products derived + # from this software without specific prior written permission. + # + # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS + # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR + # TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + # THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + # SUCH DAMAGE. + # -------------------------------------------------------------------------- + +---------------------------------------------------------------------- + +Time Zone Database + + ICU uses the public domain data and code derived from Time Zone +Database for its time zone support. The ownership of the TZ database +is explained in BCP 175: Procedure for Maintaining the Time Zone +Database section 7. + + # 7. Database Ownership + # + # The TZ database itself is not an IETF Contribution or an IETF + # document. Rather it is a pre-existing and regularly updated work + # that is in the public domain, and is intended to remain in the + # public domain. Therefore, BCPs 78 [RFC5378] and 79 [RFC3979] do + # not apply to the TZ Database or contributions that individuals make + # to it. Should any claims be made and substantiated against the TZ + # Database, the organization that is providing the IANA + # Considerations defined in this RFC, under the memorandum of + # understanding with the IETF, currently ICANN, may act in accordance + # with all competent court orders. No ownership claims will be made + # by ICANN or the IETF Trust on the database or the code. Any person + # making a contribution to the database or code waives all rights to + # future claims in that contribution or in the TZ Database. 
+ +---------------------------------------------------------------------- + +punycode.cpp: + +Disclaimer and license + + Regarding this entire document or any portion of it (including + the pseudocode and C code), the author makes no guarantees and + is not responsible for any damage resulting from its use. The + author grants irrevocable permission to anyone to use, modify, + and distribute it in any way that does not diminish the rights + of anyone else to use, modify, and distribute it, provided that + redistributed derivative works do not contain misleading author or + version information. Derivative works need not be licensed under + similar terms. + +---------------------------------------------------------------------- + +Google double-conversion + +Copyright 2006-2011, the V8 project authors. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of Google Inc. nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== libjpeg_turbo + +For a summary of these license terms for the main libjpeg-turbo code, see +LICENSE.md. + +libjpeg-turbo license +--------------------- + This license covers the TurboJPEG API library and associated programs. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +- Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +- Neither the name of the libjpeg-turbo Project nor the names of its + contributors may be used to endorse or promote products derived from this + software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + + +libjpeg license, Independent JPEG Group +--------------------------------------- + This license applies to the libjpeg API library and associated programs + (any code inherited from libjpeg, and any modifications to that code.) + +The authors make NO WARRANTY or representation, either express or implied, +with respect to this software, its quality, accuracy, merchantability, or +fitness for a particular purpose. This software is provided "AS IS", and you, +its user, assume the entire risk as to its quality and accuracy. + +This software is copyright (C) 1991-2016, Thomas G. Lane, Guido Vollbeding. +All Rights Reserved except as specified below. + +Permission is hereby granted to use, copy, modify, and distribute this +software (or portions thereof) for any purpose, without fee, subject to these +conditions: +(1) If any part of the source code for this software is distributed, then this +README file must be included, with this copyright and no-warranty notice +unaltered; and any additions, deletions, or changes to the original files +must be clearly indicated in accompanying documentation. +(2) If only executable code is distributed, then the accompanying +documentation must state that "this software is based in part on the work of +the Independent JPEG Group". +(3) Permission for use of this software is granted only if the user accepts +full responsibility for any undesirable consequences; the authors accept +NO LIABILITY for damages of any kind. 
+ +These conditions apply to any software derived from or based on the IJG code, +not just to the unmodified library. If you use our work, you ought to +acknowledge us. + +Permission is NOT granted for the use of any IJG author's name or company name +in advertising or publicity relating to this software or products derived from +it. This software may be referred to only as "the Independent JPEG Group's +software". + +We specifically permit and encourage the use of this software as the basis of +commercial products, provided that all warranty or liability claims are +assumed by the product vendor. + + +The Unix configuration script "configure" was produced with GNU Autoconf. +It is copyright by the Free Software Foundation but is freely distributable. +The same holds for its supporting scripts (config.guess, config.sub, +ltmain.sh). Another support script, install-sh, is copyright by X Consortium +but is also freely distributable. + +The IJG distribution formerly included code to read and write GIF files. +To avoid entanglement with the Unisys LZW patent (now expired), GIF reading +support has been removed altogether, and the GIF writer has been simplified +to produce "uncompressed GIFs". This technique does not use the LZW +algorithm; the resulting GIF files are larger than usual, but are readable +by all standard GIF decoders. + +We are required to state that + "The Graphics Interchange Format(c) is the Copyright property of + CompuServe Incorporated. GIF(sm) is a Service Mark property of + CompuServe Incorporated." + + +zlib License +------------ + This license is a subset of the other two, and it covers the libjpeg-turbo + SIMD extensions. + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. 
+ +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + + +"THE BEER-WARE LICENSE" (Revision 42) +------------------------------------- + This license covers //third_party/libjpeg_turbo/src/md5/md5hl.c. + +<phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you +can do whatever you want with this stuff. If we meet some day, and you think +this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp + + +MIT License +----------- + This license covers //third_party/libjpeg_turbo/src/doc/html/dynsections.js. + +MIT License Copyright (c) 1997-2020 by Dimitri van Heesch + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is furnished +to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS +OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== llvm + +Copied from llvm-project/llvm/LICENSE.TXT: +============================================================================== +The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: +============================================================================== + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +---- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. + +============================================================================== +Software from third parties included in the LLVM Project: +============================================================================== +The LLVM Project contains third party software which is under different license +terms. 
All such code will be identified clearly using at least one of two +mechanisms: +1) It will be in a separate directory tree with its own `LICENSE.txt` or + `LICENSE` file at the top containing the specific license and restrictions + which apply to that software, or +2) It will contain specific license and restriction terms at the top of every + file. + +============================================================================== +Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy): +============================================================================== +University of Illinois/NCSA +Open Source License + +Copyright (c) 2003-2019 University of Illinois at Urbana-Champaign. +All rights reserved. + +Developed by: + + LLVM Team + + University of Illinois at Urbana-Champaign + + http://llvm.org + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal with +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. + + * Neither the names of the LLVM Team, University of Illinois at + Urbana-Champaign, nor the names of its contributors may be used to + endorse or promote products derived from this Software without specific + prior written permission. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE +SOFTWARE. + +============================================================================== +============================================================================== +Copied from llvm-project/libcxx/src/include/ryu/common.h: + +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. 
IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + +============================================================================== +============================================================================== +Copied from llvm-project/llvm/utils/unittest/googletest/LICENSE.TXT and +llvm-project/llvm/utils/unittest/googlemock/LICENSE.txt: + +Copyright 2008, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +============================================================================== +============================================================================== +Copied from llvm-project/llvm/lib/Support/COPYRIGHT.regex: +$OpenBSD: COPYRIGHT,v 1.3 2003/06/02 20:18:36 millert Exp $ + +Copyright 1992, 1993, 1994 Henry Spencer. All rights reserved. +This software is not subject to any license of the American Telephone +and Telegraph Company or of the Regents of the University of California. + +Permission is granted to anyone to use this software for any purpose on +any computer system, and to alter it and redistribute it, subject +to the following restrictions: + +1. The author is not responsible for the consequences of use of this + software, no matter how awful, even if they arise from flaws in it. + +2. The origin of this software must not be misrepresented, either by + explicit claim or by omission. Since few users ever read sources, + credits must appear in the documentation. + +3. Altered versions must be plainly marked as such, and must not be + misrepresented as being the original software. Since few users + ever read sources, credits must appear in the documentation. + +4. This notice may not be removed or altered. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +/*- + * Copyright (c) 1994 + * The Regents of the University of California. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)COPYRIGHT 8.1 (Berkeley) 3/16/94 + */ + +============================================================================== +============================================================================== +License for third_party/llvm/llvm-project/llvm/cmake/config.guess: + + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. 
+ + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". 
+ +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. 
+ + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + +Autoconf Exception + +As a special exception, the Free Software Foundation gives unlimited +permission to copy, distribute and modify the configure scripts that are the +output of Autoconf. You need not follow the terms of the GNU General Public +License when using or distributing such scripts, even though portions of the +text of Autoconf appear in them. The GNU General Public License (GPL) does +govern all other use of the material that constitutes the Autoconf program. + +Certain portions of the Autoconf source text are designed to be copied (in +certain cases, depending on the input) into the output of Autoconf. We call +these the "data" portions. The rest of the Autoconf source text consists of +comments plus executable code that decides which of the data portions to +output in any given case. We call these comments and executable code the "non- +data" portions. 
Autoconf never copies any of the non-data portions into its +output. + +This special exception to the GPL applies to versions of Autoconf released by +the Free Software Foundation. When you make and distribute a modified version +of Autoconf, you may extend this special exception to the GPL to apply to your +modified version as well, *unless* your modified version has the potential to +copy into its output some of the text that was the non-data portion of the +version that you started with. (In other words, unless your change moves or +copies text from the non-data portions to the data portions.) If your +modification has such potential, you must delete any notice of this special +exception to the GPL from your modified version. + + END OF TERMS AND CONDITIONS + + +============================================================================== +============================================================================== +Copied from llvm-project/polly/lib/External/isl/LICENSE: + +MIT License (MIT) + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +============================================================================== +============================================================================== +Copied from llvm-project/llgo/third_party/gotools/LICENSE: + +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +============================================================================== +============================================================================== +Copied from llvm-project/llgo/third_party/gofrontend/libffi/LICENSE: + +libffi - Copyright (c) 1996-2014 Anthony Green, Red Hat, Inc and others. +See source files for details. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +``Software''), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +============================================================================== +============================================================================== +Copied from llvm-project/lldb/third_party/Python/module/six/LICENSE: + +Copyright (c) 2010-2015 Benjamin Peterson + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +============================================================================== +============================================================================== +Copied from llvm-project/lldb/third_party/Python/module/pexpect-4.6/LICENSE and +lldb/third_party/Python/module/ptyprocess-0.6.0/LICENSE. + +ISC LICENSE + + This license is approved by the OSI and FSF as GPL-compatible. 
+ http://opensource.org/licenses/isc-license.txt + + Copyright (c) 2013-2014, Pexpect development team + Copyright (c) 2012, Noah Spurrier + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +============================================================================== +============================================================================== +Copied from +llvm-project/clang-tools-extra/clangd/clients/clangd-vscode/LICENSE: + +The MIT License (MIT) + +Copyright (c) 2019 The LLVM Developers + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +============================================================================== +============================================================================== +Copied from llvm-project/llvm/include/llvm/Support/LICENSE.TXT: + +LLVM System Interface Library +------------------------------------------------------------------------------- +The LLVM System Interface Library is licensed under the Illinois Open Source +License and has the following additional copyright: + +Copyright (C) 2004 eXtensible Systems, Inc. + +============================================================================== +============================================================================== +Copied from llvm-project/llvm/test/YAMLParser/LICENSE.txt: + +Copyright (c) 2006 Kirill Simonov + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +============================================================================== +============================================================================== +Copied from llvm-project/clang-tools-extra/clang-tidy/cert/LICENSE.TXT: + +------------------------------------------------------------------------------ +clang-tidy CERT Files +------------------------------------------------------------------------------ +All clang-tidy files are licensed under the same terms as the rest of the LLVM +project with the following additions: + +Any file referencing a CERT Secure Coding guideline: +Please allow this letter to serve as confirmation that open source projects on +http://llvm.org are permitted to link via hypertext to the CERT(R) secure coding +guidelines available at https://www.securecoding.cert.org. + +The foregoing is permitted by the Terms of Use as follows: +"Linking to the Service +Because we update many of our Web documents regularly, we would prefer that you +link to our Web pages whenever possible rather than reproduce them. It is not +necessary to request permission to make referential hypertext links to The +Service." +http://www.sei.cmu.edu/legal/ip/index.cfm. + +Please allow this letter to also confirm that no formal permission is required +to reproduce the title of the content being linked to, nor to reproduce any +de Minimis description of such content. 
+ +============================================================================== +============================================================================== +Copied from llvm-project/clang-tools-extra/clang-tidy/hicpp/LICENSE.TXT: + +------------------------------------------------------------------------------ +clang-tidy High-Integrity C++ Files +------------------------------------------------------------------------------ +All clang-tidy files are licensed under the same terms as the rest of the LLVM +project with the following additions: + +Any file referencing a High-Integrity C++ Coding guideline: + +HIC++ Coding Standard as created by PRQA. + +Please see http://www.codingstandard.com/section/conditions-of-use/ for more +information. + +============================================================================== +============================================================================== +Copied from llvm-project/polly/lib/External/isl/interface/cpp.cc: + +Copyright 2016, 2017 Tobias Grosser. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + +THIS SOFTWARE IS PROVIDED BY TOBIAS GROSSER ''AS IS'' AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SVEN VERDOOLAEGE OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, +OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +The views and conclusions contained in the software and documentation +are those of the authors and should not be interpreted as +representing official policies, either expressed or implied, of +Tobias Grosser. + +============================================================================== +============================================================================== +Copied from llvm-project/llvm/lib/Support/BLAKE3/LICENSE: + +Creative Commons Legal Code + +CC0 1.0 Universal + + CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE + LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN + ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS + INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES + REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS + PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM + THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED + HEREUNDER. + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator +and subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). 
+ +Certain owners wish to permanently relinquish those rights to a Work for +the purpose of contributing to a commons of creative, cultural and +scientific works ("Commons") that the public can reliably and without fear +of later claims of infringement build upon, modify, incorporate in other +works, reuse and redistribute as freely as possible in any form whatsoever +and for any purposes, including without limitation commercial purposes. +These owners may contribute to the Commons to promote the ideal of a free +culture and the further production of creative, cultural and scientific +works, or to gain reputation or greater distribution for their Work in +part through the use and efforts of others. + +For these and/or other purposes and motivations, and without any +expectation of additional consideration or compensation, the person +associating CC0 with a Work (the "Affirmer"), to the extent that he or she +is an owner of Copyright and Related Rights in the Work, voluntarily +elects to apply CC0 to the Work and publicly distribute the Work under its +terms, with knowledge of his or her Copyright and Related Rights in the +Work and the meaning and intended legal effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not +limited to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, + communicate, and translate a Work; + ii. moral rights retained by the original author(s) and/or performer(s); +iii. publicity and privacy rights pertaining to a person's image or + likeness depicted in a Work; + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + v. rights protecting the extraction, dissemination, use and reuse of data + in a Work; + vi. 
database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation + thereof, including any amended or successor version of such + directive); and +vii. other similar, equivalent or corresponding rights throughout the + world based on applicable law or treaty, and any national + implementations thereof. + +2. Waiver. To the greatest extent permitted by, but not in contravention +of, applicable law, Affirmer hereby overtly, fully, permanently, +irrevocably and unconditionally waives, abandons, and surrenders all of +Affirmer's Copyright and Related Rights and associated claims and causes +of action, whether now known or unknown (including existing as well as +future claims and causes of action), in the Work (i) in all territories +worldwide, (ii) for the maximum duration provided by applicable law or +treaty (including future time extensions), (iii) in any current or future +medium and for any number of copies, and (iv) for any purpose whatsoever, +including without limitation commercial, advertising or promotional +purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each +member of the public at large and to the detriment of Affirmer's heirs and +successors, fully intending that such Waiver shall not be subject to +revocation, rescission, cancellation, termination, or any other legal or +equitable action to disrupt the quiet enjoyment of the Work by the public +as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason +be judged legally invalid or ineffective under applicable law, then the +Waiver shall be preserved to the maximum extent permitted taking into +account Affirmer's express Statement of Purpose. 
In addition, to the +extent the Waiver is so judged Affirmer hereby grants to each affected +person a royalty-free, non transferable, non sublicensable, non exclusive, +irrevocable and unconditional license to exercise Affirmer's Copyright and +Related Rights in the Work (i) in all territories worldwide, (ii) for the +maximum duration provided by applicable law or treaty (including future +time extensions), (iii) in any current or future medium and for any number +of copies, and (iv) for any purpose whatsoever, including without +limitation commercial, advertising or promotional purposes (the +"License"). The License shall be deemed effective as of the date CC0 was +applied by Affirmer to the Work. Should any part of the License for any +reason be judged legally invalid or ineffective under applicable law, such +partial invalidity or ineffectiveness shall not invalidate the remainder +of the License, and in such case Affirmer hereby affirms that he or she +will not (i) exercise any of his or her remaining Copyright and Related +Rights in the Work or (ii) assert any associated claims and causes of +action with respect to the Work, in either case contrary to Affirmer's +express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + b. Affirmer offers the Work as-is and makes no representations or + warranties of any kind concerning the Work, express, implied, + statutory or otherwise, including without limitation warranties of + title, merchantability, fitness for a particular purpose, non + infringement, or the absence of latent or other defects, accuracy, or + the present or absence of errors, whether or not discoverable, all to + the greatest extent permissible under applicable law. + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without + limitation any person's Copyright and Related Rights in the Work. + Further, Affirmer disclaims responsibility for obtaining any necessary + consents, permissions or other rights required for any use of the + Work. + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to + this CC0 or use of the Work. + + +============================================================================== +============================================================================== +Copied from llvm-project/llvm/lib/Support/UnicodeNameToCodepointGenerated.cpp: + +UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE + +See Terms of Use https://www.unicode.org/copyright.html +for definitions of Unicode Inc.’s Data Files and Software. + +NOTICE TO USER: Carefully read the following legal agreement. +BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S +DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"), +YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE +TERMS AND CONDITIONS OF THIS AGREEMENT. +IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE +THE DATA FILES OR SOFTWARE. + +COPYRIGHT AND PERMISSION NOTICE + +Copyright © 1991-2022 Unicode, Inc. All rights reserved. +Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
+ +Permission is hereby granted, free of charge, to any person obtaining +a copy of the Unicode data files and any associated documentation +(the "Data Files") or Unicode software and any associated documentation +(the "Software") to deal in the Data Files or Software +without restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, and/or sell copies of +the Data Files or Software, and to permit persons to whom the Data Files +or Software are furnished to do so, provided that either +(a) this copyright and permission notice appear with all copies +of the Data Files or Software, or +(b) this copyright and permission notice appear in associated +Documentation. + +THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT OF THIRD PARTY RIGHTS. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS +NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL +DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, +DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THE DATA FILES OR SOFTWARE. + +Except as contained in this notice, the name of a copyright holder +shall not be used in advertising or otherwise to promote the sale, +use or other dealings in these Data Files or Software without prior +written authorization of the copyright holder. + + +============================================================================== +============================================================================== +Copied from llvm-project/llvm/include/llvm/Support/ConvertUTF.h: + +Copyright © 1991-2015 Unicode, Inc. All rights reserved. +Distributed under the Terms of Use in +http://www.unicode.org/copyright.html. 
+ +Permission is hereby granted, free of charge, to any person obtaining +a copy of the Unicode data files and any associated documentation +(the "Data Files") or Unicode software and any associated documentation +(the "Software") to deal in the Data Files or Software +without restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, and/or sell copies of +the Data Files or Software, and to permit persons to whom the Data Files +or Software are furnished to do so, provided that +(a) this copyright and permission notice appear with all copies +of the Data Files or Software, +(b) this copyright and permission notice appear in associated +documentation, and +(c) there is clear notice in each modified Data File or in the Software +as well as in the documentation associated with the Data File(s) or +Software that the data or software has been modified. + +THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT OF THIRD PARTY RIGHTS. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS +NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL +DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, +DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THE DATA FILES OR SOFTWARE. + +Except as contained in this notice, the name of a copyright holder +shall not be used in advertising or otherwise to promote the sale, +use or other dealings in these Data Files or Software without prior +written authorization of the copyright holder. 
+ + +============================================================================== +============================================================================== +Copied from docker_kokoro/dockerfiles/scripts/google_packages/deb_packages/copyright: + +Files: libcxx/utils/google-benchmark/* +License: Apache 2.0 + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== ml_dtypes + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== nccl + + Copyright (c) 2015-2020, NVIDIA CORPORATION. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of NVIDIA CORPORATION, Lawrence Berkeley National + Laboratory, the U.S. Department of Energy, nor the names of their + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + The U.S. Department of Energy funded the development of this software + under subcontract 7078610 with Lawrence Berkeley National Laboratory. + + +This code also includes files from the NVIDIA Tools Extension SDK project. + +See: + + https://github.com/NVIDIA/NVTX + +for more information and license details. + +================================================================================ +NVTX LICENSE.txt - verbatim copy of https://llvm.org/LICENSE.txt +================================================================================ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +--- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== nsync + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== numpy + +Copyright (c) 2005-2019, NumPy Developers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. 
+ + * Neither the name of the NumPy Developers nor the names of any + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +numpy/core/src/multiarray/dragon4.c: +------------------------------------ + +/* + * Copyright (c) 2014 Ryan Juckett + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* + * This file contains a modified version of Ryan Juckett's Dragon4 + * implementation, obtained from http://www.ryanjuckett.com, + * which has been ported from C++ to C and which has + * modifications specific to printing floats in numpy. + * + * Ryan Juckett's original code was under the Zlib license; he gave numpy + * permission to include it under the MIT license instead. + */ + +numpy/core/src/npymath/ieee754.c.src: +------------------------------------- +/* + * nextafter code taken from BSD math lib, the code contains the following + * notice: + * + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + + +numpy/core/src/npymath/npy_math_complex.c.src: +---------------------------------------------- + + Most of the code is taken from the msun library in FreeBSD (HEAD @ 4th + October 2013), under the following license: + + Copyright (c) 2007, 2011 David Schultz + Copyright (c) 2012 Stephen Montgomery-Smith + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. 
Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. + + +numpy/doc/sphinxext/: +-------------------- + +------------------------------------------------------------------------------- + The files + - numpydoc.py + - docscrape.py + - docscrape_sphinx.py + - phantom_import.py + have the following license: + +Copyright (C) 2008 Stefan van der Walt <stefan@mentat.za.net>, Pauli Virtanen <pav@iki.fi> + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +------------------------------------------------------------------------------- + The files + - compiler_unparse.py + - comment_eater.py + - traitsdoc.py + have the following license: + +This software is OSI Certified Open Source Software. +OSI Certified is a certification mark of the Open Source Initiative. + +Copyright (c) 2006, Enthought, Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Enthought, Inc. nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +------------------------------------------------------------------------------- + The file + - plot_directive.py + originates from Matplotlib (http://matplotlib.sf.net/) which has + the following license: + +Copyright (c) 2002-2008 John D. Hunter; All Rights Reserved. + +1. This LICENSE AGREEMENT is between John D. Hunter (“JDH”), and the Individual or Organization (“Licensee”) accessing and otherwise using matplotlib software in source or binary form and its associated documentation. + +2. Subject to the terms and conditions of this License Agreement, JDH hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use matplotlib 0.98.3 alone or in any derivative version, provided, however, that JDH’s License Agreement and JDH’s notice of copyright, i.e., “Copyright (c) 2002-2008 John D. Hunter; All Rights Reserved” are retained in matplotlib 0.98.3 alone or in any derivative version prepared by Licensee. + +3. In the event Licensee prepares a derivative work that is based on or incorporates matplotlib 0.98.3 or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to matplotlib 0.98.3. + +4. JDH is making matplotlib 0.98.3 available to Licensee on an “AS IS” basis. 
JDH MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, JDH MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF MATPLOTLIB 0.98.3 WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. + +5. JDH SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF MATPLOTLIB 0.98.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING MATPLOTLIB 0.98.3, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material breach of its terms and conditions. + +7. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between JDH and Licensee. This License Agreement does not grant permission to use JDH trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party. + +8. By copying, installing or otherwise using matplotlib 0.98.3, Licensee agrees to be bound by the terms and conditions of this License Agreement. + + +numpy/linalg/lapack_lite/: +-------------------------- + +Copyright (c) 1992-2013 The University of Tennessee and The University + of Tennessee Research Foundation. All rights + reserved. +Copyright (c) 2000-2013 The University of California Berkeley. All + rights reserved. +Copyright (c) 2006-2013 The University of Colorado Denver. All rights + reserved. + +$COPYRIGHT$ + +Additional copyrights may follow + +$HEADER$ + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +- Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ +- Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer listed + in this license in the documentation and/or other materials + provided with the distribution. + +- Neither the name of the copyright holders nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +The copyright holders provide no reassurances that the source code +provided does not infringe any patent, copyright, or any other +intellectual property rights of third parties. The copyright holders +disclaim any liability to any recipient for claims brought against +recipient by any third party for infringement of that parties +intellectual property rights. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +numpy/random: +------------- + +**This software is dual-licensed under the The University of Illinois/NCSA +Open Source License (NCSA) and The 3-Clause BSD License** + +# NCSA Open Source License +**Copyright (c) 2019 Kevin Sheppard. 
All rights reserved.** + +Developed by: Kevin Sheppard (<kevin.sheppard@economics.ox.ac.uk>, +<kevin.k.sheppard@gmail.com>) +[http://www.kevinsheppard.com](http://www.kevinsheppard.com) + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal with +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimers. + +Redistributions in binary form must reproduce the above copyright notice, this +list of conditions and the following disclaimers in the documentation and/or +other materials provided with the distribution. + +Neither the names of Kevin Sheppard, nor the names of any contributors may be +used to endorse or promote products derived from this Software without specific +prior written permission. + +**THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH +THE SOFTWARE.** + + +# 3-Clause BSD License +**Copyright (c) 2019 Kevin Sheppard. All rights reserved.** + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +2.
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +**THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +THE POSSIBILITY OF SUCH DAMAGE.** + +# Components + +Many parts of this module have been derived from original sources, +often the algorithm's designer. Component licenses are located with +the component code. + + +numpy/random/src/mt19937: +------------------------- + +Copyright (c) 2003-2005, Jean-Sebastien Roy (js@jeannot.org) + +The rk_random and rk_seed functions algorithms and the original design of +the Mersenne Twister RNG: + + Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. 
Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Original algorithm for the implementation of rk_interval function from +Richard J. Wagner's implementation of the Mersenne Twister RNG, optimised by +Magnus Jonsson. + +Constants used in the rk_double implementation by Isaku Wada. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +numpy/random/src/pcg64/: +------------------------ + +/* + * PCG64 Random Number Generation for C. + * + * Copyright 2014 Melissa O'Neill + * Copyright 2015 Robert Kern + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * For additional information about the PCG random number generation scheme, + * including its license and other licensing options, visit + * + * http://www.pcg-random.org + * + * Relicensed MIT in May 2019 + * + * The MIT License + * + * PCG Random Number Generation for C. 
+ * + * Copyright 2014 Melissa O'Neill + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +Full text of Apache license: + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +numpy/testing/_private/parameterized.py: +---------------------------------------- + +tl;dr: all code code is licensed under simplified BSD, unless stated otherwise. + +Unless stated otherwise in the source files, all code is copyright 2010 David +Wolever <david@wolever.net>. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO +EVENT SHALL <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +The views and conclusions contained in the software and documentation are those +of the authors and should not be interpreted as representing official policies, +either expressed or implied, of David Wolever. + + +numpy/tools/npy_tempita/: +------------------------- + +Copyright (c) 2008 Ian Bicking and Contributors + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ + +numpy/core/include/numpy/libdivide/*: +------------------------------------- + zlib License + ------------ + + Copyright (C) 2010 - 2019 ridiculous_fish, <libdivide@ridiculousfish.com> + Copyright (C) 2016 - 2019 Kim Walisch, <kim.walisch@gmail.com> + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== onednn + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity.
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + ============================================================================ + + Copyright 2016-2023 Intel Corporation + Copyright 2018 YANDEX LLC + Copyright 2019-2023 FUJITSU LIMITED + Copyright 2020-2023 Arm Ltd. and affiliates + Copyright 2020-2022 Codeplay Software Limited + Copyright 2021 Alanna Tempest + Copyright 2022-2023 IBM Corporation + Copyright 2023 KNS Group LLC (YADRO) + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + This distribution includes third party software ("third party programs"). + This third party software, even if included with the distribution of + the Intel software, may be governed by separate license terms, including + without limitation, third party license terms, other Intel software license + terms, and open source software license terms. These separate license terms + govern your use of the third party programs as set forth in the + "THIRD-PARTY-PROGRAMS" file. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== opt_einsum + +The MIT License (MIT) + +Copyright (c) 2014 Daniel Smith + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== png + +COPYRIGHT NOTICE, DISCLAIMER, and LICENSE +========================================= + +PNG Reference Library License version 2 +--------------------------------------- + + * Copyright (c) 1995-2022 The PNG Reference Library Authors. + * Copyright (c) 2018-2022 Cosmin Truta. + * Copyright (c) 2000-2002, 2004, 2006-2018 Glenn Randers-Pehrson. + * Copyright (c) 1996-1997 Andreas Dilger. + * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc. + +The software is supplied "as is", without warranty of any kind, +express or implied, including, without limitation, the warranties +of merchantability, fitness for a particular purpose, title, and +non-infringement. In no event shall the Copyright owners, or +anyone distributing the software, be liable for any damages or +other liability, whether in contract, tort or otherwise, arising +from, out of, or in connection with the software, or the use or +other dealings in the software, even if advised of the possibility +of such damage. + +Permission is hereby granted to use, copy, modify, and distribute +this software, or portions hereof, for any purpose, without fee, +subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you + must not claim that you wrote the original software. If you + use this software in a product, an acknowledgment in the product + documentation would be appreciated, but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. 
This Copyright notice may not be removed or altered from any + source or altered source distribution. + + +PNG Reference Library License version 1 (for libpng 0.5 through 1.6.35) +----------------------------------------------------------------------- + +libpng versions 1.0.7, July 1, 2000, through 1.6.35, July 15, 2018 are +Copyright (c) 2000-2002, 2004, 2006-2018 Glenn Randers-Pehrson, are +derived from libpng-1.0.6, and are distributed according to the same +disclaimer and license as libpng-1.0.6 with the following individuals +added to the list of Contributing Authors: + + Simon-Pierre Cadieux + Eric S. Raymond + Mans Rullgard + Cosmin Truta + Gilles Vollant + James Yu + Mandar Sahastrabuddhe + Google Inc. + Vadim Barkov + +and with the following additions to the disclaimer: + + There is no warranty against interference with your enjoyment of + the library or against infringement. There is no warranty that our + efforts or the library will fulfill any of your particular purposes + or needs. This library is provided with all faults, and the entire + risk of satisfactory quality, performance, accuracy, and effort is + with the user. + +Some files in the "contrib" directory and some configure-generated +files that are distributed with libpng have other copyright owners, and +are released under other open source licenses. 
+ +libpng versions 0.97, January 1998, through 1.0.6, March 20, 2000, are +Copyright (c) 1998-2000 Glenn Randers-Pehrson, are derived from +libpng-0.96, and are distributed according to the same disclaimer and +license as libpng-0.96, with the following individuals added to the +list of Contributing Authors: + + Tom Lane + Glenn Randers-Pehrson + Willem van Schaik + +libpng versions 0.89, June 1996, through 0.96, May 1997, are +Copyright (c) 1996-1997 Andreas Dilger, are derived from libpng-0.88, +and are distributed according to the same disclaimer and license as +libpng-0.88, with the following individuals added to the list of +Contributing Authors: + + John Bowler + Kevin Bracey + Sam Bushell + Magnus Holmgren + Greg Roelofs + Tom Tanner + +Some files in the "scripts" directory have other copyright owners, +but are released under this license. + +libpng versions 0.5, May 1995, through 0.88, January 1996, are +Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc. + +For the purposes of this copyright and license, "Contributing Authors" +is defined as the following set of individuals: + + Andreas Dilger + Dave Martindale + Guy Eric Schalnat + Paul Schmidt + Tim Wegner + +The PNG Reference Library is supplied "AS IS". The Contributing +Authors and Group 42, Inc. disclaim all warranties, expressed or +implied, including, without limitation, the warranties of +merchantability and of fitness for any purpose. The Contributing +Authors and Group 42, Inc. assume no liability for direct, indirect, +incidental, special, exemplary, or consequential damages, which may +result from the use of the PNG Reference Library, even if advised of +the possibility of such damage. + +Permission is hereby granted to use, copy, modify, and distribute this +source code, or portions hereof, for any purpose, without fee, subject +to the following restrictions: + + 1. The origin of this source code must not be misrepresented. + + 2. 
Altered versions must be plainly marked as such and must not + be misrepresented as being the original source. + + 3. This Copyright notice may not be removed or altered from any + source or altered source distribution. + +The Contributing Authors and Group 42, Inc. specifically permit, +without fee, and encourage the use of this source code as a component +to supporting the PNG file format in commercial products. If you use +this source code in a product, acknowledgment is not required but would +be appreciated. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== protobuf + +Copyright 2008, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +The above license applies to all files in this directory and +subdirectories, with exceptions noted below. + +Code generated by the Protocol Buffer compiler is owned by the owner +of the input file used when generating it. This code is not +standalone and requires a support library to be linked with it. This +support library is itself covered by the above license. + +=========================================================================== +rust/utf8.rs +=========================================================================== +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== pybind11 + +Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>, All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ +Please also refer to the file .github/CONTRIBUTING.md, which clarifies licensing of +external contributions to this project including patches, pull requests, etc. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== Python + +A. HISTORY OF THE SOFTWARE +========================== + +Python was created in the early 1990s by Guido van Rossum at Stichting +Mathematisch Centrum (CWI, see https://www.cwi.nl) in the Netherlands +as a successor of a language called ABC. Guido remains Python's +principal author, although it includes many contributions from others. + +In 1995, Guido continued his work on Python at the Corporation for +National Research Initiatives (CNRI, see https://www.cnri.reston.va.us) +in Reston, Virginia where he released several versions of the +software. + +In May 2000, Guido and the Python core development team moved to +BeOpen.com to form the BeOpen PythonLabs team. In October of the same +year, the PythonLabs team moved to Digital Creations, which became +Zope Corporation. In 2001, the Python Software Foundation (PSF, see +https://www.python.org/psf/) was formed, a non-profit organization +created specifically to own Python-related Intellectual Property. +Zope Corporation was a sponsoring member of the PSF. + +All Python releases are Open Source (see https://opensource.org for +the Open Source Definition). Historically, most, but not all, Python +releases have also been GPL-compatible; the table below summarizes +the various releases. + + Release Derived Year Owner GPL- + from compatible? 
(1) + + 0.9.0 thru 1.2 1991-1995 CWI yes + 1.3 thru 1.5.2 1.2 1995-1999 CNRI yes + 1.6 1.5.2 2000 CNRI no + 2.0 1.6 2000 BeOpen.com no + 1.6.1 1.6 2001 CNRI yes (2) + 2.1 2.0+1.6.1 2001 PSF no + 2.0.1 2.0+1.6.1 2001 PSF yes + 2.1.1 2.1+2.0.1 2001 PSF yes + 2.1.2 2.1.1 2002 PSF yes + 2.1.3 2.1.2 2002 PSF yes + 2.2 and above 2.1.1 2001-now PSF yes + +Footnotes: + +(1) GPL-compatible doesn't mean that we're distributing Python under + the GPL. All Python licenses, unlike the GPL, let you distribute + a modified version without making your changes open source. The + GPL-compatible licenses make it possible to combine Python with + other software that is released under the GPL; the others don't. + +(2) According to Richard Stallman, 1.6.1 is not GPL-compatible, + because its license has a choice of law clause. According to + CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1 + is "not incompatible" with the GPL. + +Thanks to the many outside volunteers who have worked under Guido's +direction to make these releases possible. + + +B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON +=============================================================== + +Python software and documentation are licensed under the +Python Software Foundation License Version 2. + +Starting with Python 3.8.6, examples, recipes, and other code in +the documentation are dual licensed under the PSF License Version 2 +and the Zero-Clause BSD license. + +Some software incorporated into Python is under different licenses. +The licenses are listed with code falling under that license. + + +PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 +-------------------------------------------- + +1. This LICENSE AGREEMENT is between the Python Software Foundation +("PSF"), and the Individual or Organization ("Licensee") accessing and +otherwise using this software ("Python") in source or binary form and +its associated documentation. + +2. 
Subject to the terms and conditions of this License Agreement, PSF hereby +grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, +analyze, test, perform and/or display publicly, prepare derivative works, +distribute, and otherwise use Python alone or in any derivative version, +provided, however, that PSF's License Agreement and PSF's notice of copyright, +i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023 Python Software Foundation; +All Rights Reserved" are retained in Python alone or in any derivative version +prepared by Licensee. + +3. In the event Licensee prepares a derivative work that is based on +or incorporates Python or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python. + +4. PSF is making Python available to Licensee on an "AS IS" +basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. Nothing in this License Agreement shall be deemed to create any +relationship of agency, partnership, or joint venture between PSF and +Licensee. 
This License Agreement does not grant permission to use PSF +trademarks or trade name in a trademark sense to endorse or promote +products or services of Licensee, or any third party. + +8. By copying, installing or otherwise using Python, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. + + +BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0 +------------------------------------------- + +BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1 + +1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an +office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the +Individual or Organization ("Licensee") accessing and otherwise using +this software in source or binary form and its associated +documentation ("the Software"). + +2. Subject to the terms and conditions of this BeOpen Python License +Agreement, BeOpen hereby grants Licensee a non-exclusive, +royalty-free, world-wide license to reproduce, analyze, test, perform +and/or display publicly, prepare derivative works, distribute, and +otherwise use the Software alone or in any derivative version, +provided, however, that the BeOpen Python License is retained in the +Software, alone or in any derivative version prepared by Licensee. + +3. BeOpen is making the Software available to Licensee on an "AS IS" +basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE +SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS +AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY +DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +5. 
This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +6. This License Agreement shall be governed by and interpreted in all +respects by the law of the State of California, excluding conflict of +law provisions. Nothing in this License Agreement shall be deemed to +create any relationship of agency, partnership, or joint venture +between BeOpen and Licensee. This License Agreement does not grant +permission to use BeOpen trademarks or trade names in a trademark +sense to endorse or promote products or services of Licensee, or any +third party. As an exception, the "BeOpen Python" logos available at +http://www.pythonlabs.com/logos.html may be used according to the +permissions granted on that web page. + +7. By copying, installing or otherwise using the software, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. + + +CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1 +--------------------------------------- + +1. This LICENSE AGREEMENT is between the Corporation for National +Research Initiatives, having an office at 1895 Preston White Drive, +Reston, VA 20191 ("CNRI"), and the Individual or Organization +("Licensee") accessing and otherwise using Python 1.6.1 software in +source or binary form and its associated documentation. + +2. Subject to the terms and conditions of this License Agreement, CNRI +hereby grants Licensee a nonexclusive, royalty-free, world-wide +license to reproduce, analyze, test, perform and/or display publicly, +prepare derivative works, distribute, and otherwise use Python 1.6.1 +alone or in any derivative version, provided, however, that CNRI's +License Agreement and CNRI's notice of copyright, i.e., "Copyright (c) +1995-2001 Corporation for National Research Initiatives; All Rights +Reserved" are retained in Python 1.6.1 alone or in any derivative +version prepared by Licensee. 
Alternately, in lieu of CNRI's License +Agreement, Licensee may substitute the following text (omitting the +quotes): "Python 1.6.1 is made available subject to the terms and +conditions in CNRI's License Agreement. This Agreement together with +Python 1.6.1 may be located on the internet using the following +unique, persistent identifier (known as a handle): 1895.22/1013. This +Agreement may also be obtained from a proxy server on the internet +using the following URL: http://hdl.handle.net/1895.22/1013". + +3. In the event Licensee prepares a derivative work that is based on +or incorporates Python 1.6.1 or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python 1.6.1. + +4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS" +basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1, +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. This License Agreement shall be governed by the federal +intellectual property law of the United States, including without +limitation the federal copyright law, and, to the extent such +U.S. federal law does not apply, by the law of the Commonwealth of +Virginia, excluding Virginia's conflict of law provisions. 
+Notwithstanding the foregoing, with regard to derivative works based +on Python 1.6.1 that incorporate non-separable material that was +previously distributed under the GNU General Public License (GPL), the +law of the Commonwealth of Virginia shall govern this License +Agreement only as to issues arising under or with respect to +Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this +License Agreement shall be deemed to create any relationship of +agency, partnership, or joint venture between CNRI and Licensee. This +License Agreement does not grant permission to use CNRI trademarks or +trade name in a trademark sense to endorse or promote products or +services of Licensee, or any third party. + +8. By clicking on the "ACCEPT" button where indicated, or by copying, +installing or otherwise using Python 1.6.1, Licensee agrees to be +bound by the terms and conditions of this License Agreement. + + ACCEPT + + +CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2 +-------------------------------------------------- + +Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam, +The Netherlands. All rights reserved. + +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose and without fee is hereby granted, +provided that the above copyright notice appear in all copies and that +both that copyright notice and this permission notice appear in +supporting documentation, and that the name of Stichting Mathematisch +Centrum or CWI not be used in advertising or publicity pertaining to +distribution of the software without specific, written prior +permission. 
+ +STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO +THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE +FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT +OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +ZERO-CLAUSE BSD LICENSE FOR CODE IN THE PYTHON DOCUMENTATION +---------------------------------------------------------------------- + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, +INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM +LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR +OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THIS SOFTWARE. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== re2 + +// Copyright (c) 2009 The RE2 Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== setuptools + +Copyright (C) 2016 Jason R Coombs + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== six + +Copyright (c) 2010-2020 Benjamin Peterson + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== snappy + +Copyright (c) 2011-2017, Andres Moreira +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the authors nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ANDRES MOREIRA BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== stablehlo + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== termcolor + +Copyright (c) 2008-2011 Volvox Development Team + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== typing_extensions + +A. HISTORY OF THE SOFTWARE +========================== + +Python was created in the early 1990s by Guido van Rossum at Stichting +Mathematisch Centrum (CWI, see http://www.cwi.nl) in the Netherlands +as a successor of a language called ABC. Guido remains Python's +principal author, although it includes many contributions from others. + +In 1995, Guido continued his work on Python at the Corporation for +National Research Initiatives (CNRI, see http://www.cnri.reston.va.us) +in Reston, Virginia where he released several versions of the +software. + +In May 2000, Guido and the Python core development team moved to +BeOpen.com to form the BeOpen PythonLabs team. In October of the same +year, the PythonLabs team moved to Digital Creations, which became +Zope Corporation. In 2001, the Python Software Foundation (PSF, see +https://www.python.org/psf/) was formed, a non-profit organization +created specifically to own Python-related Intellectual Property. +Zope Corporation was a sponsoring member of the PSF. + +All Python releases are Open Source (see http://www.opensource.org for +the Open Source Definition). Historically, most, but not all, Python +releases have also been GPL-compatible; the table below summarizes +the various releases. + + Release Derived Year Owner GPL- + from compatible? (1) + + 0.9.0 thru 1.2 1991-1995 CWI yes + 1.3 thru 1.5.2 1.2 1995-1999 CNRI yes + 1.6 1.5.2 2000 CNRI no + 2.0 1.6 2000 BeOpen.com no + 1.6.1 1.6 2001 CNRI yes (2) + 2.1 2.0+1.6.1 2001 PSF no + 2.0.1 2.0+1.6.1 2001 PSF yes + 2.1.1 2.1+2.0.1 2001 PSF yes + 2.1.2 2.1.1 2002 PSF yes + 2.1.3 2.1.2 2002 PSF yes + 2.2 and above 2.1.1 2001-now PSF yes + +Footnotes: + +(1) GPL-compatible doesn't mean that we're distributing Python under + the GPL. 
All Python licenses, unlike the GPL, let you distribute + a modified version without making your changes open source. The + GPL-compatible licenses make it possible to combine Python with + other software that is released under the GPL; the others don't. + +(2) According to Richard Stallman, 1.6.1 is not GPL-compatible, + because its license has a choice of law clause. According to + CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1 + is "not incompatible" with the GPL. + +Thanks to the many outside volunteers who have worked under Guido's +direction to make these releases possible. + + +B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON +=============================================================== + +PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 +-------------------------------------------- + +1. This LICENSE AGREEMENT is between the Python Software Foundation +("PSF"), and the Individual or Organization ("Licensee") accessing and +otherwise using this software ("Python") in source or binary form and +its associated documentation. + +2. Subject to the terms and conditions of this License Agreement, PSF hereby +grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, +analyze, test, perform and/or display publicly, prepare derivative works, +distribute, and otherwise use Python alone or in any derivative version, +provided, however, that PSF's License Agreement and PSF's notice of copyright, +i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022 Python Software Foundation; +All Rights Reserved" are retained in Python alone or in any derivative version +prepared by Licensee. + +3. 
In the event Licensee prepares a derivative work that is based on +or incorporates Python or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python. + +4. PSF is making Python available to Licensee on an "AS IS" +basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. Nothing in this License Agreement shall be deemed to create any +relationship of agency, partnership, or joint venture between PSF and +Licensee. This License Agreement does not grant permission to use PSF +trademarks or trade name in a trademark sense to endorse or promote +products or services of Licensee, or any third party. + +8. By copying, installing or otherwise using Python, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. + + +BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0 +------------------------------------------- + +BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1 + +1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an +office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the +Individual or Organization ("Licensee") accessing and otherwise using +this software in source or binary form and its associated +documentation ("the Software"). + +2. 
Subject to the terms and conditions of this BeOpen Python License +Agreement, BeOpen hereby grants Licensee a non-exclusive, +royalty-free, world-wide license to reproduce, analyze, test, perform +and/or display publicly, prepare derivative works, distribute, and +otherwise use the Software alone or in any derivative version, +provided, however, that the BeOpen Python License is retained in the +Software, alone or in any derivative version prepared by Licensee. + +3. BeOpen is making the Software available to Licensee on an "AS IS" +basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE +SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS +AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY +DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +5. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +6. This License Agreement shall be governed by and interpreted in all +respects by the law of the State of California, excluding conflict of +law provisions. Nothing in this License Agreement shall be deemed to +create any relationship of agency, partnership, or joint venture +between BeOpen and Licensee. This License Agreement does not grant +permission to use BeOpen trademarks or trade names in a trademark +sense to endorse or promote products or services of Licensee, or any +third party. As an exception, the "BeOpen Python" logos available at +http://www.pythonlabs.com/logos.html may be used according to the +permissions granted on that web page. + +7. 
By copying, installing or otherwise using the software, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. + + +CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1 +--------------------------------------- + +1. This LICENSE AGREEMENT is between the Corporation for National +Research Initiatives, having an office at 1895 Preston White Drive, +Reston, VA 20191 ("CNRI"), and the Individual or Organization +("Licensee") accessing and otherwise using Python 1.6.1 software in +source or binary form and its associated documentation. + +2. Subject to the terms and conditions of this License Agreement, CNRI +hereby grants Licensee a nonexclusive, royalty-free, world-wide +license to reproduce, analyze, test, perform and/or display publicly, +prepare derivative works, distribute, and otherwise use Python 1.6.1 +alone or in any derivative version, provided, however, that CNRI's +License Agreement and CNRI's notice of copyright, i.e., "Copyright (c) +1995-2001 Corporation for National Research Initiatives; All Rights +Reserved" are retained in Python 1.6.1 alone or in any derivative +version prepared by Licensee. Alternately, in lieu of CNRI's License +Agreement, Licensee may substitute the following text (omitting the +quotes): "Python 1.6.1 is made available subject to the terms and +conditions in CNRI's License Agreement. This Agreement together with +Python 1.6.1 may be located on the internet using the following +unique, persistent identifier (known as a handle): 1895.22/1013. This +Agreement may also be obtained from a proxy server on the internet +using the following URL: http://hdl.handle.net/1895.22/1013". + +3. In the event Licensee prepares a derivative work that is based on +or incorporates Python 1.6.1 or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python 1.6.1. + +4. 
CNRI is making Python 1.6.1 available to Licensee on an "AS IS" +basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1, +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. This License Agreement shall be governed by the federal +intellectual property law of the United States, including without +limitation the federal copyright law, and, to the extent such +U.S. federal law does not apply, by the law of the Commonwealth of +Virginia, excluding Virginia's conflict of law provisions. +Notwithstanding the foregoing, with regard to derivative works based +on Python 1.6.1 that incorporate non-separable material that was +previously distributed under the GNU General Public License (GPL), the +law of the Commonwealth of Virginia shall govern this License +Agreement only as to issues arising under or with respect to +Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this +License Agreement shall be deemed to create any relationship of +agency, partnership, or joint venture between CNRI and Licensee. This +License Agreement does not grant permission to use CNRI trademarks or +trade name in a trademark sense to endorse or promote products or +services of Licensee, or any third party. + +8. 
By clicking on the "ACCEPT" button where indicated, or by copying, +installing or otherwise using Python 1.6.1, Licensee agrees to be +bound by the terms and conditions of this License Agreement. + + ACCEPT + + +CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2 +-------------------------------------------------- + +Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam, +The Netherlands. All rights reserved. + +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose and without fee is hereby granted, +provided that the above copyright notice appear in all copies and that +both that copyright notice and this permission notice appear in +supporting documentation, and that the name of Stichting Mathematisch +Centrum or CWI not be used in advertising or publicity pertaining to +distribution of the software without specific, written prior +permission. + +STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO +THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE +FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT +OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== upb + +Copyright (c) 2009-2021, Google LLC +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Google LLC nor the names of any other + contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +EVENT SHALL GOOGLE LLC BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== wrapt + +Copyright (c) 2013-2023, Graham Dumpleton +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== zlib + + (C) 1995-2017 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. 
+ + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu + +If you use the zlib library in a product, we would appreciate *not* receiving +lengthy legal documents to sign. The sources are provided for free but without +warranty of any kind. The library has been entirely written by Jean-loup +Gailly and Mark Adler; it does not include third-party code. + +If you redistribute modified sources, we would appreciate that you include in +the file ChangeLog history information documenting your changes. Please read +the FAQ for more information on the distribution of modified source versions. + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== gif + +The GIFLIB distribution is Copyright (c) 1997 Eric S. Raymond + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== sqlite + +All of the code and documentation in SQLite has been dedicated to the public +domain by the authors. All code authors, and representatives of the companies +they work for, have signed affidavits dedicating their contributions to the +public domain and originals of those signed affidavits are stored in a firesafe +at the main offices of Hwaci. Anyone is free to copy, modify, publish, use, +compile, sell, or distribute the original SQLite code, either in source code form +or as a compiled binary, for any purpose, commercial or non-commercial, and by +any means. + +The previous paragraph applies to the deliverable code and documentation in +SQLite - those parts of the SQLite library that you actually bundle and ship +with a larger application. Some scripts used as part of the build process (for +example the "configure" scripts generated by autoconf) might fall under other +open-source licenses. Nothing from these build scripts ever reaches the final +deliverable SQLite library, however, and so the licenses associated with those +scripts should not be a factor in assessing your rights to copy and use the +SQLite library. + +All of the deliverable code in SQLite has been written from scratch. No code has +been taken from other projects or from the open internet. Every line of code can +be traced back to its original author, and all of those authors have public +domain dedications on file. So the SQLite code base is clean and is +uncontaminated with licensed code from other projects. 
+ +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- +== triton + +/* +* Copyright 2018-2020 Philippe Tillet +* Copyright 2020-2022 OpenAI +* +* Permission is hereby granted, free of charge, to any person obtaining +* a copy of this software and associated documentation files +* (the "Software"), to deal in the Software without restriction, +* including without limitation the rights to use, copy, modify, merge, +* publish, distribute, sublicense, and/or sell copies of the Software, +* and to permit persons to whom the Software is furnished to do so, +* subject to the following conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + +Copyright (c) . All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +MIT License Copyright (c) 2020 Da Yan @ HKUST + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is furnished +to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS +OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- \ No newline at end of file From 879a5e37eb350f556debec3cd5c9b6dcb8809318 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Wed, 20 Sep 2023 09:55:15 -0700 Subject: [PATCH 040/567] Rename tsl/cuda/BUILD to tsl/cuda/BUILD.bazel. Unify _stub and _lib targets in tsl/cuda (under an unsuffixed name), and update users to point directly to merged target. This also allows us to remove some .bzl macros. PiperOrigin-RevId: 566996155 --- tensorflow/core/BUILD | 2 +- tensorflow/core/common_runtime/BUILD | 2 +- tensorflow/core/profiler/backends/gpu/BUILD | 12 +-- tensorflow/python/BUILD | 6 +- tensorflow/tensorflow.bzl | 2 +- .../tsl/tsl/cuda/{BUILD => BUILD.bazel} | 79 +++---------------- .../xla/third_party/tsl/tsl/platform/BUILD | 8 -- third_party/xla/third_party/tsl/tsl/tsl.bzl | 2 +- .../xla/xla/backends/profiler/gpu/BUILD | 20 ++--- .../experiments/sm_bandwidth_benchmark/BUILD | 2 +- third_party/xla/xla/stream_executor/BUILD | 4 +- .../xla/xla/stream_executor/build_defs.bzl | 6 -- .../xla/xla/stream_executor/cuda/BUILD | 40 +++++----- 13 files changed, 52 insertions(+), 133 deletions(-) rename third_party/xla/third_party/tsl/tsl/cuda/{BUILD => BUILD.bazel} (74%) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 48b5e4da4fb2ba..757e75829b4503 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1164,7 +1164,7 @@ alias( alias( name = "cuda", - actual = "@local_tsl//tsl/platform:cuda", + actual = "@local_tsl//tsl/cuda:cudart", visibility = ["//visibility:public"], ) diff --git a/tensorflow/core/common_runtime/BUILD b/tensorflow/core/common_runtime/BUILD index 496e1a367b484b..4b7c5eb8b7c29d 100644 --- 
a/tensorflow/core/common_runtime/BUILD +++ b/tensorflow/core/common_runtime/BUILD @@ -2740,7 +2740,7 @@ tf_cuda_cc_test( "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@eigen_archive//:eigen3", - ] + if_cuda(["@local_tsl//tsl/platform:cuda"]), + ] + if_cuda(["@local_tsl//tsl/cuda:cudart"]), ) # This is identical to :common_runtime_direct_session_test with the addition of diff --git a/tensorflow/core/profiler/backends/gpu/BUILD b/tensorflow/core/profiler/backends/gpu/BUILD index 874440391a6580..674c90237a11de 100644 --- a/tensorflow/core/profiler/backends/gpu/BUILD +++ b/tensorflow/core/profiler/backends/gpu/BUILD @@ -1,5 +1,4 @@ load("@local_config_cuda//cuda:build_defs.bzl", "cuda_library", "if_cuda") -load("@local_config_rocm//rocm:build_defs.bzl", "if_rocm") load( "//tensorflow:tensorflow.bzl", "tf_copts", @@ -10,11 +9,8 @@ load( "//tensorflow/core/platform:build_config_root.bzl", "tf_cuda_tests_tags", ) -load( - "@local_xla//xla/stream_executor:build_defs.bzl", - "tf_additional_cupti_deps", -) load("//tensorflow/core/profiler/builds:build_config.bzl", "tf_profiler_copts") +load("@local_config_rocm//rocm:build_defs.bzl", "if_rocm") load( "@local_tsl//tsl/platform/default:cuda_build_defs.bzl", "if_cuda_is_configured", @@ -77,7 +73,7 @@ tf_cuda_library( "//tensorflow/core:lib", "//tensorflow/core:platform_base", "@local_xla//xla/backends/profiler/gpu:cupti_interface", - ] + tf_additional_cupti_deps(), + ] + if_cuda(["@local_tsl//tsl/cuda:cupti"]), ) tf_cuda_library( @@ -146,7 +142,7 @@ tf_cuda_library( deps = [ ":cupti_interface", "@local_xla//xla/backends/profiler/gpu:cupti_wrapper", - ] + tf_additional_cupti_deps(), + ] + if_cuda(["@local_tsl//tsl/cuda:cupti"]), ) tf_cuda_library( @@ -219,7 +215,7 @@ tf_cuda_library( "@com_google_absl//absl/strings", "@local_tsl//tsl/profiler/utils:parse_annotation", "@local_xla//xla/backends/profiler/gpu:cupti_collector", - ] + tf_additional_cupti_deps(), + ] + if_cuda(["@local_tsl//tsl/cuda:cupti"]), ) 
cc_library( diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 70cb50b408d61d..2fea59e32f411e 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -846,9 +846,9 @@ pywrap_tensorflow_macro( # nvidia standalone wheels into pywrap_tensorflow_internal. We might be # able to remove this in the future, as these stubs should already # be brought in via other dependencies. - "@local_tsl//tsl/cuda:cudnn_stub", - "@local_tsl//tsl/cuda:cufft_stub", - "@local_tsl//tsl/cuda:nccl_rpath_stub", + "@local_tsl//tsl/cuda:cudnn", + "@local_tsl//tsl/cuda:cufft", + "@local_tsl//tsl/cuda:nccl_rpath", ])) + if_xla_available([ "//tensorflow/compiler/aot:tfcompile_lib", ]) + tf_monitoring_python_deps() + tf_additional_plugin_deps() + tf_additional_profiler_deps() + tf_additional_binary_deps(), diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 202ea7cac66796..58ac889dd8997f 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -1912,7 +1912,7 @@ def tf_gpu_kernel_library( hdrs = hdrs, copts = copts, deps = deps + if_cuda([ - clean_dep("@local_tsl//tsl/cuda:cudart_stub"), + clean_dep("@local_tsl//tsl/cuda:cudart"), ]) + if_cuda_or_rocm([ clean_dep("//tensorflow/core:gpu_lib"), ]), diff --git a/third_party/xla/third_party/tsl/tsl/cuda/BUILD b/third_party/xla/third_party/tsl/tsl/cuda/BUILD.bazel similarity index 74% rename from third_party/xla/third_party/tsl/tsl/cuda/BUILD rename to third_party/xla/third_party/tsl/tsl/cuda/BUILD.bazel index efcf0bf7dd12ae..ecb7bdb2cfffa7 100644 --- a/third_party/xla/third_party/tsl/tsl/cuda/BUILD +++ b/third_party/xla/third_party/tsl/tsl/cuda/BUILD.bazel @@ -17,12 +17,11 @@ load( package( default_visibility = ["//visibility:public"], - # copybara:uncomment default_applicable_licenses = ["//tensorflow:license"], licenses = ["notice"], ) cc_library( - name = "cublas_stub", + name = "cublas", srcs = if_cuda_is_configured(["cublas_stub.cc"]), linkopts = 
if_cuda_is_configured(cuda_rpath_flags( "nvidia/cublas/lib", @@ -36,17 +35,8 @@ cc_library( ]), ) -alias( - name = "cublas_lib", - actual = select({ - "//tsl:oss": ":cublas_stub", - "//conditions:default": "@local_config_cuda//cuda:cublas", - }), - visibility = ["//visibility:public"], -) - cc_library( - name = "cublas_lt_stub", + name = "cublas_lt", srcs = if_cuda_is_configured(["cublasLt_stub.cc"]), textual_hdrs = glob(["cublasLt_*.inc"]), visibility = ["//visibility:public"], @@ -57,17 +47,8 @@ cc_library( ]), ) -alias( - name = "cublas_lt_lib", - actual = select({ - "//tsl:oss": ":cublas_lt_stub", - "//conditions:default": "@local_config_cuda//cuda:cublasLt", - }), - visibility = ["//visibility:public"], -) - cc_library( - name = "cuda_stub", + name = "cuda", srcs = if_cuda_is_configured(["cuda_stub.cc"]), textual_hdrs = glob(["cuda_*.inc"]), visibility = ["//visibility:public"], @@ -79,7 +60,7 @@ cc_library( ) cc_library( - name = "cudart_stub", + name = "cudart", srcs = select({ # include dynamic loading implementation only when if_cuda_is_configured and build dynamically "//tsl:is_cuda_enabled_and_oss": ["cudart_stub.cc"], @@ -93,7 +74,7 @@ cc_library( visibility = ["//visibility:public"], deps = select({ "//tsl:is_cuda_enabled_and_oss": [ - ":cuda_stub", + ":cuda", "//tsl/platform:dso_loader", "//tsl/platform:env", "@local_config_cuda//cuda:cuda_headers", @@ -103,7 +84,7 @@ cc_library( ) cc_library( - name = "cudnn_stub", + name = "cudnn", srcs = if_cuda_is_configured(["cudnn_stub.cc"]), linkopts = if_cuda_is_configured(cuda_rpath_flags("nvidia/cudnn/lib")), textual_hdrs = glob(["cudnn_*.inc"]), @@ -117,26 +98,17 @@ cc_library( ) cc_library( - name = "nccl_rpath_stub", + name = "nccl_rpath", linkopts = if_cuda_is_configured(cuda_rpath_flags("nvidia/nccl/lib")), visibility = ["//visibility:public"], ) cc_library( - name = "tensorrt_rpath_stub", + name = "tensorrt_rpath", linkopts = if_cuda_is_configured(cuda_rpath_flags("tensorrt")), visibility = 
["//visibility:public"], ) -alias( - name = "cudnn_lib", - actual = select({ - "//tsl:oss": ":cudnn_stub", - "//conditions:default": "@local_config_cuda//cuda:cudnn", - }), - visibility = ["//visibility:public"], -) - cc_library( name = "cudnn_version", srcs = ["cudnn_version.cc"], @@ -159,7 +131,7 @@ tsl_cc_test( ) cc_library( - name = "cufft_stub", + name = "cufft", srcs = if_cuda_is_configured(["cufft_stub.cc"]), linkopts = if_cuda_is_configured(cuda_rpath_flags("nvidia/cufft/lib")), textual_hdrs = glob(["cufft_*.inc"]), @@ -171,17 +143,8 @@ cc_library( ]), ) -alias( - name = "cufft_lib", - actual = select({ - "//tsl:oss": ":cufft_stub", - "//conditions:default": "@local_config_cuda//cuda:cufft", - }), - visibility = ["//visibility:public"], -) - cc_library( - name = "cupti_stub", + name = "cupti", srcs = if_cuda_is_configured(["cupti_stub.cc"]), data = if_cuda_is_configured(["@local_config_cuda//cuda:cupti_dsos"]), linkopts = if_cuda_is_configured(cuda_rpath_flags("nvidia/cuda_cupti/lib")), @@ -196,7 +159,7 @@ cc_library( ) cc_library( - name = "cusolver_stub", + name = "cusolver", srcs = if_cuda_is_configured(["cusolver_stub.cc"]), linkopts = if_cuda_is_configured(cuda_rpath_flags("nvidia/cusolver/lib")), textual_hdrs = glob(["cusolver_dense_*.inc"]), @@ -208,17 +171,8 @@ cc_library( ]), ) -alias( - name = "cusolver_lib", - actual = select({ - "//tsl:oss": ":cusolver_stub", - "//conditions:default": "@local_config_cuda//cuda:cusolver", - }), - visibility = ["//visibility:public"], -) - cc_library( - name = "cusparse_stub", + name = "cusparse", srcs = if_cuda_is_configured(["cusparse_stub.cc"]), linkopts = if_cuda_is_configured(cuda_rpath_flags("nvidia/cusparse/lib")), textual_hdrs = glob(["cusparse_*.inc"]), @@ -229,12 +183,3 @@ cc_library( "//tsl/platform:env", ]), ) - -alias( - name = "cusparse_lib", - actual = select({ - "//tsl:oss": ":cusparse_stub", - "//conditions:default": "@local_config_cuda//cuda:cusparse", - }), - visibility = 
["//visibility:public"], -) diff --git a/third_party/xla/third_party/tsl/tsl/platform/BUILD b/third_party/xla/third_party/tsl/tsl/platform/BUILD index 2574e65bf2fd1c..22212210dccb6a 100644 --- a/third_party/xla/third_party/tsl/tsl/platform/BUILD +++ b/third_party/xla/third_party/tsl/tsl/platform/BUILD @@ -1102,14 +1102,6 @@ cc_library( ], ) -cc_library( - name = "cuda", - visibility = ["//visibility:public"], - deps = [ - "@local_config_cuda//cuda:cudart_static", - ], -) - cc_library( name = "dso_loader", hdrs = ["dso_loader.h"], diff --git a/third_party/xla/third_party/tsl/tsl/tsl.bzl b/third_party/xla/third_party/tsl/tsl/tsl.bzl index 4bd78c3f771e55..adc0cbf00c39c4 100644 --- a/third_party/xla/third_party/tsl/tsl/tsl.bzl +++ b/third_party/xla/third_party/tsl/tsl/tsl.bzl @@ -321,7 +321,7 @@ def tsl_gpu_library(deps = None, cuda_deps = None, copts = tsl_copts(), **kwargs kwargs.pop("default_copts", None) cc_library( deps = deps + if_cuda([ - clean_dep("//tsl/cuda:cudart_stub"), + clean_dep("//tsl/cuda:cudart"), "@local_config_cuda//cuda:cuda_headers", ]) + if_rocm_is_configured([ "@local_config_rocm//rocm:rocm_headers", diff --git a/third_party/xla/xla/backends/profiler/gpu/BUILD b/third_party/xla/xla/backends/profiler/gpu/BUILD index 1ec816c76f8784..09de99f682ba6f 100644 --- a/third_party/xla/xla/backends/profiler/gpu/BUILD +++ b/third_party/xla/xla/backends/profiler/gpu/BUILD @@ -1,4 +1,8 @@ load("@local_config_cuda//cuda:build_defs.bzl", "cuda_library", "if_cuda") +load( + "//xla:xla.bzl", + "xla_cc_test", +) load("@local_config_rocm//rocm:build_defs.bzl", "if_rocm") load( "@local_tsl//tsl:tsl.bzl", @@ -13,19 +17,11 @@ load( "@local_tsl//tsl/platform:build_config_root.bzl", "tf_cuda_tests_tags", ) -load( - "//xla/stream_executor:build_defs.bzl", - "tf_additional_cupti_deps", -) -load("@local_tsl//tsl/profiler/builds:build_config.bzl", "tf_profiler_copts") load( "@local_tsl//tsl/platform/default:cuda_build_defs.bzl", "if_cuda_is_configured", ) -load( - 
"//xla:xla.bzl", - "xla_cc_test", -) +load("@local_tsl//tsl/profiler/builds:build_config.bzl", "tf_profiler_copts") package( default_visibility = ["//visibility:public"], @@ -74,7 +70,7 @@ tsl_gpu_library( deps = [ "@local_tsl//tsl/platform:macros", "@local_tsl//tsl/platform:types", - ] + tf_additional_cupti_deps(), + ] + if_cuda(["@local_tsl//tsl/cuda:cupti"]), ) tsl_gpu_library( @@ -169,7 +165,7 @@ tsl_gpu_library( visibility = ["//visibility:public"], deps = [ ":cupti_interface", - ] + tf_additional_cupti_deps(), + ] + if_cuda(["@local_tsl//tsl/cuda:cupti"]), ) tsl_gpu_library( @@ -287,7 +283,7 @@ tsl_gpu_library( "@local_tsl//tsl/profiler/utils:xplane_builder", "@local_tsl//tsl/profiler/utils:xplane_schema", "@local_tsl//tsl/profiler/utils:xplane_utils", - ] + tf_additional_cupti_deps(), + ] + if_cuda(["@local_tsl//tsl/cuda:cupti"]), ) cc_library( diff --git a/third_party/xla/xla/experiments/sm_bandwidth_benchmark/BUILD b/third_party/xla/xla/experiments/sm_bandwidth_benchmark/BUILD index 5128f72a982e91..003c091632c73f 100644 --- a/third_party/xla/xla/experiments/sm_bandwidth_benchmark/BUILD +++ b/third_party/xla/xla/experiments/sm_bandwidth_benchmark/BUILD @@ -33,6 +33,6 @@ xla_cc_test( ":sm_bw_utils", "@com_google_googletest//:gtest_main", ] + if_cuda([ - "@local_tsl//tsl/platform:cuda", + "@local_tsl//tsl/cuda:cudart", ]), ) diff --git a/third_party/xla/xla/stream_executor/BUILD b/third_party/xla/xla/stream_executor/BUILD index 2c8f6005e824ba..6760fa8d6fa477 100644 --- a/third_party/xla/xla/stream_executor/BUILD +++ b/third_party/xla/xla/stream_executor/BUILD @@ -6,7 +6,7 @@ load("//xla:xla.bzl", "xla_cc_test") load("//xla/stream_executor:build_defs.bzl", "stream_executor_friends") -load("@local_tsl//tsl:tsl.bzl", "set_external_visibility", "transitive_hdrs", "tsl_gpu_library") +load("@local_tsl//tsl:tsl.bzl", "set_external_visibility", "transitive_hdrs") load("@local_tsl//tsl:tsl.default.bzl", "filegroup") load("@local_tsl//tsl/platform:build_config.bzl", 
"tf_proto_library") load("@local_tsl//tsl/platform:build_config_root.bzl", "if_static") @@ -433,7 +433,7 @@ cc_library( ) # It implements :stream_executor_pimpl_header -tsl_gpu_library( +cc_library( name = "stream_executor_pimpl", srcs = [ "stream.cc", diff --git a/third_party/xla/xla/stream_executor/build_defs.bzl b/third_party/xla/xla/stream_executor/build_defs.bzl index a52f22c066f565..c76ff0bdea1074 100644 --- a/third_party/xla/xla/stream_executor/build_defs.bzl +++ b/third_party/xla/xla/stream_executor/build_defs.bzl @@ -7,12 +7,6 @@ def stream_executor_friends(): def tf_additional_cuda_platform_deps(): return [] -def tf_additional_cuda_driver_deps(): - return ["@local_tsl//tsl/cuda:cuda_stub"] - -def tf_additional_cupti_deps(): - return ["@local_xla//xla/stream_executor/cuda:cupti_stub"] - def tf_additional_cudnn_plugin_deps(): return [] diff --git a/third_party/xla/xla/stream_executor/cuda/BUILD b/third_party/xla/xla/stream_executor/cuda/BUILD index 9ac7571e2dc8b7..b008a8a17559d1 100644 --- a/third_party/xla/xla/stream_executor/cuda/BUILD +++ b/third_party/xla/xla/stream_executor/cuda/BUILD @@ -8,7 +8,6 @@ load( load( "//xla/stream_executor:build_defs.bzl", "stream_executor_friends", - "tf_additional_cuda_driver_deps", "tf_additional_cuda_platform_deps", "tf_additional_cudnn_plugin_copts", "tf_additional_cudnn_plugin_deps", @@ -127,18 +126,15 @@ cc_library( "//xla/stream_executor/gpu:gpu_driver_header", "//xla/stream_executor/platform", "//xla/stream_executor/platform:dso_loader", + "@local_tsl//tsl/cuda", + "@local_tsl//tsl/cuda:cudart", "@local_tsl//tsl/platform:env", "@local_tsl//tsl/platform:static_threadlocal", - ] + tf_additional_cuda_driver_deps()) + select({ - # include dynamic loading implementation only when if_cuda_is_configured and build dynamically - "@local_tsl//tsl:is_cuda_enabled_and_oss": ["cudart_stub"], - "//conditions:default": ["@local_tsl//tsl/platform:cuda"], - }) + [ "@com_google_absl//absl/base:core_headers", 
"@com_google_absl//absl/memory", "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/synchronization", - ], + ]), ) xla_cc_test( @@ -191,7 +187,7 @@ xla_cc_test( alias( name = "cudart_stub", - actual = "@local_tsl//tsl/cuda:cudart_stub", + actual = "@local_tsl//tsl/cuda:cudart", visibility = ["//visibility:public"], ) @@ -236,13 +232,13 @@ cc_library( alias( name = "cublas_stub", - actual = "@local_tsl//tsl/cuda:cublas_stub", + actual = "@local_tsl//tsl/cuda:cublas", visibility = ["//visibility:public"], ) alias( name = "cublas_lib", - actual = "@local_tsl//tsl/cuda:cublas_lib", + actual = "@local_tsl//tsl/cuda:cublas", visibility = ["//visibility:public"], ) @@ -264,13 +260,13 @@ cc_library( alias( name = "cublas_lt_stub", - actual = "@local_tsl//tsl/cuda:cublas_lt_stub", + actual = "@local_tsl//tsl/cuda:cublas_lt", visibility = ["//visibility:public"], ) alias( name = "cublas_lt_lib", - actual = "@local_tsl//tsl/cuda:cublas_lt_lib", + actual = "@local_tsl//tsl/cuda:cublas_lt", visibility = ["//visibility:public"], ) @@ -334,13 +330,13 @@ cc_library( alias( name = "cufft_stub", - actual = "@local_tsl//tsl/cuda:cufft_stub", + actual = "@local_tsl//tsl/cuda:cufft", visibility = ["//visibility:public"], ) alias( name = "cufft_lib", - actual = "@local_tsl//tsl/cuda:cufft_lib", + actual = "@local_tsl//tsl/cuda:cufft", visibility = ["//visibility:public"], ) @@ -370,13 +366,13 @@ cc_library( alias( name = "cudnn_stub", - actual = "@local_tsl//tsl/cuda:cudnn_stub", + actual = "@local_tsl//tsl/cuda:cudnn", visibility = ["//visibility:public"], ) alias( name = "cudnn_lib", - actual = "@local_tsl//tsl/cuda:cudnn_lib", + actual = "@local_tsl//tsl/cuda:cudnn", visibility = ["//visibility:public"], ) @@ -432,37 +428,37 @@ cc_library( alias( name = "cupti_stub", - actual = "@local_tsl//tsl/cuda:cupti_stub", + actual = "@local_tsl//tsl/cuda:cupti", visibility = ["//visibility:public"], ) alias( name = "cusolver_stub", - actual = 
"@local_tsl//tsl/cuda:cusolver_stub", + actual = "@local_tsl//tsl/cuda:cusolver", visibility = ["//visibility:public"], ) alias( name = "cusolver_lib", - actual = "@local_tsl//tsl/cuda:cusolver_lib", + actual = "@local_tsl//tsl/cuda:cusolver", visibility = ["//visibility:public"], ) alias( name = "cusparse_stub", - actual = "@local_tsl//tsl/cuda:cusparse_stub", + actual = "@local_tsl//tsl/cuda:cusparse", visibility = ["//visibility:public"], ) alias( name = "cusparse_lib", - actual = "@local_tsl//tsl/cuda:cusparse_lib", + actual = "@local_tsl//tsl/cuda:cusparse", visibility = ["//visibility:public"], ) alias( name = "tensorrt_rpath_stub", - actual = "@local_tsl//tsl/cuda:tensorrt_rpath_stub", + actual = "@local_tsl//tsl/cuda:tensorrt_rpath", visibility = ["//visibility:public"], ) From 8ddb2e6879fedfa6928bdbcb97c2e665464338ef Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Wed, 20 Sep 2023 11:20:47 -0700 Subject: [PATCH 041/567] Refer to CUDA stubs directly from TSL, rather than using an alias defined in xla/stream_executor. Remove the aliases in xla/stream_executor. 
PiperOrigin-RevId: 567025507 --- tensorflow/core/util/BUILD | 22 ++-- third_party/xla/xla/service/gpu/BUILD | 2 +- .../xla/xla/stream_executor/cuda/BUILD | 108 ++---------------- 3 files changed, 21 insertions(+), 111 deletions(-) diff --git a/tensorflow/core/util/BUILD b/tensorflow/core/util/BUILD index fbd116d1d2e9a9..aa86cb6a0ff568 100644 --- a/tensorflow/core/util/BUILD +++ b/tensorflow/core/util/BUILD @@ -1,11 +1,4 @@ -load( - "//tensorflow/core/platform:build_config.bzl", - "tf_proto_library", -) -load( - "//tensorflow/core/platform:rules_cc.bzl", - "cc_library", -) +load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") load( "//tensorflow:tensorflow.bzl", "check_deps", @@ -19,11 +12,18 @@ load( "tf_mkl_kernel_library", ) load("//tensorflow:tensorflow.default.bzl", "filegroup", "get_compatible_with_portable", "tf_version_info_genrule") +load( + "//tensorflow/core/platform:build_config.bzl", + "tf_proto_library", +) load( "//tensorflow/core/platform:build_config_root.bzl", "if_static", ) -load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") +load( + "//tensorflow/core/platform:rules_cc.bzl", + "cc_library", +) load( "@local_config_rocm//rocm:build_defs.bzl", "if_rocm", @@ -739,8 +739,8 @@ tf_kernel_library( deps = [ "//tensorflow/core:framework", "//tensorflow/core:lib", + "@local_tsl//tsl/cuda:cusolver", "@local_xla//xla/stream_executor/cuda:cublas_plugin", - "@local_xla//xla/stream_executor/cuda:cusolver_lib", ], ) @@ -778,9 +778,9 @@ tf_kernel_library( "//tensorflow/core:framework", "//tensorflow/core:lib", ] + if_cuda([ - "@local_xla//xla/stream_executor/cuda:cusparse_lib", "@local_xla//xla/stream_executor/cuda:cuda_blas_utils", "@local_xla//xla/stream_executor:data_type", + "@local_tsl//tsl/cuda:cusparse", "@local_config_cuda//cuda:cub_headers", ]) + if_rocm([ "@local_xla//xla/stream_executor/rocm:rocsolver_wrapper", diff --git a/third_party/xla/xla/service/gpu/BUILD b/third_party/xla/xla/service/gpu/BUILD index 46de10211d6ccb..374ba47563aa10 
100644 --- a/third_party/xla/xla/service/gpu/BUILD +++ b/third_party/xla/xla/service/gpu/BUILD @@ -1867,7 +1867,7 @@ cc_library( "@local_tsl//tsl/platform:status", ] + if_cuda_is_configured([ "@local_config_cuda//cuda:cuda_headers", - "//xla/stream_executor/cuda:cusolver_lib", + "@local_tsl//tsl/cuda:cusolver", ]) + if_rocm_is_configured([ "@local_config_rocm//rocm:rocm_headers", "//xla/stream_executor/rocm:rocblas_wrapper", diff --git a/third_party/xla/xla/stream_executor/cuda/BUILD b/third_party/xla/xla/stream_executor/cuda/BUILD index b008a8a17559d1..22d30bf5fd23f9 100644 --- a/third_party/xla/xla/stream_executor/cuda/BUILD +++ b/third_party/xla/xla/stream_executor/cuda/BUILD @@ -185,12 +185,6 @@ xla_cc_test( ], ) -alias( - name = "cudart_stub", - actual = "@local_tsl//tsl/cuda:cudart", - visibility = ["//visibility:public"], -) - # The activation library is tightly coupled to the executor library. # TODO(leary) split up cuda_gpu_executor.cc so that this can stand alone. cc_library( @@ -230,18 +224,6 @@ cc_library( ]), ) -alias( - name = "cublas_stub", - actual = "@local_tsl//tsl/cuda:cublas", - visibility = ["//visibility:public"], -) - -alias( - name = "cublas_lib", - actual = "@local_tsl//tsl/cuda:cublas", - visibility = ["//visibility:public"], -) - cc_library( name = "cublas_lt_header", hdrs = if_cuda_is_configured([ @@ -258,18 +240,6 @@ cc_library( ]) + ["@local_tsl//tsl/platform:errors"], ) -alias( - name = "cublas_lt_stub", - actual = "@local_tsl//tsl/cuda:cublas_lt", - visibility = ["//visibility:public"], -) - -alias( - name = "cublas_lt_lib", - actual = "@local_tsl//tsl/cuda:cublas_lt", - visibility = ["//visibility:public"], -) - cc_library( name = "cublas_plugin", srcs = if_cuda_is_configured([ @@ -282,8 +252,6 @@ cc_library( ]), visibility = ["//visibility:public"], deps = if_cuda_is_configured([ - ":cublas_lib", - ":cublas_lt_lib", ":cuda_activation", ":cuda_blas_utils", ":cuda_gpu_executor", @@ -308,6 +276,8 @@ cc_library( 
"//xla/stream_executor/gpu:gpu_timer", "//xla/stream_executor/gpu:gpu_types_header", "//xla/stream_executor/platform", + "@local_tsl//tsl/cuda:cublas", + "@local_tsl//tsl/cuda:cublas_lt", "@local_tsl//tsl/platform:tensor_float_32_hdr_lib", ]) + if_static([ "@local_tsl//tsl/platform:tensor_float_32_utils", @@ -321,25 +291,13 @@ cc_library( hdrs = if_cuda_is_configured(["cuda_blas_utils.h"]), visibility = ["//visibility:public"], deps = if_cuda_is_configured([ - ":cublas_lib", + "@local_tsl//tsl/cuda:cublas", "@com_google_absl//absl/strings", "@local_config_cuda//cuda:cuda_headers", "//xla/stream_executor:stream_executor_headers", ]) + ["@local_tsl//tsl/platform:errors"], ) -alias( - name = "cufft_stub", - actual = "@local_tsl//tsl/cuda:cufft", - visibility = ["//visibility:public"], -) - -alias( - name = "cufft_lib", - actual = "@local_tsl//tsl/cuda:cufft", - visibility = ["//visibility:public"], -) - cc_library( name = "cufft_plugin", srcs = if_cuda_is_configured(["cuda_fft.cc"]), @@ -351,7 +309,6 @@ cc_library( ":cuda_platform_id", ":cuda_stream", ":cuda_helpers", - ":cufft_lib", "@local_config_cuda//cuda:cuda_headers", "//xla/stream_executor:event", "//xla/stream_executor:fft", @@ -360,22 +317,11 @@ cc_library( "//xla/stream_executor/gpu:gpu_helpers_header", "//xla/stream_executor/platform", "//xla/stream_executor/platform:dso_loader", + "@local_tsl//tsl/cuda:cufft", ]) + ["@local_tsl//tsl/platform:errors"], alwayslink = True, ) -alias( - name = "cudnn_stub", - actual = "@local_tsl//tsl/cuda:cudnn", - visibility = ["//visibility:public"], -) - -alias( - name = "cudnn_lib", - actual = "@local_tsl//tsl/cuda:cudnn", - visibility = ["//visibility:public"], -) - cc_library( name = "cuda_dnn_headers", textual_hdrs = ["cuda_dnn.h"], @@ -400,7 +346,6 @@ cc_library( ":cuda_gpu_executor", ":cuda_platform_id", ":cuda_stream", - ":cudnn_lib", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", @@ -408,6 +353,7 @@ 
cc_library( "@eigen_archive//:eigen3", "@local_config_cuda//cuda:cuda_headers", "@local_config_cuda//cuda:cudnn_header", + "@local_tsl//tsl/cuda:cudnn", "@local_tsl//tsl/cuda:cudnn_version", "@local_tsl//tsl/platform:tensor_float_32_utils", "//xla/stream_executor:dnn", @@ -426,42 +372,6 @@ cc_library( alwayslink = True, ) -alias( - name = "cupti_stub", - actual = "@local_tsl//tsl/cuda:cupti", - visibility = ["//visibility:public"], -) - -alias( - name = "cusolver_stub", - actual = "@local_tsl//tsl/cuda:cusolver", - visibility = ["//visibility:public"], -) - -alias( - name = "cusolver_lib", - actual = "@local_tsl//tsl/cuda:cusolver", - visibility = ["//visibility:public"], -) - -alias( - name = "cusparse_stub", - actual = "@local_tsl//tsl/cuda:cusparse", - visibility = ["//visibility:public"], -) - -alias( - name = "cusparse_lib", - actual = "@local_tsl//tsl/cuda:cusparse", - visibility = ["//visibility:public"], -) - -alias( - name = "tensorrt_rpath_stub", - actual = "@local_tsl//tsl/cuda:tensorrt_rpath", - visibility = ["//visibility:public"], -) - cc_library( name = "cuda_kernel", srcs = if_cuda_is_configured(["cuda_kernel.cc"]), @@ -579,9 +489,9 @@ cc_library( ":cuda_platform", ":cudnn_plugin", ":cufft_plugin", - ":cusolver_lib", - ":cusparse_lib", - ":tensorrt_rpath_stub", + "@local_tsl//tsl/cuda:cusolver", + "@local_tsl//tsl/cuda:cusparse", + "@local_tsl//tsl/cuda:tensorrt_rpath", ], alwayslink = 1, ) @@ -609,7 +519,7 @@ cc_library( ], }), [ - ":cudart_stub", + "@local_tsl//tsl/cuda:cudart", ] + select({ "@local_tsl//tsl:macos": ["IOKit"], "//conditions:default": [], From 9770efd3b28709716705ffd11af55fa1c7a60637 Mon Sep 17 00:00:00 2001 From: Chao Date: Wed, 20 Sep 2023 11:37:18 -0700 Subject: [PATCH 042/567] PR #5749: [ROCm] gpu command buffer for ROCm Imported from GitHub PR https://github.com/openxla/xla/pull/5749 ROCm enable command buffer https://github.com/openxla/xla/commit/ec74b3b29d549ba97f499751651684800ab55aa6 @ezhulenev @akuegel Thanks in advance 
Copybara import of the project: -- aca5aa0e853b201c94420f41c22f5b837dd97f76 by Chao Chen : ROCm adds gpu command buffer Merging this change closes #5749 PiperOrigin-RevId: 567031199 --- third_party/xla/xla/stream_executor/rocm/BUILD | 1 + .../xla/xla/stream_executor/rocm/rocm_gpu_executor.cc | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/third_party/xla/xla/stream_executor/rocm/BUILD b/third_party/xla/xla/stream_executor/rocm/BUILD index 5353a5ec9ad721..a4c1bf64976212 100644 --- a/third_party/xla/xla/stream_executor/rocm/BUILD +++ b/third_party/xla/xla/stream_executor/rocm/BUILD @@ -115,6 +115,7 @@ cc_library( "//xla/stream_executor/gpu:gpu_activation_header", "//xla/stream_executor/gpu:gpu_event", "//xla/stream_executor/gpu:gpu_kernel_header", + "//xla/stream_executor/gpu:gpu_command_buffer", "//xla/stream_executor/gpu:gpu_stream", "//xla/stream_executor/gpu:gpu_timer", "//xla/stream_executor/platform", diff --git a/third_party/xla/xla/stream_executor/rocm/rocm_gpu_executor.cc b/third_party/xla/xla/stream_executor/rocm/rocm_gpu_executor.cc index 20024780b067c4..5ad9feb70e5652 100644 --- a/third_party/xla/xla/stream_executor/rocm/rocm_gpu_executor.cc +++ b/third_party/xla/xla/stream_executor/rocm/rocm_gpu_executor.cc @@ -15,6 +15,8 @@ limitations under the License. #include +#include + #include "absl/base/casts.h" #include "absl/functional/any_invocable.h" #include "absl/strings/ascii.h" @@ -22,9 +24,11 @@ limitations under the License. 
#include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" #include "absl/strings/str_join.h" +#include "xla/stream_executor/gpu/gpu_command_buffer.h" #include "xla/stream_executor/gpu/gpu_driver.h" #include "xla/stream_executor/gpu/gpu_event.h" #include "xla/stream_executor/gpu/gpu_executor.h" +#include "xla/stream_executor/gpu/gpu_kernel.h" #include "xla/stream_executor/gpu/gpu_stream.h" #include "xla/stream_executor/gpu/gpu_timer.h" #include "xla/stream_executor/kernel_cache_config.h" @@ -715,6 +719,12 @@ GpuExecutor::GetStreamImplementation() { return std::unique_ptr(new GpuStream(this)); } +tsl::StatusOr> +GpuExecutor::GetCommandBufferImplementation() { + return std::unique_ptr( + new GpuCommandBuffer()); +} + void* GpuExecutor::GpuContextHack() { return context_; } GpuContext* GpuExecutor::gpu_context() { return context_; } From f17f1a7b0c0abc9b1e0b11a461f71685b51aae2d Mon Sep 17 00:00:00 2001 From: Ziyin Huang Date: Wed, 20 Sep 2023 12:06:51 -0700 Subject: [PATCH 043/567] add id shuffling to the sparse core preprocess ops. 
PiperOrigin-RevId: 567040448 --- tensorflow/core/tpu/kernels/BUILD | 2 - .../tpu/kernels/sparse_core_preprocess_ops.cc | 56 ++++++++++--------- .../tpu/kernels/sparse_core_preprocess_ops.h | 6 ++ 3 files changed, 37 insertions(+), 27 deletions(-) diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index 0c600b45d37dbd..e75f17f57625d7 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -142,10 +142,8 @@ cc_library( ":sparse_core_ops_stats_handler", ":sparse_core_ops_utils", "//tensorflow/core:framework", - "//tensorflow/core:lib", "//tensorflow/core:lib_proto_parsing", "//tensorflow/core:protos_all_cc", - "//tensorflow/core/platform:errors", "//tensorflow/core/platform:status", "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/log", diff --git a/tensorflow/core/tpu/kernels/sparse_core_preprocess_ops.cc b/tensorflow/core/tpu/kernels/sparse_core_preprocess_ops.cc index df3364f1ec8407..7181edfe33d7b2 100644 --- a/tensorflow/core/tpu/kernels/sparse_core_preprocess_ops.cc +++ b/tensorflow/core/tpu/kernels/sparse_core_preprocess_ops.cc @@ -356,8 +356,7 @@ void GetMinibatchesInCsrWithPhysicalReplicaOp::Compute(OpKernelContext* ctx) { const int num_physical_replica = num_replica_ * num_sc_per_chip_; - size_t xla_pad_size = stream_executor::tpu::OpsApiFn() - ->TpuUtil_GetXlaPadSizeFromTpuTopologyFn(); + size_t xla_pad_size = 8; OP_REQUIRES(ctx, sample_count_ % num_sc_per_chip_ == 0, absl::InvalidArgumentError( @@ -646,12 +645,11 @@ void GetMinibatchSplitsWithPhysicalReplicaOp::Compute(OpKernelContext* ctx) { (kMaxDivisions + 1)); std::vector record_total_unique_id_counter(num_physical_replica * (kMaxDivisions + 1)); - // Array which keeps track of the index of each physical replica. - std::vector per_physical_replica_index(num_physical_replica); - // Accumulated sum of the id count for each physical replica. 
- std::vector physical_replica_id_count((num_physical_replica + 1) * - num_sc_per_chip_); + // Array which keeps track of the index of each physical replica and each + // bucket. + std::vector per_physical_replica_bucket_index(num_physical_replica * + kMaxDivisions); // Id counts for each sc input. std::vector per_sc_id_count(num_sc_per_chip_, 0); @@ -696,10 +694,15 @@ void GetMinibatchSplitsWithPhysicalReplicaOp::Compute(OpKernelContext* ctx) { if (row_id != previous_row_id || col_id != previous_col_id) { dedup_ids_index_mapping[id_array_index] = id_array_index; gains_after_dedup[id_array_index] = *(gains_ptr + id_array_index); - int32 replica_id = col_id % num_physical_replica; - int32 bucket_id = col_id / division_size + 1; + int32_t replica_id = col_id % num_physical_replica; + int32_t bucket_id; + if (allow_id_shuffling_for_minibatching_) { + bucket_id = CalculateBucketIdWithHashing(col_id, kMaxDivisions); + } else { + bucket_id = std::min(col_id / division_size, kMaxDivisions - 1); + } uint32_t id_counter_index = - replica_id * (kMaxDivisions + 1) + bucket_id; + replica_id * (kMaxDivisions + 1) + bucket_id + 1; record_total_id_counter[id_counter_index]++; if (col_id != previous_col_id) record_total_unique_id_counter[id_counter_index]++; @@ -770,11 +773,6 @@ void GetMinibatchSplitsWithPhysicalReplicaOp::Compute(OpKernelContext* ctx) { this_max_ids = std::max(this_max_ids, record_id_counter[kMaxDivisions]); this_max_uniques = std::max(this_max_uniques, record_unique_id_counter[kMaxDivisions]); - physical_replica_id_count[sc_id * (num_physical_replica + 1) + - replica_id + 1] = - physical_replica_id_count[sc_id * (num_physical_replica + 1) + - replica_id] + - id_counter[kMaxDivisions]; per_sc_id_count[sc_id] += id_counter[kMaxDivisions]; for (int level = 0; level < max_division_level; ++level) { @@ -868,11 +866,9 @@ void GetMinibatchSplitsWithPhysicalReplicaOp::Compute(OpKernelContext* ctx) { sorted_col_ids_tensor->flat().data(); float* sorted_gains_tensor_ptr = 
sorted_gains_tensor->flat().data(); - int32_t previous_index = 0; - for (int sc_id = 0; sc_id < num_sc_per_chip_; ++sc_id) { - memset(per_physical_replica_index.data(), 0, - num_physical_replica * sizeof(int32)); + memset(per_physical_replica_bucket_index.data(), 0, + num_physical_replica * kMaxDivisions * sizeof(int32_t)); for (uint64_t item : col_ids_index_list[sc_id]) { uint32_t id_array_index = item & 0xffffffff; // Skip deduped ids. @@ -881,20 +877,30 @@ void GetMinibatchSplitsWithPhysicalReplicaOp::Compute(OpKernelContext* ctx) { continue; } int32_t col_id = item >> 32; + std::string col_id_str = std::to_string(col_id); int32_t replica_id = col_id % num_physical_replica; - int32_t main_index = - per_physical_replica_index[replica_id] + previous_index + - physical_replica_id_count[sc_id * (num_physical_replica + 1) + - replica_id]; + int32_t bucket_id; + int32_t main_index; + if (allow_id_shuffling_for_minibatching_) { + bucket_id = CalculateBucketIdWithHashing(col_id, kMaxDivisions); + } else { + bucket_id = std::min(col_id / division_size, kMaxDivisions - 1); + } + main_index = + per_physical_replica_bucket_index[replica_id * kMaxDivisions + + bucket_id] + + *(id_counts_tensor_ptr + + (sc_id * num_physical_replica + replica_id) * kMaxDivisions + + bucket_id); + ++per_physical_replica_bucket_index[replica_id * kMaxDivisions + + bucket_id]; *(sorted_row_ids_tensor_ptr + main_index) = *(row_ids_ptr + id_array_index) % per_sc_sample_count; *(sorted_col_ids_tensor_ptr + main_index) = col_id / num_physical_replica; // Use the updated gains instead. 
*(sorted_gains_tensor_ptr + main_index) = gains_after_dedup[id_array_index]; - per_physical_replica_index[replica_id]++; } - previous_index += per_sc_id_count[sc_id]; } sprase_core_ops_stats_handler_->Record( diff --git a/tensorflow/core/tpu/kernels/sparse_core_preprocess_ops.h b/tensorflow/core/tpu/kernels/sparse_core_preprocess_ops.h index b1f2e8802eac9a..0ecf6214f1f4a5 100644 --- a/tensorflow/core/tpu/kernels/sparse_core_preprocess_ops.h +++ b/tensorflow/core/tpu/kernels/sparse_core_preprocess_ops.h @@ -112,11 +112,17 @@ class GetMinibatchSplitsWithPhysicalReplicaOp : public OpKernel { tstring program_key, int64_t max_ids_per_partition, int64_t max_unique_ids_per_partition) {} + virtual inline int32_t CalculateBucketIdWithHashing(int32_t col_id, + int32_t num_buckets) { + // TODO(pineapplejuice233): Add a proper hashing function here. + return col_id % num_buckets; + } std::string device_name_; std::string table_name_; std::unique_ptr sprase_core_ops_stats_handler_; bool allow_id_dropping_for_minibatching_ = false; + bool allow_id_shuffling_for_minibatching_ = false; private: int num_replica_ = 1; From 6dc6334a711da5b8135fe6da1445a0b259ab69ac Mon Sep 17 00:00:00 2001 From: Mason Chang Date: Wed, 20 Sep 2023 12:10:30 -0700 Subject: [PATCH 044/567] Rollback of commit a9075649062815e7859544728a0c3e0af9311a0c Pass along the `shape` param from `tf.compat.v1.get_variable` to its underlying variable creator. At this point, we have already checked that either `shape` is compatible w/ `initial_value`, or that it is not specified (`None`). Therefore it should alway... 
PiperOrigin-RevId: 567041608 --- .../feature_column/feature_column_test.py | 28 ++++++------------- .../feature_column/feature_column_v2_test.py | 12 +++----- tensorflow/python/ops/variable_scope.py | 17 ++++------- 3 files changed, 17 insertions(+), 40 deletions(-) diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py index f8fbd8db7e3b15..3cde6c5657edc1 100644 --- a/tensorflow/python/feature_column/feature_column_test.py +++ b/tensorflow/python/feature_column/feature_column_test.py @@ -4924,19 +4924,16 @@ def test_get_dense_tensor(self, use_safe_embedding_lookup, ) def _initializer(shape, dtype, partition_info=None): - self.assertEqual(dtypes.float32, dtype) if partition_variables: - assert partition_info is not None self.assertEqual([vocabulary_size, embedding_dimension], partition_info.full_shape) self.assertAllEqual((2, embedding_dimension), shape) - return array_ops.slice( - embedding_values, partition_info.var_offset, shape - ) else: self.assertAllEqual((vocabulary_size, embedding_dimension), shape) self.assertIsNone(partition_info) - return embedding_values + + self.assertEqual(dtypes.float32, dtype) + return embedding_values # Expected lookup result, using combiner='mean'. 
expected_lookups = ( @@ -4979,12 +4976,7 @@ def _initializer(shape, dtype, partition_info=None): for v in global_vars: self.assertIsInstance(v, variables_lib.Variable) with _initialized_session(): - if partition_variables: - self.assertAllEqual( - embedding_values, array_ops.concat(global_vars, axis=0) - ) - else: - self.assertAllEqual(embedding_values, global_vars[0]) + self.assertAllEqual(embedding_values, global_vars[0]) self.assertAllEqual(expected_lookups, self.evaluate(embedding_lookup)) if use_safe_embedding_lookup: @@ -5791,19 +5783,16 @@ def test_get_dense_tensor(self, use_safe_embedding_lookup, ) def _initializer(shape, dtype, partition_info=None): - self.assertEqual(dtypes.float32, dtype) if partition_variables: - assert partition_info is not None self.assertEqual([vocabulary_size, embedding_dimension], partition_info.full_shape) self.assertAllEqual((2, embedding_dimension), shape) - return array_ops.slice( - embedding_values, partition_info.var_offset, shape - ) else: self.assertAllEqual((vocabulary_size, embedding_dimension), shape) self.assertIsNone(partition_info) - return embedding_values + + self.assertEqual(dtypes.float32, dtype) + return embedding_values # Expected lookup result, using combiner='mean'. 
expected_lookups_a = ( @@ -5853,11 +5842,10 @@ def _initializer(shape, dtype, partition_info=None): self.assertCountEqual(('vars/embedding_weights/part_0:0', 'vars/embedding_weights/part_1:0'), tuple([v.name for v in global_vars])) - embedding_var = array_ops.concat(global_vars, axis=0) else: self.assertCountEqual(('vars/embedding_weights:0',), tuple([v.name for v in global_vars])) - embedding_var = global_vars[0] + embedding_var = global_vars[0] self.evaluate(variables_lib.global_variables_initializer()) self.evaluate(lookup_ops.tables_initializer()) diff --git a/tensorflow/python/feature_column/feature_column_v2_test.py b/tensorflow/python/feature_column/feature_column_v2_test.py index cb98bf60c03184..21dcbb4452d6c7 100644 --- a/tensorflow/python/feature_column/feature_column_v2_test.py +++ b/tensorflow/python/feature_column/feature_column_v2_test.py @@ -5762,19 +5762,16 @@ def test_get_dense_tensor(self, use_safe_embedding_lookup, ) def _initializer(shape, dtype, partition_info=None): - self.assertEqual(dtypes.float32, dtype) if partition_variables: - assert partition_info is not None self.assertEqual([vocabulary_size, embedding_dimension], partition_info.full_shape) self.assertAllEqual((2, embedding_dimension), shape) - return array_ops.slice( - embedding_values, partition_info.var_offset, shape - ) else: self.assertAllEqual((vocabulary_size, embedding_dimension), shape) self.assertIsNone(partition_info) - return embedding_values + + self.assertEqual(dtypes.float32, dtype) + return embedding_values # Expected lookup result, using combiner='mean'. 
expected_lookups_a = ( @@ -5824,11 +5821,10 @@ def _initializer(shape, dtype, partition_info=None): self.assertCountEqual(('vars/aaa_bbb_shared_embedding/part_0:0', 'vars/aaa_bbb_shared_embedding/part_1:0'), tuple([v.name for v in global_vars])) - embedding_var = array_ops.concat(global_vars, axis=0) else: self.assertCountEqual(('vars/aaa_bbb_shared_embedding:0',), tuple([v.name for v in global_vars])) - embedding_var = global_vars[0] + embedding_var = global_vars[0] self.evaluate(variables_lib.global_variables_initializer()) self.evaluate(lookup_ops.tables_initializer()) diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py index 622f70732adaba..33dd0438fa2f2f 100644 --- a/tensorflow/python/ops/variable_scope.py +++ b/tensorflow/python/ops/variable_scope.py @@ -807,8 +807,7 @@ def _get_partitioned_variable(self, use_resource=use_resource, constraint=constraint, synchronization=synchronization, - aggregation=aggregation, - ) + aggregation=aggregation) # pylint: disable=protected-access var._set_save_slice_info( @@ -881,8 +880,7 @@ def _get_single_variable(self, raise ValueError("If initializer is a constant, do not specify shape.") dtype = dtypes.as_dtype(dtype) - if shape is not None: - shape = tensor_shape.as_shape(shape) + shape = tensor_shape.as_shape(shape) if name in self._vars: # Here we handle the case when returning an existing variable. @@ -903,9 +901,7 @@ def _get_single_variable(self, raise ValueError("%s Originally defined at:\n\n%s" % (err_msg, "".join(traceback.format_list(tb)))) found_var = self._vars[name] - if shape is not None and not shape.is_compatible_with( - found_var.get_shape() - ): + if not shape.is_compatible_with(found_var.get_shape()): raise ValueError("Trying to share variable %s, but specified shape %s" " and found shape %s." % (name, shape, found_var.get_shape())) @@ -925,7 +921,6 @@ def _get_single_variable(self, # Create the tensor to initialize the variable with default value. 
if initializer is None: - assert shape is not None initializer, initializing_from_value = self._get_default_initializer( name=name, shape=shape, dtype=dtype) # Enter an init scope when creating the initializer. @@ -937,7 +932,7 @@ def _get_single_variable(self, # Instantiate initializer if provided initializer is a type object. if tf_inspect.isclass(initializer): initializer = initializer() - if shape is not None and shape.is_fully_defined(): + if shape.is_fully_defined(): if "partition_info" in tf_inspect.getargspec(initializer).args: init_val = functools.partial(initializer, shape.as_list(), @@ -972,9 +967,7 @@ def _get_single_variable(self, constraint=constraint, use_resource=use_resource, synchronization=synchronization, - aggregation=aggregation, - shape=shape, - ) + aggregation=aggregation) if context.executing_eagerly() and self._store_eager_variables: if collections: ops.add_to_collections(collections, v) From 04c9a3483f02e6d04bb90032df5812065e5d4143 Mon Sep 17 00:00:00 2001 From: Ilia Sergachev Date: Wed, 20 Sep 2023 12:27:57 -0700 Subject: [PATCH 045/567] [XLA:GPU] Triton GEMM: fix fusion traversal logic. PiperOrigin-RevId: 567046240 --- .../xla/service/gpu/gemm_rewriter_triton.cc | 83 +++++++++++-------- .../service/gpu/gemm_rewriter_triton_test.cc | 24 +++++- 2 files changed, 70 insertions(+), 37 deletions(-) diff --git a/third_party/xla/xla/service/gpu/gemm_rewriter_triton.cc b/third_party/xla/xla/service/gpu/gemm_rewriter_triton.cc index 70090a5070ce2d..8f1e0399ddb13a 100644 --- a/third_party/xla/xla/service/gpu/gemm_rewriter_triton.cc +++ b/third_party/xla/xla/service/gpu/gemm_rewriter_triton.cc @@ -1234,49 +1234,60 @@ void FusionContext::TryToFuseWithInputsRecursively( old_to_new_mapping, std::vector& fusion_inputs, HloComputation::Builder& builder) { - absl::flat_hash_set visited; - std::stack to_fuse; - // Instructions at the edge of 'to_fuse' that can either get fused too or - // become parameters of the fusion. 
Used to track the number of parameters - // of the fusion. + // Instructions at the fusion edge that can either get fused too or + // become parameters of the fusion. Used to track the number of parameters. absl::flat_hash_set inputs; - auto try_fuse_one = [&](HloInstruction& hlo) { + // Traverse all connected instructions that could be fused, analyze them and + // collect ones that will be fused. + absl::flat_hash_set to_fuse_set; + std::list to_fuse_list; + absl::flat_hash_set enqueued; + std::queue to_visit; + to_visit.push(&root); + while (!to_visit.empty()) { + HloInstruction* hlo = to_visit.front(); + to_visit.pop(); + // Limit the total number of fusion parameters. + if (inputs.size() >= TritonFusionAnalysis::kMaxParameterPerScope && + NumAddedParameters(*hlo) > 0) { + continue; + } const DimOrderUpdatesOrError result = AnalyzeForFusion( - hlo, /*as_input=*/true, old_to_new_mapping, gpu_version); - if (!std::holds_alternative(result)) { - return false; + *hlo, /*as_input=*/true, old_to_new_mapping, gpu_version); + if (!std::holds_alternative(result) || + !MergeUpdates(std::get(result))) { + continue; } - - if (!MergeUpdates(std::get(result))) { - return false; + if (hlo->opcode() != HloOpcode::kParameter) { + inputs.erase(hlo); } - to_fuse.push(&hlo); - if (hlo.opcode() != HloOpcode::kParameter) { - inputs.erase(&hlo); + inputs.insert(hlo->operands().cbegin(), hlo->operands().cend()); + to_fuse_set.insert(hlo); + to_fuse_list.push_back(hlo); + for (HloInstruction* operand : hlo->operands()) { + if (enqueued.insert(operand).second) { + to_visit.push(operand); + } } - inputs.insert(hlo.operands().cbegin(), hlo.operands().cend()); - return true; - }; - try_fuse_one(root); - visited.insert(&root); - while (!to_fuse.empty()) { - bool top_is_ready_to_fuse = true; - HloInstruction* hlo = to_fuse.top(); - for (HloInstruction* operand : hlo->mutable_operands()) { - if (visited.insert(operand).second) { - // Stop adding new parameters. 
- if (inputs.size() >= TritonFusionAnalysis::kMaxParameterPerScope && - NumAddedParameters(*operand) > 0) { - continue; - } - if (try_fuse_one(*operand)) { - top_is_ready_to_fuse = false; + } + // Find one by one instructions that have no operands queued to be fused and + // fuse them. + while (!to_fuse_list.empty()) { + for (auto it = to_fuse_list.begin(); it != to_fuse_list.end();) { + bool ready_to_fuse = true; + for (const HloInstruction* operand : (*it)->operands()) { + if (to_fuse_set.contains(operand)) { + ready_to_fuse = false; + break; } } - } - if (top_is_ready_to_fuse) { - Fuse(*hlo, old_to_new_mapping, fusion_inputs, builder); - to_fuse.pop(); + if (ready_to_fuse) { + Fuse(**it, old_to_new_mapping, fusion_inputs, builder); + to_fuse_set.erase(*it); + it = to_fuse_list.erase(it); + } else { + ++it; + } } } } diff --git a/third_party/xla/xla/service/gpu/gemm_rewriter_triton_test.cc b/third_party/xla/xla/service/gpu/gemm_rewriter_triton_test.cc index 4164a295948e69..b91abc7f617ffe 100644 --- a/third_party/xla/xla/service/gpu/gemm_rewriter_triton_test.cc +++ b/third_party/xla/xla/service/gpu/gemm_rewriter_triton_test.cc @@ -1521,6 +1521,28 @@ ENTRY e { EXPECT_FALSE(GemmRewriterTriton(cc).Run(module.get()).value()); } +TEST_F(GemmRewriterTritonTest, MultipleUsesAreHandled) { + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + ParseAndReturnVerifiedModule(R"( +ENTRY e { + c = f32[] constant(1) + b = f32[6,8] broadcast(c), dimensions={} + p0 = f32[6,8] parameter(0) + a1 = f32[6,8] add(p0, b) + e = f32[6,8] exponential(a1) + a2 = f32[6,8] add(e, b) + d = f32[6,8] divide(b, a2) + p2 = f16[8,6] parameter(1) + cv = f32[8,6] convert(p2) + ROOT r = f32[6,6] dot(d, cv), + lhs_contracting_dims={1}, rhs_contracting_dims={0} +})")); + const se::CudaComputeCapability cc{se::CudaComputeCapability::AMPERE, 0}; + EXPECT_TRUE(GemmRewriterTriton(cc).Run(module.get()).value()); + EXPECT_THAT(module->entry_computation()->root_instruction(), + 
GmockMatch(m::Fusion(m::Parameter(), m::Parameter()))); +} + class GemmRewriterTritonLevel2Test : public GemmRewriterTritonTest { public: DebugOptions GetDebugOptionsForTest() override { @@ -1568,7 +1590,7 @@ ENTRY e { EXPECT_TRUE(GemmRewriterTriton(gpu_version_).Run(module.get()).value()); EXPECT_THAT( module->entry_computation()->root_instruction(), - GmockMatch(m::Fusion(m::Parameter(), m::Transpose(), m::Parameter()))); + GmockMatch(m::Fusion(m::Transpose(), m::Parameter(), m::Parameter()))); } TEST_F(GemmRewriterTritonLevel2Test, DoNotFuseTooManyParameters) { From 0e3480236cec19ea558cd93dd017013e5cfee1b3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 20 Sep 2023 12:39:26 -0700 Subject: [PATCH 046/567] include THIRD_PARTY_NOTICES.txt in the wheel. PiperOrigin-RevId: 567049176 --- tensorflow/tools/pip_package/BUILD | 1 + tensorflow/tools/pip_package/MANIFEST.in | 1 + tensorflow/tools/pip_package/build_pip_package.sh | 1 + 3 files changed, 3 insertions(+) diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index 5afeb1ce7f62bb..4d7b9194ab80e6 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -98,6 +98,7 @@ DYNAMIC_LOADED_KERNELS = [ COMMON_PIP_DEPS = [ ":licenses", "MANIFEST.in", + "THIRD_PARTY_NOTICES.txt", "README", "setup.py", ":included_headers", diff --git a/tensorflow/tools/pip_package/MANIFEST.in b/tensorflow/tools/pip_package/MANIFEST.in index 921e4a60f1794f..d9d6779e161c0e 100644 --- a/tensorflow/tools/pip_package/MANIFEST.in +++ b/tensorflow/tools/pip_package/MANIFEST.in @@ -1,5 +1,6 @@ include LICENSE include README +include tensorflow/THIRD_PARTY_NOTICES.txt recursive-include * *.py recursive-include * *.pyd recursive-include * *.pyi diff --git a/tensorflow/tools/pip_package/build_pip_package.sh b/tensorflow/tools/pip_package/build_pip_package.sh index d83f2096f277a1..deff63b8d15557 100755 --- a/tensorflow/tools/pip_package/build_pip_package.sh +++ 
b/tensorflow/tools/pip_package/build_pip_package.sh @@ -277,6 +277,7 @@ function prepare_src() { mkdir -p ${TMPDIR}/third_party cp -R $RUNFILES/third_party/eigen3 ${TMPDIR}/third_party cp -LR $RUNFILES/../local_config_cuda/cuda/_virtual_includes/cuda_headers_virtual/third_party/gpus ${TMPDIR}/third_party + cp $RUNFILES/tensorflow/tools/pip_package/THIRD_PARTY_NOTICES.txt "${TMPDIR}/tensorflow" reorganize_includes "${TMPDIR}" From 5b534b09c7ec440176d3ac85f8c0976217c0830b Mon Sep 17 00:00:00 2001 From: Mason Chang Date: Wed, 20 Sep 2023 13:21:45 -0700 Subject: [PATCH 047/567] Update metrics name from v0 to v1 PiperOrigin-RevId: 567061192 --- .../compiler/mlir/tf2xla/api/v1/compile_mlir_util_test.cc | 2 +- .../compiler/mlir/tf2xla/internal/legalize_tf_to_hlo_test.cc | 4 ++-- tensorflow/compiler/mlir/tf2xla/transforms/xla_legalize_tf.cc | 4 ++-- .../compiler/mlir/tf2xla/transforms/xla_legalize_tf_test.cc | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/mlir/tf2xla/api/v1/compile_mlir_util_test.cc b/tensorflow/compiler/mlir/tf2xla/api/v1/compile_mlir_util_test.cc index d2834844bd6a61..1485028adf33fc 100644 --- a/tensorflow/compiler/mlir/tf2xla/api/v1/compile_mlir_util_test.cc +++ b/tensorflow/compiler/mlir/tf2xla/api/v1/compile_mlir_util_test.cc @@ -70,7 +70,7 @@ TEST(LegalizeMlirTest, FailsLegalizesModule) { } })"; CellReader count( - "/tensorflow/core/tf2xla/v0/mlir_failed_xla_legalize_tf_pass_count"); + "/tensorflow/core/tf2xla/v1/mlir_failed_xla_legalize_tf_pass_count"); std::vector arg_shapes; XlaCompilationResult compilation_result; diff --git a/tensorflow/compiler/mlir/tf2xla/internal/legalize_tf_to_hlo_test.cc b/tensorflow/compiler/mlir/tf2xla/internal/legalize_tf_to_hlo_test.cc index b6a3463234f5f0..67de8464a9c587 100644 --- a/tensorflow/compiler/mlir/tf2xla/internal/legalize_tf_to_hlo_test.cc +++ b/tensorflow/compiler/mlir/tf2xla/internal/legalize_tf_to_hlo_test.cc @@ -47,9 +47,9 @@ using tpu::ShardingAndIndex; using 
tpu::TPUCompileMetadataProto; static constexpr char kMlirLegalizeCount[] = - "/tensorflow/core/tf2xla/v0/mlir_failed_xla_legalize_tf_count"; + "/tensorflow/core/tf2xla/v1/mlir_failed_xla_legalize_tf_count"; static constexpr char kMlirLegalizeErrors[] = - "/tensorflow/core/tf2xla/v0/mlir_failed_xla_legalize_tf_pass_count"; + "/tensorflow/core/tf2xla/v1/mlir_failed_xla_legalize_tf_pass_count"; static constexpr char kBridgeStatusCounter[] = "/tensorflow/core/tf2xla/api/v2/phase2_compilation_status"; constexpr char kMlirCombinedMlirSuccess[] = "kMlirCombinedMlirSuccess"; diff --git a/tensorflow/compiler/mlir/tf2xla/transforms/xla_legalize_tf.cc b/tensorflow/compiler/mlir/tf2xla/transforms/xla_legalize_tf.cc index 32ff6ad83c6f42..2cb569635226d1 100644 --- a/tensorflow/compiler/mlir/tf2xla/transforms/xla_legalize_tf.cc +++ b/tensorflow/compiler/mlir/tf2xla/transforms/xla_legalize_tf.cc @@ -55,11 +55,11 @@ namespace { #include "tensorflow/compiler/mlir/tf2xla/transforms/xla_legalize_tf_passes.h.inc" auto *mlir_legalization_count = tensorflow::monitoring::Counter<1>::New( - "/tensorflow/core/tf2xla/v0/mlir_failed_xla_legalize_tf_count", + "/tensorflow/core/tf2xla/v1/mlir_failed_xla_legalize_tf_count", "Counts the attempts of legalization of ops", "op_name"); auto *mlir_failed_legalization_count = tensorflow::monitoring::Counter<2>::New( - "/tensorflow/core/tf2xla/v0/mlir_failed_xla_legalize_tf_pass_count", + "/tensorflow/core/tf2xla/v1/mlir_failed_xla_legalize_tf_pass_count", "Counts the failure of legalization of ops", "op_name", "legality"); class LegalizeTF : public impl::LegalizeTFBase { diff --git a/tensorflow/compiler/mlir/tf2xla/transforms/xla_legalize_tf_test.cc b/tensorflow/compiler/mlir/tf2xla/transforms/xla_legalize_tf_test.cc index b9085c12110d72..39eadcb93fcfce 100644 --- a/tensorflow/compiler/mlir/tf2xla/transforms/xla_legalize_tf_test.cc +++ b/tensorflow/compiler/mlir/tf2xla/transforms/xla_legalize_tf_test.cc @@ -88,7 +88,7 @@ TEST(XlaLegalizeTest, 
IllegalOp) { } })"; CellReader legalize_failure_count( - "/tensorflow/core/tf2xla/v0/mlir_failed_xla_legalize_tf_pass_count"); + "/tensorflow/core/tf2xla/v1/mlir_failed_xla_legalize_tf_pass_count"); auto status = BuildAndRunPipeline(kMlirIllegalOpStr, legalizeTFPasses()); @@ -103,7 +103,7 @@ TEST(XlaLegalizeTest, LegalOp) { %0:2 = "tf.InfeedDequeueTuple"() : () -> (tensor<3x3xf32>, tensor<4x?xf32>) func.return %0#0, %0#1 : tensor<3x3xf32>, tensor<4x?xf32> })"; CellReader legalize_failure_count( - "/tensorflow/core/tf2xla/v0/mlir_failed_xla_legalize_tf_pass_count"); + "/tensorflow/core/tf2xla/v1/mlir_failed_xla_legalize_tf_pass_count"); auto status = BuildAndRunPipeline(kMlirLegalOpStr, legalizeTFPasses()); From 9258e1aeb9fdbdd8566847f31a7c64243ba6d599 Mon Sep 17 00:00:00 2001 From: Kanglan Tang Date: Wed, 20 Sep 2023 13:25:39 -0700 Subject: [PATCH 048/567] Update tpu build flags in .bazelrc PiperOrigin-RevId: 567062395 --- .bazelrc | 5 ++++- third_party/xla/.bazelrc | 5 ++++- third_party/xla/third_party/tsl/.bazelrc | 5 ++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/.bazelrc b/.bazelrc index 35ff0dc1440b7f..8fb09a849a8b57 100644 --- a/.bazelrc +++ b/.bazelrc @@ -267,8 +267,11 @@ build:dbg --cxxopt -DTF_LITE_DISABLE_X86_NEON # AWS SDK must be compiled in release mode. see: https://github.com/tensorflow/tensorflow/issues/37498 build:dbg --copt -DDEBUG_BUILD -# Config to build TPU backend +# Config to build TF TPU build:tpu --define=with_tpu_support=true +build:tpu --define=framework_shared_object=true +build:tpu --copt=-DLIBTPU_ON_GCE +build:tpu --define=enable_mlir_bridge=true build:tensorrt --repo_env TF_NEED_TENSORRT=1 diff --git a/third_party/xla/.bazelrc b/third_party/xla/.bazelrc index 35ff0dc1440b7f..8fb09a849a8b57 100644 --- a/third_party/xla/.bazelrc +++ b/third_party/xla/.bazelrc @@ -267,8 +267,11 @@ build:dbg --cxxopt -DTF_LITE_DISABLE_X86_NEON # AWS SDK must be compiled in release mode. 
see: https://github.com/tensorflow/tensorflow/issues/37498 build:dbg --copt -DDEBUG_BUILD -# Config to build TPU backend +# Config to build TF TPU build:tpu --define=with_tpu_support=true +build:tpu --define=framework_shared_object=true +build:tpu --copt=-DLIBTPU_ON_GCE +build:tpu --define=enable_mlir_bridge=true build:tensorrt --repo_env TF_NEED_TENSORRT=1 diff --git a/third_party/xla/third_party/tsl/.bazelrc b/third_party/xla/third_party/tsl/.bazelrc index 35ff0dc1440b7f..8fb09a849a8b57 100644 --- a/third_party/xla/third_party/tsl/.bazelrc +++ b/third_party/xla/third_party/tsl/.bazelrc @@ -267,8 +267,11 @@ build:dbg --cxxopt -DTF_LITE_DISABLE_X86_NEON # AWS SDK must be compiled in release mode. see: https://github.com/tensorflow/tensorflow/issues/37498 build:dbg --copt -DDEBUG_BUILD -# Config to build TPU backend +# Config to build TF TPU build:tpu --define=with_tpu_support=true +build:tpu --define=framework_shared_object=true +build:tpu --copt=-DLIBTPU_ON_GCE +build:tpu --define=enable_mlir_bridge=true build:tensorrt --repo_env TF_NEED_TENSORRT=1 From 9d138e582de5f9de67afc7608ac9258a7d6670f1 Mon Sep 17 00:00:00 2001 From: Michael Hudgins Date: Wed, 20 Sep 2023 13:36:53 -0700 Subject: [PATCH 049/567] Modify ARM64 docker images to build under TF repo PiperOrigin-RevId: 567065712 --- ci/official/containers/linux_arm64/build.sh | 43 ++++++++++++++++----- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/ci/official/containers/linux_arm64/build.sh b/ci/official/containers/linux_arm64/build.sh index f6477b3c9040c9..dc30a354f20f0d 100755 --- a/ci/official/containers/linux_arm64/build.sh +++ b/ci/official/containers/linux_arm64/build.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # Copyright 2023 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); @@ -16,14 +16,37 @@ set -e -export LANG=C +is_continuous_or_release() { + [[ "$KOKORO_JOB_TYPE" == "CONTINUOUS_INTEGRATION" ]] || [[ "${KOKORO_JOB_TYPE}" == "RELEASE" ]] +} -release_tag=2.15 +# Move into the directory of the script +cd "$(dirname "$0")" -docker build --pull \ - --tag=linaro/tensorflow-arm64-build:latest-multipython \ - --tag=linaro/tensorflow-arm64-build:${release_tag}-multipython . -mkdir -p tagdir-multipython -echo linaro/tensorflow-arm64-build:latest-multipython > tagdir-multipython/.docker-tag -mkdir -p tagdir-${release_tag}-multipython -echo linaro/tensorflow-arm64-build:${release_tag}-multipython > tagdir-${release_tag}-multipython/.docker-tag +if is_continuous_or_release; then + # A continuous job is the only one to publish to latest + TAG="latest-multi-python" +else + # If it is a change, grab a good tag for iterative builds + if [[ -z "${KOKORO_GITHUB_PULL_REQUEST_NUMBER}" ]]; then + TAG=$(head -n 1 "$KOKORO_PIPER_DIR/presubmit_request.txt" | cut -d" " -f2) + else + TAG="pr-${KOKORO_GITHUB_PULL_REQUEST_NUMBER}" + fi +fi + +# IMAGE="gcr.io/tensorflow-sigs/build-arm64:$TAG-$PYVER" +IMAGE="gcr.io/tensorflow-sigs/build-arm64:$TAG" +docker pull "$IMAGE" || true + +gcloud auth configure-docker + +# TODO(michaelhudgins): align with sig build and make it so not every python is +# being included in a single image +# --build-arg "PYTHON_VERSION=$PYVER" \ +DOCKER_BUILDKIT=1 docker build \ + --cache-from "$IMAGE" \ + --target=devel \ + -t "$IMAGE" . + +docker push "$IMAGE" From 2ebb1b5b001d730a7077d8d60ff459443a54fb7c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 20 Sep 2023 13:45:15 -0700 Subject: [PATCH 050/567] Remove fallback to the old xla builder bridge and rely on the combined bridge's call. 
PiperOrigin-RevId: 567068282 --- .../mlir/tf2xla/api/v2/legalize_tf.cc | 55 +------------------ 1 file changed, 1 insertion(+), 54 deletions(-) diff --git a/tensorflow/compiler/mlir/tf2xla/api/v2/legalize_tf.cc b/tensorflow/compiler/mlir/tf2xla/api/v2/legalize_tf.cc index 6fe12137d78966..ae7bfa8076d917 100644 --- a/tensorflow/compiler/mlir/tf2xla/api/v2/legalize_tf.cc +++ b/tensorflow/compiler/mlir/tf2xla/api/v2/legalize_tf.cc @@ -134,60 +134,7 @@ tsl::StatusOr LegalizeMlirToHlo( combined_bridge_status.status().ToString()) .IgnoreError(); - Status old_bridge_status = tf2xla::v1::CompileTensorflowGraphToHlo( - computation, metadata, use_tuple_args, shape_determination_fns, - arg_shapes, arg_core_mapping, per_core_arg_shapes, client, - compilation_result.get()); - - // Record filter/failure stats only if the old bridge succeeds. This removes - // noise from invalid inputs. - if (!old_bridge_status.ok()) { - // If the old bridge failed for this input as well. Mark the input as - // invalid. This might be incorrect in case of old bridge bugs but that - // should be rare. 
- if (filtered_graph) { - IncrementTfMlirBridgeSecondPhaseCounter( - MlirBridgeSecondPhaseMetric ::kOldBridgeMlirFilteredFailure); - } else { - IncrementTfMlirBridgeSecondPhaseCounter( - MlirBridgeSecondPhaseMetric ::kOldBridgeWithFallbackModeFailure); - } - if (!old_bridge_status.ok()) { - tsl::error_logging::Log(kBridgeComponent, "TFXLA_API_V2_OLD_BRIDGE", - mlir_bridge_status.status().ToString()) - .IgnoreError(); - } - return old_bridge_status; - } - - if (VLOG_IS_ON(2)) { - TF_ASSIGN_OR_RETURN( - auto hlo_module_config, - xla::HloModule::CreateModuleConfigFromProto( - compilation_result->computation->proto(), xla::DebugOptions())); - - TF_ASSIGN_OR_RETURN( - std::unique_ptr hlo_module, - xla::HloModule::CreateFromProto( - compilation_result->computation->proto(), hlo_module_config)); - - std::string all_computations; - for (auto computation : hlo_module->computations()) { - all_computations += computation->ToString() + "\n\n"; - } - - tensorflow::DumpRawStringToFile("legalize_tf_fallback_hlo", - all_computations); - } - - if (filtered_graph) { - IncrementTfMlirBridgeSecondPhaseCounter( - MlirBridgeSecondPhaseMetric ::kOldBridgeMlirFilteredSuccess); - } else { - IncrementTfMlirBridgeSecondPhaseCounter( - MlirBridgeSecondPhaseMetric ::kOldBridgeWithFallbackModeSuccess); - } - return *compilation_result; + return combined_bridge_status.status(); } }; // namespace v2 From 03c58a3fa7229078dd4fab969dc18dc2c8a7265a Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Wed, 20 Sep 2023 13:46:41 -0700 Subject: [PATCH 051/567] [stream_executor] Remove unused gpu_launch_dim.h PiperOrigin-RevId: 567068762 --- third_party/xla/xla/stream_executor/BUILD | 7 +----- .../xla/xla/stream_executor/gpu_launch_dim.h | 23 ------------------- 2 files changed, 1 insertion(+), 29 deletions(-) delete mode 100644 third_party/xla/xla/stream_executor/gpu_launch_dim.h diff --git a/third_party/xla/xla/stream_executor/BUILD b/third_party/xla/xla/stream_executor/BUILD index 
6760fa8d6fa477..14348bd2a6e9e5 100644 --- a/third_party/xla/xla/stream_executor/BUILD +++ b/third_party/xla/xla/stream_executor/BUILD @@ -36,7 +36,6 @@ cc_library( "event.h", "executor_cache.h", "fft.h", - "gpu_launch_dim.h", "kernel.h", "kernel_cache_config.h", "kernel_spec.h", @@ -86,10 +85,7 @@ transitive_hdrs( cc_library( name = "launch_dim", - hdrs = [ - "gpu_launch_dim.h", - "launch_dim.h", - ], + hdrs = ["launch_dim.h"], visibility = ["//visibility:public"], deps = ["@com_google_absl//absl/strings"], ) @@ -731,7 +727,6 @@ cc_library( "event.h", "executor_cache.h", "fft.h", - "gpu_launch_dim.h", "kernel.h", "kernel_cache_config.h", "kernel_spec.h", diff --git a/third_party/xla/xla/stream_executor/gpu_launch_dim.h b/third_party/xla/xla/stream_executor/gpu_launch_dim.h deleted file mode 100644 index 8e5d23598da2bb..00000000000000 --- a/third_party/xla/xla/stream_executor/gpu_launch_dim.h +++ /dev/null @@ -1,23 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef XLA_STREAM_EXECUTOR_GPU_LAUNCH_DIM_H_ -#define XLA_STREAM_EXECUTOR_GPU_LAUNCH_DIM_H_ - -// TODO(rspringer): Temporary redirection until all users - including gcudacc - -// are using the new file. 
-#include "xla/stream_executor/launch_dim.h" - -#endif // XLA_STREAM_EXECUTOR_GPU_LAUNCH_DIM_H_ From ea0637ac525b4b1e1794373014f1159a7e13ebcb Mon Sep 17 00:00:00 2001 From: Bing Hu Date: Wed, 20 Sep 2023 14:05:44 -0700 Subject: [PATCH 052/567] Mesh creation API should also support taking a list of DeviceSpec as input PiperOrigin-RevId: 567074638 --- tensorflow/dtensor/python/mesh_util.py | 14 ++++++----- tensorflow/dtensor/python/tests/BUILD | 4 ++++ .../dtensor/python/tests/mesh_util_test.py | 23 +++++++++++++++++-- 3 files changed, 33 insertions(+), 8 deletions(-) diff --git a/tensorflow/dtensor/python/mesh_util.py b/tensorflow/dtensor/python/mesh_util.py index 9650d6f23bd7bb..12e86e321e1210 100644 --- a/tensorflow/dtensor/python/mesh_util.py +++ b/tensorflow/dtensor/python/mesh_util.py @@ -44,23 +44,25 @@ def _print_context(num_global_devices: int, num_clients: int, client_id: int, def _make_device_specs( - devices: Optional[List[str]] = None, + devices: Optional[List[Union[tf_device.DeviceSpec, str]]] = None, device_type: Optional[str] = None ) -> Tuple[List[tf_device.DeviceSpec], str]: - """Makes device specs from local devices names or number of global devices.""" + """Makes device specs for all local devices or from a provided list.""" if devices is None: if device_type is None: device_type = 'CPU' devices = config.local_devices(device_type) else: - devices = [tf_device.DeviceSpec.from_string(d) for d in devices] + if isinstance(devices[0], str): + devices = [tf_device.DeviceSpec.from_string(d) for d in devices] if device_type is None: device_type = devices[0].device_type if device_type.upper() != devices[0].device_type.upper(): raise ValueError( - f'Conflicting devices {str(devices)} and device_type {device_type}') + f'Conflicting devices {str(devices)} and device_type {device_type}' + ) return devices, device_type @@ -69,7 +71,7 @@ def _make_device_specs( def create_mesh( mesh_dims: Optional[Union[List[Tuple[str, int]], Dict[str, int]]] = None, mesh_name: 
str = '', - devices: Optional[List[str]] = None, + devices: Optional[List[Union[tf_device.DeviceSpec, str]]] = None, device_type: Optional[str] = None, use_xla_spmd: bool = layout.USE_XLA_SPMD, ) -> layout.Mesh: @@ -137,7 +139,7 @@ def create_mesh( def create_distributed_mesh( mesh_dims: Union[List[Tuple[str, int]], Dict[str, int]], mesh_name: str = '', - local_devices: Optional[List[str]] = None, + local_devices: Optional[List[Union[tf_device.DeviceSpec, str]]] = None, device_type: Optional[str] = None, use_xla_spmd: bool = layout.USE_XLA_SPMD, ) -> layout.Mesh: diff --git a/tensorflow/dtensor/python/tests/BUILD b/tensorflow/dtensor/python/tests/BUILD index f2e71d18824455..801bbb267be40d 100644 --- a/tensorflow/dtensor/python/tests/BUILD +++ b/tensorflow/dtensor/python/tests/BUILD @@ -69,6 +69,9 @@ pytype_strict_library( ], ) +# TODO(b/301286466): Investigate why python annotation type mismatch is not catptured by the type +# strict BUILD rules. + dtensor_test( name = "array_ops_test", srcs = ["array_ops_test.py"], @@ -301,6 +304,7 @@ dtensor_test( "//tensorflow/dtensor/python:tpu_util", "//tensorflow/python/eager:context", "//tensorflow/python/framework:config", + "//tensorflow/python/framework:device", "//tensorflow/python/platform:client_testlib", "@absl_py//absl/testing:parameterized", ], diff --git a/tensorflow/dtensor/python/tests/mesh_util_test.py b/tensorflow/dtensor/python/tests/mesh_util_test.py index 8f51e2e2011928..b8bc9960915648 100644 --- a/tensorflow/dtensor/python/tests/mesh_util_test.py +++ b/tensorflow/dtensor/python/tests/mesh_util_test.py @@ -24,6 +24,7 @@ from tensorflow.dtensor.python.tests import test_util from tensorflow.python.eager import context from tensorflow.python.framework import config as tf_config +from tensorflow.python.framework import device as tf_device from tensorflow.python.platform import test @@ -65,7 +66,7 @@ def test_tpu_2d_mesh_creation(self, use_xla_spmd): reason='Test requires exactly 2 cores', 
unless_device_count_equals_to=2) devices = test_util.list_local_logical_devices('TPU') - self.assertEqual(len(devices), 2) + self.assertLen(devices, 2) mesh = mesh_util.create_mesh([('x', 2), ('y', 1)], device_type='TPU', use_xla_spmd=use_xla_spmd) @@ -80,13 +81,31 @@ def test_tpu_2d_mesh_creation_with_devices(self): reason='Test requires at least 2 cores', unless_device_count_equals_to=2) devices = test_util.list_local_logical_devices('TPU') - self.assertEqual(len(devices), 2) + self.assertLen(devices, 2) mesh = mesh_util.create_mesh([('x', 2), ('y', 1)], devices=['/device:tpu:0', '/device:tpu:1']) self.assertEqual(mesh.num_local_devices(), 2) self.assertEqual(mesh.size, 2) self.assertAllEqual(mesh.dim_names, ['x', 'y']) + def test_tpu_2d_mesh_creation_with_device_specs(self): + self.skipForDeviceType(['CPU', 'GPU'], reason='Test is intended for TPUs.') + self.skipForDeviceType(['TPU'], + reason='Test requires at least 2 cores', + unless_device_count_equals_to=2) + devices = test_util.list_local_logical_devices('TPU') + self.assertLen(devices, 2) + mesh = mesh_util.create_mesh( + [('x', 2), ('y', 1)], + devices=[ + tf_device.DeviceSpec.from_string('/tpu:0'), + tf_device.DeviceSpec.from_string('/tpu:1'), + ], + ) + self.assertEqual(mesh.num_local_devices(), 2) + self.assertEqual(mesh.size, 2) + self.assertAllEqual(mesh.dim_names, ['x', 'y']) + def test_single_client_mesh_creation(self): self.skipForDeviceType(['GPU', 'TPU'], reason='Test is intended for CPUs') num_devices = len(test_util.list_local_logical_devices('CPU')) From 8fcaa1bb3130fdcc2f859820c4de49f029e355b2 Mon Sep 17 00:00:00 2001 From: Ziyin Huang Date: Wed, 20 Sep 2023 14:15:42 -0700 Subject: [PATCH 053/567] Roll back the accidental change for the xla pad size in GetMinibatchOp. 
PiperOrigin-RevId: 567077638 --- tensorflow/core/tpu/kernels/sparse_core_preprocess_ops.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/tpu/kernels/sparse_core_preprocess_ops.cc b/tensorflow/core/tpu/kernels/sparse_core_preprocess_ops.cc index 7181edfe33d7b2..d4cd8035fdc7dd 100644 --- a/tensorflow/core/tpu/kernels/sparse_core_preprocess_ops.cc +++ b/tensorflow/core/tpu/kernels/sparse_core_preprocess_ops.cc @@ -356,7 +356,8 @@ void GetMinibatchesInCsrWithPhysicalReplicaOp::Compute(OpKernelContext* ctx) { const int num_physical_replica = num_replica_ * num_sc_per_chip_; - size_t xla_pad_size = 8; + size_t xla_pad_size = stream_executor::tpu::OpsApiFn() + ->TpuUtil_GetXlaPadSizeFromTpuTopologyFn(); OP_REQUIRES(ctx, sample_count_ % num_sc_per_chip_ == 0, absl::InvalidArgumentError( From e2624e25d0c9465f631389f4c3375d7a3df5ea47 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 20 Sep 2023 14:24:40 -0700 Subject: [PATCH 054/567] Support storing bfloat16 in tensor_content format. PiperOrigin-RevId: 567079988 --- tensorflow/python/framework/tensor_util.py | 1 + tensorflow/python/framework/tensor_util_test.py | 5 +---- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py index b494f52410ee2a..59fbeb3429c68d 100644 --- a/tensorflow/python/framework/tensor_util.py +++ b/tensorflow/python/framework/tensor_util.py @@ -300,6 +300,7 @@ def _FlattenToStrings(nested_strings): dtypes.uint64, dtypes.float8_e5m2, dtypes.float8_e4m3fn, + dtypes.bfloat16 # int4/uint4 intentionally not listed, since their binary representation # is implementation-dependent. 
] diff --git a/tensorflow/python/framework/tensor_util_test.py b/tensorflow/python/framework/tensor_util_test.py index 88c3541fcf22fb..14619c12f48824 100644 --- a/tensorflow/python/framework/tensor_util_test.py +++ b/tensorflow/python/framework/tensor_util_test.py @@ -254,8 +254,6 @@ def testHalf(self): def testBfloat16(self): test_type = dtypes.bfloat16.as_numpy_dtype t = tensor_util.make_tensor_proto(np.array([10.0, 20.0], dtype=test_type)) - # 10.0: 16672 = 010000010(130) 0100000: (1+0/2+1/4) * 2^(130-127) - # 20.0: 16800 = 010000011(131) 0100000: (1+0/2+1/4) * 2^(131-127) self.assertProtoEquals(""" dtype: DT_BFLOAT16 tensor_shape { @@ -263,8 +261,7 @@ def testBfloat16(self): size: 2 } } - half_val: 16672 - half_val: 16800 + tensor_content: "\x20\x41\x5C\x32\x34\x30\x41" """, t) a = tensor_util.MakeNdarray(t) From caf5b4d890095df814d8b39d2ac9717aa32ecf1a Mon Sep 17 00:00:00 2001 From: Edward Schwartz Date: Wed, 20 Sep 2023 14:44:36 -0700 Subject: [PATCH 055/567] Remove obsolete bincount code since forward compatibility window has expired PiperOrigin-RevId: 567085770 --- tensorflow/python/ops/BUILD | 3 --- tensorflow/python/ops/bincount_ops.py | 30 ---------------------- tensorflow/python/ops/bincount_ops_test.py | 10 -------- 3 files changed, 43 deletions(-) diff --git a/tensorflow/python/ops/BUILD b/tensorflow/python/ops/BUILD index e278893370aa4a..36a41014cba5ca 100644 --- a/tensorflow/python/ops/BUILD +++ b/tensorflow/python/ops/BUILD @@ -1438,8 +1438,6 @@ py_strict_library( ":array_ops", ":math_ops", ":math_ops_gen", - "//tensorflow/python/compat", - "//tensorflow/python/framework:constant_op", "//tensorflow/python/framework:dtypes", "//tensorflow/python/framework:ops", "//tensorflow/python/framework:tensor", @@ -1459,7 +1457,6 @@ cuda_py_strict_test( ":bincount_ops", ":count_ops_gen", ":sparse_ops", - "//tensorflow/python/compat", "//tensorflow/python/framework:config", "//tensorflow/python/framework:errors", "//tensorflow/python/framework:ops", diff --git 
a/tensorflow/python/ops/bincount_ops.py b/tensorflow/python/ops/bincount_ops.py index 361481a975ca2e..7d19fc3a4cbffd 100644 --- a/tensorflow/python/ops/bincount_ops.py +++ b/tensorflow/python/ops/bincount_ops.py @@ -14,8 +14,6 @@ # ============================================================================== """bincount ops.""" -from tensorflow.python.compat import compat -from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor @@ -132,34 +130,6 @@ def bincount(arr, """ name = "bincount" if name is None else name with ops.name_scope(name): - # TODO(b/255381064) Remove the following block which uses older kernels for - # certain cases once the forward compatibility window expries (and remove - # the imports in this file and dependencies in the BUILD file for compat - # and constant_op which are only required for this block.) - if ( - not compat.forward_compatible(2023, 9, 10) - and not binary_output - and axis is None - ): - arr = ops.convert_to_tensor(arr, name="arr", dtype=dtypes.int32) - array_is_nonempty = math_ops.reduce_prod(array_ops.shape(arr)) > 0 - output_size = math_ops.cast(array_is_nonempty, dtypes.int32) * ( - math_ops.reduce_max(arr) + 1) - if minlength is not None: - minlength = ops.convert_to_tensor( - minlength, name="minlength", dtype=dtypes.int32) - output_size = gen_math_ops.maximum(minlength, output_size) - if maxlength is not None: - maxlength = ops.convert_to_tensor( - maxlength, name="maxlength", dtype=dtypes.int32) - output_size = gen_math_ops.minimum(maxlength, output_size) - if weights is not None: - weights = ops.convert_to_tensor(weights, name="weights") - return gen_math_ops.unsorted_segment_sum(weights, arr, output_size) - weights = constant_op.constant([], dtype) - arr = array_ops.reshape(arr, [-1]) - return gen_math_ops.bincount(arr, output_size, weights) - arr = 
tensor_conversion.convert_to_tensor_v2_with_dispatch(arr, name="arr") if weights is not None: weights = tensor_conversion.convert_to_tensor_v2_with_dispatch( diff --git a/tensorflow/python/ops/bincount_ops_test.py b/tensorflow/python/ops/bincount_ops_test.py index a5873215e9a130..4e14b1b8a45ef8 100644 --- a/tensorflow/python/ops/bincount_ops_test.py +++ b/tensorflow/python/ops/bincount_ops_test.py @@ -17,7 +17,6 @@ from absl.testing import parameterized import numpy as np -from tensorflow.python.compat import compat from tensorflow.python.framework import config as tf_config from tensorflow.python.framework import errors from tensorflow.python.framework import ops @@ -325,15 +324,6 @@ def test_weights( "b/263004039 The DenseBincount GPU kernel does not support weights." " unsorted_segment_sum should be used instead on GPU." ) - # TODO(b/255381064) Remove the following block which uses older kernels for - # certain cases once the forward compatibility window expries (and remove - # the imports in this file and dependencies in the BUILD file for compat - # which is only required for this block.) 
- if not compat.forward_compatible(2023, 9, 10): - self.skipTest( - "b/255381064 tests with weights will pass once forward comptibiliy" - " window expires" - ) if axis == -1: expected = _adjust_expected_rank2(expected, minlength, maxlength) else: From de064617a4e8c07a2f4646447e84dfa45a66c80d Mon Sep 17 00:00:00 2001 From: Berkin Ilbeyi Date: Wed, 20 Sep 2023 14:54:30 -0700 Subject: [PATCH 056/567] [XLA] Fix DCHECK in ShapeUtil::ByteSizeOf that failed when shape didn't have a layout PiperOrigin-RevId: 567088606 --- third_party/xla/xla/shape_util.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/third_party/xla/xla/shape_util.cc b/third_party/xla/xla/shape_util.cc index 0bd89ce9fdaaa6..4f0fc50bb52206 100644 --- a/third_party/xla/xla/shape_util.cc +++ b/third_party/xla/xla/shape_util.cc @@ -876,7 +876,7 @@ Shape ShapeUtil::PrependMajorDimension(int64_t bound, Shape shape) { /* static */ int64_t ShapeUtil::ByteSizeOf(const Shape& shape, int64_t pointer_size) { - TF_DCHECK_OK(ValidateShape(shape)); + TF_DCHECK_OK(ValidateShapeWithOptionalLayout(shape)); if (shape.element_type() == TUPLE) { return ByteSizeOfTupleIndexTable(shape, pointer_size); } else if (shape.IsArray()) { @@ -900,7 +900,7 @@ Shape ShapeUtil::PrependMajorDimension(int64_t bound, Shape shape) { } /* static */ int64_t ShapeUtil::ByteSizeOfElements(const Shape& shape) { - TF_DCHECK_OK(ValidateShape(shape)); + TF_DCHECK_OK(ValidateShapeWithOptionalLayout(shape)); int64_t allocated_element_count; CHECK(LayoutUtil::IsDenseArray(shape)) << shape.ShortDebugString(); From 7974a0b3afd2b53e92491a1bdea3b54294e0f6e7 Mon Sep 17 00:00:00 2001 From: Anlun Xu Date: Wed, 20 Sep 2023 15:46:33 -0700 Subject: [PATCH 057/567] [xla:gpu] NFC: Print node type instead of node pointer in error messages PiperOrigin-RevId: 567102686 --- .../xla/xla/stream_executor/gpu/gpu_graph.cc | 70 ++++++++++++++----- 1 file changed, 54 insertions(+), 16 deletions(-) diff --git 
a/third_party/xla/xla/stream_executor/gpu/gpu_graph.cc b/third_party/xla/xla/stream_executor/gpu/gpu_graph.cc index 70135573f266b7..c96ca81dccd6b3 100644 --- a/third_party/xla/xla/stream_executor/gpu/gpu_graph.cc +++ b/third_party/xla/xla/stream_executor/gpu/gpu_graph.cc @@ -20,7 +20,6 @@ limitations under the License. #include #include #include -#include #include #include "absl/strings/str_cat.h" @@ -95,6 +94,39 @@ tsl::StatusOr GraphExecUpdateResultToString( return tsl::errors::Internal("Unexpected value for GraphExecUpdateResult"); } +tsl::StatusOr GraphNodeTypeToString( + GpuDriver::GraphNodeType node_type) { + switch (node_type) { + case GpuDriver::GraphNodeType::kKernel: + return "kKernel"; + case GpuDriver::GraphNodeType::kMemcpy: + return "kMemcpy"; + case GpuDriver::GraphNodeType::kMemset: + return "kMemset"; + case GpuDriver::GraphNodeType::kHost: + return "kHost"; + case GpuDriver::GraphNodeType::kGraph: + return "kGraph"; + case GpuDriver::GraphNodeType::kEmpty: + return "kEmpty"; + case GpuDriver::GraphNodeType::kWaitEvent: + return "kWaitEvent"; + case GpuDriver::GraphNodeType::kEventRecord: + return "kEventRecord"; + case GpuDriver::GraphNodeType::kExtSemasSignal: + return "kExtSemasSignal"; + case GpuDriver::GraphNodeType::kExtSemasWait: + return "kExtSemasWait"; + case GpuDriver::GraphNodeType::kMemAlloc: + return "kMemAlloc"; + case GpuDriver::GraphNodeType::kMemFree: + return "kMemFree"; + case GpuDriver::GraphNodeType::kBatchMemOp: + return "kBatchMemOp"; + } + return tsl::errors::Internal("Unexpected value for GraphNodeType"); +} + tsl::StatusOr OwnedGpuGraphExec::Update( OwnedGpuGraph graph) { VLOG(3) << "Update gpu graph exec with a new graph after " << num_launches_ @@ -138,21 +170,27 @@ tsl::StatusOr OwnedGpuGraphExec::Update( TF_ASSIGN_OR_RETURN(std::string result_str, GraphExecUpdateResultToString(result.result)); - GpuGraphNodeHandle error_from_node = result.error_from_node; - std::ostringstream error_from_node_ptr; - error_from_node_ptr 
<< reinterpret_cast(error_from_node); - std::string error_from_node_str = error_from_node_ptr.str(); - - GpuGraphNodeHandle error_node = result.error_node; - std::ostringstream error_node_ptr; - error_node_ptr << reinterpret_cast(error_node); - std::string error_node_str = error_node_ptr.str(); - - return tsl::errors::Internal( - absl::StrCat("Failed to update gpu graph: ", "Graph update result=", - result_str, ", Error node handle=", error_node_str, - ", Error from node handle=", error_from_node_str, ": "), - st.message()); + std::string error_message = absl::StrCat( + "Failed to update gpu graph: Graph update result=", result_str); + + if (result.error_node) { + TF_ASSIGN_OR_RETURN(GpuDriver::GraphNodeType node_type, + GpuDriver::GraphNodeGetType(result.error_node)); + TF_ASSIGN_OR_RETURN(std::string node_type_str, + GraphNodeTypeToString(node_type)); + absl::StrAppend(&error_message, ", Error node name=", node_type_str); + } + + if (result.error_from_node) { + TF_ASSIGN_OR_RETURN(GpuDriver::GraphNodeType node_type, + GpuDriver::GraphNodeGetType(result.error_from_node)); + TF_ASSIGN_OR_RETURN(std::string node_type_str, + GraphNodeTypeToString(node_type)); + absl::StrAppend(&error_message, ", Error from node name=", node_type_str); + } + + absl::StrAppend(&error_message, ": ", st.message()); + return tsl::errors::Internal(error_message); } return UpdateResult::kSuccess; From c2519c4aa372eecc6a26dfd4dedee866f3f0f2e3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 20 Sep 2023 16:03:35 -0700 Subject: [PATCH 058/567] Pass along the shape param from tf.compat.v1.get_variable to its underlying variable creator. At this point, we have already checked that either shape is compatible w/ initial_value, or that it is not specified (None). Therefore it should always be safe to pass it. Having it can help subsequent nested variable creators save some cycles tracing the initializer function when all they need to know is the shape. 
PiperOrigin-RevId: 567107017 --- .../feature_column/feature_column_test.py | 28 +++++++++++++------ .../feature_column/feature_column_v2_test.py | 12 +++++--- tensorflow/python/ops/variable_scope.py | 17 +++++++---- 3 files changed, 40 insertions(+), 17 deletions(-) diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py index 3cde6c5657edc1..f8fbd8db7e3b15 100644 --- a/tensorflow/python/feature_column/feature_column_test.py +++ b/tensorflow/python/feature_column/feature_column_test.py @@ -4924,16 +4924,19 @@ def test_get_dense_tensor(self, use_safe_embedding_lookup, ) def _initializer(shape, dtype, partition_info=None): + self.assertEqual(dtypes.float32, dtype) if partition_variables: + assert partition_info is not None self.assertEqual([vocabulary_size, embedding_dimension], partition_info.full_shape) self.assertAllEqual((2, embedding_dimension), shape) + return array_ops.slice( + embedding_values, partition_info.var_offset, shape + ) else: self.assertAllEqual((vocabulary_size, embedding_dimension), shape) self.assertIsNone(partition_info) - - self.assertEqual(dtypes.float32, dtype) - return embedding_values + return embedding_values # Expected lookup result, using combiner='mean'. 
expected_lookups = ( @@ -4976,7 +4979,12 @@ def _initializer(shape, dtype, partition_info=None): for v in global_vars: self.assertIsInstance(v, variables_lib.Variable) with _initialized_session(): - self.assertAllEqual(embedding_values, global_vars[0]) + if partition_variables: + self.assertAllEqual( + embedding_values, array_ops.concat(global_vars, axis=0) + ) + else: + self.assertAllEqual(embedding_values, global_vars[0]) self.assertAllEqual(expected_lookups, self.evaluate(embedding_lookup)) if use_safe_embedding_lookup: @@ -5783,16 +5791,19 @@ def test_get_dense_tensor(self, use_safe_embedding_lookup, ) def _initializer(shape, dtype, partition_info=None): + self.assertEqual(dtypes.float32, dtype) if partition_variables: + assert partition_info is not None self.assertEqual([vocabulary_size, embedding_dimension], partition_info.full_shape) self.assertAllEqual((2, embedding_dimension), shape) + return array_ops.slice( + embedding_values, partition_info.var_offset, shape + ) else: self.assertAllEqual((vocabulary_size, embedding_dimension), shape) self.assertIsNone(partition_info) - - self.assertEqual(dtypes.float32, dtype) - return embedding_values + return embedding_values # Expected lookup result, using combiner='mean'. 
expected_lookups_a = ( @@ -5842,10 +5853,11 @@ def _initializer(shape, dtype, partition_info=None): self.assertCountEqual(('vars/embedding_weights/part_0:0', 'vars/embedding_weights/part_1:0'), tuple([v.name for v in global_vars])) + embedding_var = array_ops.concat(global_vars, axis=0) else: self.assertCountEqual(('vars/embedding_weights:0',), tuple([v.name for v in global_vars])) - embedding_var = global_vars[0] + embedding_var = global_vars[0] self.evaluate(variables_lib.global_variables_initializer()) self.evaluate(lookup_ops.tables_initializer()) diff --git a/tensorflow/python/feature_column/feature_column_v2_test.py b/tensorflow/python/feature_column/feature_column_v2_test.py index 21dcbb4452d6c7..cb98bf60c03184 100644 --- a/tensorflow/python/feature_column/feature_column_v2_test.py +++ b/tensorflow/python/feature_column/feature_column_v2_test.py @@ -5762,16 +5762,19 @@ def test_get_dense_tensor(self, use_safe_embedding_lookup, ) def _initializer(shape, dtype, partition_info=None): + self.assertEqual(dtypes.float32, dtype) if partition_variables: + assert partition_info is not None self.assertEqual([vocabulary_size, embedding_dimension], partition_info.full_shape) self.assertAllEqual((2, embedding_dimension), shape) + return array_ops.slice( + embedding_values, partition_info.var_offset, shape + ) else: self.assertAllEqual((vocabulary_size, embedding_dimension), shape) self.assertIsNone(partition_info) - - self.assertEqual(dtypes.float32, dtype) - return embedding_values + return embedding_values # Expected lookup result, using combiner='mean'. 
expected_lookups_a = ( @@ -5821,10 +5824,11 @@ def _initializer(shape, dtype, partition_info=None): self.assertCountEqual(('vars/aaa_bbb_shared_embedding/part_0:0', 'vars/aaa_bbb_shared_embedding/part_1:0'), tuple([v.name for v in global_vars])) + embedding_var = array_ops.concat(global_vars, axis=0) else: self.assertCountEqual(('vars/aaa_bbb_shared_embedding:0',), tuple([v.name for v in global_vars])) - embedding_var = global_vars[0] + embedding_var = global_vars[0] self.evaluate(variables_lib.global_variables_initializer()) self.evaluate(lookup_ops.tables_initializer()) diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py index 33dd0438fa2f2f..622f70732adaba 100644 --- a/tensorflow/python/ops/variable_scope.py +++ b/tensorflow/python/ops/variable_scope.py @@ -807,7 +807,8 @@ def _get_partitioned_variable(self, use_resource=use_resource, constraint=constraint, synchronization=synchronization, - aggregation=aggregation) + aggregation=aggregation, + ) # pylint: disable=protected-access var._set_save_slice_info( @@ -880,7 +881,8 @@ def _get_single_variable(self, raise ValueError("If initializer is a constant, do not specify shape.") dtype = dtypes.as_dtype(dtype) - shape = tensor_shape.as_shape(shape) + if shape is not None: + shape = tensor_shape.as_shape(shape) if name in self._vars: # Here we handle the case when returning an existing variable. @@ -901,7 +903,9 @@ def _get_single_variable(self, raise ValueError("%s Originally defined at:\n\n%s" % (err_msg, "".join(traceback.format_list(tb)))) found_var = self._vars[name] - if not shape.is_compatible_with(found_var.get_shape()): + if shape is not None and not shape.is_compatible_with( + found_var.get_shape() + ): raise ValueError("Trying to share variable %s, but specified shape %s" " and found shape %s." % (name, shape, found_var.get_shape())) @@ -921,6 +925,7 @@ def _get_single_variable(self, # Create the tensor to initialize the variable with default value. 
if initializer is None: + assert shape is not None initializer, initializing_from_value = self._get_default_initializer( name=name, shape=shape, dtype=dtype) # Enter an init scope when creating the initializer. @@ -932,7 +937,7 @@ def _get_single_variable(self, # Instantiate initializer if provided initializer is a type object. if tf_inspect.isclass(initializer): initializer = initializer() - if shape.is_fully_defined(): + if shape is not None and shape.is_fully_defined(): if "partition_info" in tf_inspect.getargspec(initializer).args: init_val = functools.partial(initializer, shape.as_list(), @@ -967,7 +972,9 @@ def _get_single_variable(self, constraint=constraint, use_resource=use_resource, synchronization=synchronization, - aggregation=aggregation) + aggregation=aggregation, + shape=shape, + ) if context.executing_eagerly() and self._store_eager_variables: if collections: ops.add_to_collections(collections, v) From af50677da4f39028b85fe42df41cf1ebab68bea5 Mon Sep 17 00:00:00 2001 From: William Muir Date: Wed, 20 Sep 2023 18:09:57 -0500 Subject: [PATCH 059/567] Strip `external/local_tsl` prefix during zip of tsl protos --- tensorflow/tools/lib_package/BUILD | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tensorflow/tools/lib_package/BUILD b/tensorflow/tools/lib_package/BUILD index 578d2e3f2f5bfe..d30b414d5ddbc2 100644 --- a/tensorflow/tools/lib_package/BUILD +++ b/tensorflow/tools/lib_package/BUILD @@ -2,18 +2,17 @@ # This includes the C API, Java API, and protocol buffer files. 
load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") -load("@rules_pkg//pkg:tar.bzl", "pkg_tar") +load("@rules_pkg//:pkg.bzl", "pkg_tar", "pkg_zip") load("//tensorflow:tensorflow.bzl", "VERSION", "VERSION_MAJOR", "if_macos") load("//tensorflow/core/platform:build_config_root.bzl", "tf_additional_license_deps") load("//third_party/mkl:build_defs.bzl", "if_enable_mkl", "if_mkl") package(default_visibility = ["//visibility:private"]) -genrule( +pkg_zip( name = "libtensorflow_proto", srcs = ["//tensorflow/core:protos_all_proto_srcs"], - outs = ["libtensorflow_proto.zip"], - cmd = "zip $@ $(SRCS)", + strip_prefix = "/external/local_tsl", ) pkg_tar( From 5e3184466b9f28423f5538eceec6b61274e0ade8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 20 Sep 2023 16:09:01 -0700 Subject: [PATCH 060/567] Exercise new version of TF/XLA bridge in legalize_tf_quant_test PiperOrigin-RevId: 567108435 --- .../mlir/quantization/stablehlo/BUILD | 13 +- .../passes/bridge/legalize_tf_quant_test.cc | 121 ++++++++---------- tensorflow/compiler/mlir/tf2xla/api/v2/BUILD | 5 +- tensorflow/core/tpu/kernels/BUILD | 1 + 4 files changed, 66 insertions(+), 74 deletions(-) diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/BUILD b/tensorflow/compiler/mlir/quantization/stablehlo/BUILD index e3c574600d5c52..a4618d6de03082 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/BUILD +++ b/tensorflow/compiler/mlir/quantization/stablehlo/BUILD @@ -198,15 +198,20 @@ tf_cc_test( tags = ["no_oss"], deps = [ "//tensorflow/compiler/jit", - "//tensorflow/compiler/mlir/tf2xla:compile_mlir_util", + "//tensorflow/compiler/mlir/tf2xla/api/v2:legalize_tf", "//tensorflow/compiler/tf2xla:xla_helpers", "//tensorflow/core:framework", "//tensorflow/core:test", "//tensorflow/core:test_main", + "//tensorflow/core/protobuf:for_core_protos_cc", + "//tensorflow/core/protobuf/tpu:compile_metadata_proto_cc", + "//tensorflow/core/tpu/kernels:tpu_compile_op_support", 
"@com_google_googletest//:gtest", - "@local_tsl//tsl/platform:statusor", - "@local_xla//xla/hlo/ir:hlo", - "@local_xla//xla/service:hlo_proto_cc", + "@llvm-project//llvm:Support", + "@llvm-project//mlir:Pass", + "@local_xla//xla:shape_util", + "@local_xla//xla/client:client_library", + "@local_xla//xla/stream_executor", ], ) diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/legalize_tf_quant_test.cc b/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/legalize_tf_quant_test.cc index 369d0a235041f7..1fd1a0b6bab721 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/legalize_tf_quant_test.cc +++ b/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/legalize_tf_quant_test.cc @@ -16,22 +16,53 @@ limitations under the License. #include #include -#include #include -#include "tensorflow/compiler/mlir/tf2xla/api/v1/compile_mlir_util.h" -#include "tensorflow/compiler/tf2xla/xla_helpers.h" -#include "xla/hlo/ir/hlo_opcode.h" -#include "xla/service/hlo.pb.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "mlir/Pass/Pass.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tf2xla/api/v2/legalize_tf.h" +#include "xla/client/client_library.h" +#include "xla/shape.h" +#include "xla/stream_executor/multi_platform_manager.h" +#include "xla/stream_executor/platform.h" #include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/protobuf/config.pb.h" +#include "tensorflow/core/protobuf/tpu/compile_metadata.pb.h" +#include "tensorflow/core/tpu/kernels/tpu_compile_op_support.h" #include "tsl/lib/core/status_test_util.h" -#include "tsl/platform/statusor.h" -namespace tensorflow { -namespace tf2xla { -namespace v2 { +namespace mlir::quant::stablehlo { +namespace { + +class LegalizeTFQuantTest : public ::testing::Test { + protected: + void TestBridgeLowering(llvm::StringRef mlir_module_string, + llvm::ArrayRef arg_shapes) { + tensorflow::tpu::MlirToHloArgs 
mlir_to_hlo_args; + mlir_to_hlo_args.rollout_state = + tensorflow::ConfigProto::Experimental::MLIR_BRIDGE_ROLLOUT_UNSPECIFIED; + mlir_to_hlo_args.mlir_module = mlir_module_string; + tensorflow::se::Platform* platform = + tensorflow::se::MultiPlatformManager::PlatformWithName("Host").value(); + auto client = + xla::ClientLibrary::GetOrCreateCompileOnlyClient(platform).value(); + tensorflow::tpu::TPUCompileMetadataProto metadata_proto; + bool use_tuple_args = true; + std::vector arg_core_mapping; + std::vector> per_core_arg_shapes; + std::vector> custom_legalization_passes; + + TF_EXPECT_OK(tensorflow::tf2xla::v2::LegalizeMlirToHlo( + mlir_to_hlo_args, metadata_proto, use_tuple_args, + /*device_type=*/"XLA_TPU_JIT", custom_legalization_passes, + /*shape_determination_fns=*/{}, arg_shapes, + &arg_core_mapping, &per_core_arg_shapes, client) + .status()); + } +}; -TEST(LegalizeTFQuantTest, LegalizesModuleWithTFUniformQuantization) { - constexpr char legalization[] = R"mlir( +TEST_F(LegalizeTFQuantTest, LegalizesModuleWithTFUniformQuantization) { + constexpr char mlir_module_string[] = R"mlir( module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, producer = 268 : i32}} { func.func @main(%arg0 : tensor<1xf32>) -> tensor<1xf32> { %scales = "tf.Const"() { value = dense<1.0> : tensor } : () -> tensor @@ -48,47 +79,12 @@ TEST(LegalizeTFQuantTest, LegalizesModuleWithTFUniformQuantization) { })mlir"; std::vector arg_shapes = {{1}}; - XlaCompilationResult compilation_result; - - TF_ASSERT_OK(CompileSerializedMlirToXlaHlo( - legalization, arg_shapes, /*device_type=*/"XLA_TPU_JIT", - /*use_tuple_args=*/true, /*enable_op_fallback=*/true, - /*shape_determination_fns=*/{}, &compilation_result) - .status()); - - const xla::HloModuleProto& hlo_module = - compilation_result.computation->proto(); - for (const xla::HloComputationProto computation : hlo_module.computations()) { - for (const xla::HloInstructionProto instruction : - computation.instructions()) { - 
TF_ASSERT_OK_AND_ASSIGN(xla::HloOpcode opcode, - xla::StringToHloOpcode(instruction.opcode())); - switch (opcode) { - case xla::HloOpcode::kConstant: - case xla::HloOpcode::kDivide: - case xla::HloOpcode::kAdd: - case xla::HloOpcode::kFloor: - case xla::HloOpcode::kConvert: - case xla::HloOpcode::kMaximum: - case xla::HloOpcode::kMinimum: - case xla::HloOpcode::kSubtract: - case xla::HloOpcode::kParameter: - case xla::HloOpcode::kTuple: - case xla::HloOpcode::kGetTupleElement: - case xla::HloOpcode::kBroadcast: - case xla::HloOpcode::kClamp: - case xla::HloOpcode::kRoundNearestEven: - break; - default: - ADD_FAILURE() << "Failed to compile TF uniform quantized ops " - << "(unexpected opcode: " << opcode << ")"; - } - } - } + + TestBridgeLowering(mlir_module_string, arg_shapes); } -TEST(LegalizeTFQuantTest, LegalizesModuleWithDequantize) { - constexpr char legalization[] = R"mlir( +TEST_F(LegalizeTFQuantTest, LegalizesModuleWithDequantize) { + constexpr char mlir_module_string[] = R"mlir( module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, producer = 268 : i32}} { func.func @main(%arg0: tensor<1x!tf_type.qint8>) -> tensor<1xf32> { %min_range = "tf.Const"() { value = dense<1.0> : tensor } : () -> tensor @@ -97,19 +93,13 @@ TEST(LegalizeTFQuantTest, LegalizesModuleWithDequantize) { func.return %0 : tensor<1xf32> } })mlir"; - std::vector arg_shapes = {{1}}; - XlaCompilationResult compilation_result; - TF_EXPECT_OK(CompileSerializedMlirToXlaHlo( - legalization, arg_shapes, /*device_type=*/"XLA_CPU_JIT", - /*use_tuple_args=*/true, /*enable_op_fallback=*/true, - /*shape_determination_fns=*/{}, &compilation_result) - .status()); + TestBridgeLowering(mlir_module_string, arg_shapes); } -TEST(LegalizeTFQuantTest, LegalizesModuleWithClipByValue) { - constexpr char legalization[] = R"mlir( +TEST_F(LegalizeTFQuantTest, LegalizesModuleWithClipByValue) { + constexpr char mlir_module_string[] = R"mlir( module attributes {tf.versions = {bad_consumers = [], 
min_consumer = 0 : i32, producer = 268 : i32}} { func.func @main(%arg0 : tensor<2x2xf32>) -> tensor<2x2xf32> { %max = "tf.Const"() { value = dense<12.0> : tensor } : () -> tensor @@ -137,17 +127,10 @@ TEST(LegalizeTFQuantTest, LegalizesModuleWithClipByValue) { func.return %2 : tensor<2x2xf32> } })mlir"; - std::vector arg_shapes = {{2, 2}}; - XlaCompilationResult compilation_result; - TF_EXPECT_OK(CompileSerializedMlirToXlaHlo( - legalization, arg_shapes, /*device_type=*/"XLA_TPU_JIT", - /*use_tuple_args=*/true, /*enable_op_fallback=*/true, - /*shape_determination_fns=*/{}, &compilation_result) - .status()); + TestBridgeLowering(mlir_module_string, arg_shapes); } -} // namespace v2 -} // namespace tf2xla -} // namespace tensorflow +} // namespace +} // namespace mlir::quant::stablehlo diff --git a/tensorflow/compiler/mlir/tf2xla/api/v2/BUILD b/tensorflow/compiler/mlir/tf2xla/api/v2/BUILD index acd65e27f78a34..0ff72d68d418c6 100644 --- a/tensorflow/compiler/mlir/tf2xla/api/v2/BUILD +++ b/tensorflow/compiler/mlir/tf2xla/api/v2/BUILD @@ -11,7 +11,10 @@ package( ) # Please reach out to tf-bridge-team@ before using the TF2XLA bridge. 
-package_group(name = "tf2xla_users") +package_group( + name = "tf2xla_users", + packages = ["//tensorflow/compiler/mlir/quantization/stablehlo/..."], +) cc_library( name = "legalize_tf", diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index e75f17f57625d7..b7ac39d708bb39 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -32,6 +32,7 @@ load( package( # copybara:uncomment default_applicable_licenses = ["//tensorflow:license"], default_visibility = [ + "//tensorflow/compiler/mlir/quantization:__subpackages__", "//tensorflow/compiler/mlir/tf2xla:__subpackages__", "//tensorflow/compiler/xrt/kernels:__subpackages__", "//tensorflow/core/tpu:__subpackages__", From c4e5c7c6b4227ea5aedc62ae5ef3870f04e6bd74 Mon Sep 17 00:00:00 2001 From: Mason Chang Date: Wed, 20 Sep 2023 16:19:35 -0700 Subject: [PATCH 061/567] Move CreateTFRegionControlFlowToFunctional to graph export as it doesn't really do any Bridge logic. PiperOrigin-RevId: 567111139 --- tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc b/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc index b94e65f75e8a41..dfe07231999791 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc @@ -268,7 +268,6 @@ void CreateTPUBridgePipelineImpl( ->tf_mlir_enable_tpu_variable_runtime_reformatting_pass) { pm.addPass(CreateTPUVariableRuntimeReformattingPass()); } - pm.addPass(TF::CreateTFRegionControlFlowToFunctional()); } } // namespace @@ -298,6 +297,7 @@ void CreateTPUBridgePipelineV1(OpPassManager &pm) { pm.addPass(tf_executor::CreateTFExecutorTPUV1IslandOutliningPass()); OpPassManager &nested_module = pm.nest(); CreateTPUBridgePipelineImpl(nested_module); + pm.addPass(tf_executor::CreateTFExecutorTPUV1IslandInliningPass()); // There are cases 
where we don't consume all compilation and replication // attributes like we do for the V2 pipeline, so we need to convert them from @@ -373,6 +373,7 @@ void AddGraphExportLoweringPasses(OpPassManager &pm) { pm.addPass(CreateBreakUpIslandsPass()); }; + pm.addPass(TF::CreateTFRegionControlFlowToFunctional()); add_pass(CreateFunctionalToExecutorDialectConversionPass()); add_pass(TFDevice::CreateReplicateToIslandPass(/*legacy_graph_export=*/true)); add_pass(TFDevice::CreateReplicaIDToDeviceOrdinalPass()); @@ -391,6 +392,8 @@ void AddGraphExportLoweringPasses(OpPassManager &pm) { } void AddGraphExportLoweringPassesV2(OpPassManager &pm) { + pm.addPass(TF::CreateTFRegionControlFlowToFunctional()); + // First, we need to convert from functional, to executor dialect. pm.addNestedPass( CreateFunctionalToExecutorDialectConversionPass()); @@ -520,7 +523,6 @@ void CreateTFXLABridgePipeline(OpPassManager &pm) { pm.addNestedPass(createCSEPass()); pm.addPass(createSymbolDCEPass()); - pm.addPass(TF::CreateTFRegionControlFlowToFunctional()); } tensorflow::Status RunTFXLABridge(ModuleOp module, From aca5dfa4e11a4d18085b776ecc2bf741ee958569 Mon Sep 17 00:00:00 2001 From: Jian Cai Date: Wed, 20 Sep 2023 16:35:59 -0700 Subject: [PATCH 062/567] Do not create dependencies among instances of an op with TF_RandomGeneratorSideEffect trait This makes the MLIR side effect modelling of such ops consistent with ACD. 
PiperOrigin-RevId: 567115291 --- .../compiler/mlir/tensorflow/ir/tf_generated_ops.td | 2 +- tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td | 11 ++++++++++- tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc | 10 ---------- .../tensorflow/tests/side-effect-analysis-test.mlir | 2 +- 4 files changed, 12 insertions(+), 13 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td index 6ccb126fd9686e..ecd8c234eff40d 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td @@ -12306,7 +12306,7 @@ The generated values will have mean 0 and standard deviation 1. TF_DerivedResultTypeAttr dtype = TF_DerivedResultTypeAttr<0>; } -def TF_RandomUniformOp : TF_Op<"RandomUniform", [DeclareOpInterfaceMethods, TF_CannotDuplicate, TF_RandomGeneratorSideEffect]> { +def TF_RandomUniformOp : TF_Op<"RandomUniform", [TF_CannotDuplicate, TF_RandomGeneratorSideEffect]> { let summary = "Outputs random values from a uniform distribution."; let description = [{ diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td index 55758e0ecec2be..b1805ce7167bb9 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td @@ -253,7 +253,16 @@ def TF_RecvSideEffect : MemoryEffects<[MemWrite]>; def TF_XlaHostComputeSideEffect : MemoryEffects<[MemWrite]>; def TF_WriteTrainingPredictions : MemoryEffects<[MemWrite]>; -def TF_RandomGeneratorSideEffect : MemoryEffects<[MemWrite]>; + +// The state among all RNG ops is shared in a single global variable. When an +// RNG op runs, it uses the global variable to produce a random number, then +// updates the global variable. 
This would be nondeterministic if the RNG ops +// ran in a nondeterministic order, but XLA scheduler is deterministic so the +// order is always deterministic. However, we cannot mark such ops as Pure as +// that may lead to incorrect optimization, e.g. two instances of the same op +// with the same constant input may end up returning +// the same value, even though they should not have. +def TF_RandomGeneratorSideEffect : MemoryEffects<[MemRead]>; // Special effect for keeping `CollectiveReduce` ops in order. def TF_CollectiveReduceOrderingEffect : MemoryEffects<[MemWrite]>; diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc index d3994a6cf43645..e304bb2a24d9b5 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc @@ -668,16 +668,6 @@ LogicalResult RandomUniformOp::verify() { return success(); } -std::optional RandomUniformOp::GetResourceInstanceStr() { - // We do not create dependencies among the ops. XLA will run the ops in a - // deterministic order. However, we cannot mark the op as Pure as that may - // lead to incorrect optimization, e.g. two ops with the same constant input - // may end up returning the same value, even though they should have returned - // different values. 
- static unsigned counter = 0; - return std::to_string(counter++); -} - //===----------------------------------------------------------------------===// // RangeOp //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/tensorflow/tests/side-effect-analysis-test.mlir b/tensorflow/compiler/mlir/tensorflow/tests/side-effect-analysis-test.mlir index c57e07b5e3f74e..00b140dbf2afb9 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/side-effect-analysis-test.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/side-effect-analysis-test.mlir @@ -2909,7 +2909,7 @@ func.func @tpu_execute_effect( // ----- -// Tests that we don't create dependencies between any two `RandomUniform` ops. +// Tests that we don't create dependencies between any two instances of an op with `TF_RandomGeneratorSideEffect` trait. func.func @random_uniform_ordering_effect() -> (tensor<3xf32>) { // expected-remark@above {{ID: 9}} %graph = tf_executor.graph { From 6b14d69788b63fbc9fbc6677098a96c2aca3b594 Mon Sep 17 00:00:00 2001 From: Junwhan Ahn Date: Wed, 20 Sep 2023 16:44:09 -0700 Subject: [PATCH 063/567] Make `PjRtMemorySpace::id()` unique within a client The current API semantics requires memory space ids to be unique only within a memory kind. This is inconsistent with `PjRtDevice::id()` (unique within a client regardless of device types) and cumbersome (since `(memory_kind, id)` is needed to uniquely identify a memory space). This CL changes the contract to require implementations to guarantee ids to be unique within a client. 
PiperOrigin-RevId: 567117355 --- third_party/xla/xla/pjrt/pjrt_client.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/third_party/xla/xla/pjrt/pjrt_client.h b/third_party/xla/xla/pjrt/pjrt_client.h index 1541851ad0e4ff..1ac2461e8fc136 100644 --- a/third_party/xla/xla/pjrt/pjrt_client.h +++ b/third_party/xla/xla/pjrt/pjrt_client.h @@ -74,8 +74,7 @@ class PjRtMemorySpace { // The devices that this memory space is attached to. virtual absl::Span devices() const = 0; - // The ID of this memory space. IDs are unique among memory spaces of this - // type. + // The ID of this memory space. IDs are globally unique across all hosts. virtual int id() const = 0; // A platform-dependent string that uniquely identifies the kind of the From 86766a45c0d3a06342e2e34b5d5332131a6390c7 Mon Sep 17 00:00:00 2001 From: Subhankar Shah Date: Wed, 20 Sep 2023 16:57:37 -0700 Subject: [PATCH 064/567] [MemorySpaceAssignment] Move code related to MemorySpaceAssignment (memory_space_assignment* files) in a separate folder. 
PiperOrigin-RevId: 567120948 --- tensorflow/core/BUILD | 2 +- third_party/xla/xla/service/BUILD | 140 +-------------- .../xla/xla/service/buffer_assignment.h | 2 +- third_party/xla/xla/service/heap_simulator.cc | 2 +- third_party/xla/xla/service/heap_simulator.h | 2 +- .../xla/service/memory_space_assignment/BUILD | 160 ++++++++++++++++++ .../memory_space_assignment.cc | 8 +- .../memory_space_assignment.h | 10 +- .../memory_space_assignment.proto | 0 ...mory_space_assignment_best_fit_repacker.cc | 2 +- ...emory_space_assignment_best_fit_repacker.h | 8 +- ...space_assignment_best_fit_repacker_test.cc | 2 +- .../memory_space_assignment_repacking.h | 6 +- .../memory_space_assignment_test.cc | 10 +- .../memory_space_assignment_tuning_utils.cc | 4 +- .../memory_space_assignment_tuning_utils.h | 6 +- .../memory_space_assignment_utils.cc | 2 +- .../memory_space_assignment_utils.h | 6 +- third_party/xla/xla/xla.bzl | 4 +- 19 files changed, 200 insertions(+), 176 deletions(-) create mode 100644 third_party/xla/xla/service/memory_space_assignment/BUILD rename third_party/xla/xla/service/{ => memory_space_assignment}/memory_space_assignment.cc (99%) rename third_party/xla/xla/service/{ => memory_space_assignment}/memory_space_assignment.h (99%) rename third_party/xla/xla/service/{ => memory_space_assignment}/memory_space_assignment.proto (100%) rename third_party/xla/xla/service/{ => memory_space_assignment}/memory_space_assignment_best_fit_repacker.cc (97%) rename third_party/xla/xla/service/{ => memory_space_assignment}/memory_space_assignment_best_fit_repacker.h (79%) rename third_party/xla/xla/service/{ => memory_space_assignment}/memory_space_assignment_best_fit_repacker_test.cc (97%) rename third_party/xla/xla/service/{ => memory_space_assignment}/memory_space_assignment_repacking.h (95%) rename third_party/xla/xla/service/{ => memory_space_assignment}/memory_space_assignment_test.cc (99%) rename third_party/xla/xla/service/{ => 
memory_space_assignment}/memory_space_assignment_tuning_utils.cc (92%) rename third_party/xla/xla/service/{ => memory_space_assignment}/memory_space_assignment_tuning_utils.h (83%) rename third_party/xla/xla/service/{ => memory_space_assignment}/memory_space_assignment_utils.cc (97%) rename third_party/xla/xla/service/{ => memory_space_assignment}/memory_space_assignment_utils.h (84%) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 757e75829b4503..09bc9919bb9b4e 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1470,7 +1470,7 @@ cc_library( "@local_xla//xla:xla_proto_cc_impl", "@local_xla//xla:xla_data_proto_cc_impl", "@local_xla//xla/service:hlo_proto_cc_impl", - "@local_xla//xla/service:memory_space_assignment_proto_cc_impl", + "@local_xla//xla/service/memory_space_assignment:memory_space_assignment_proto_cc_impl", "@local_xla//xla/service/gpu:backend_configs_cc_impl", "@local_xla//xla/service/gpu:hlo_op_profile_proto_cc_impl", ] + tf_protos_grappler_impl() + tf_monitoring_framework_deps(), diff --git a/third_party/xla/xla/service/BUILD b/third_party/xla/xla/service/BUILD index 5c07e6f9a7efec..934cbf95b298be 100644 --- a/third_party/xla/xla/service/BUILD +++ b/third_party/xla/xla/service/BUILD @@ -1624,7 +1624,6 @@ cc_library( ":hlo_proto_cc", ":hlo_value", ":logical_buffer", - ":memory_space_assignment", ":tuple_points_to_analysis", "//xla:shape_util", "//xla:status_macros", @@ -1633,6 +1632,7 @@ cc_library( "//xla:util", "//xla/hlo/ir:hlo", "//xla/hlo/utils:hlo_live_range", + "//xla/service/memory_space_assignment", "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/container:btree", "@com_google_absl//absl/container:flat_hash_map", @@ -1735,7 +1735,6 @@ cc_library( ":hlo_dataflow_analysis", ":hlo_ordering", ":hlo_proto_cc", - ":memory_space_assignment_repacking", ":tuple_points_to_analysis", "//xla:comparison_util", "//xla:status", @@ -1743,6 +1742,7 @@ cc_library( "//xla:util", "//xla/hlo/ir:hlo", 
"//xla/hlo/utils:hlo_live_range", + "//xla/service/memory_space_assignment:memory_space_assignment_repacking", "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/container:btree", "@com_google_absl//absl/container:flat_hash_map", @@ -4591,134 +4591,6 @@ xla_cc_test( ], ) -cc_library( - name = "memory_space_assignment_utils", - srcs = ["memory_space_assignment_utils.cc"], - hdrs = ["memory_space_assignment_utils.h"], - visibility = ["//visibility:public"], - deps = [ - ":heap_simulator", - "//xla/hlo/ir:hlo", - ], -) - -cc_library( - name = "memory_space_assignment_tuning_utils", - srcs = ["memory_space_assignment_tuning_utils.cc"], - hdrs = ["memory_space_assignment_tuning_utils.h"], - visibility = ["//visibility:public"], - deps = [ - ":heap_simulator", - ":memory_space_assignment_utils", - "//xla/hlo/ir:hlo", - ], -) - -cc_library( - name = "memory_space_assignment_repacking", - hdrs = ["memory_space_assignment_repacking.h"], - visibility = ["//visibility:public"], - deps = [ - "//xla:statusor", - "//xla:types", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/types:span", - ], -) - -cc_library( - name = "memory_space_assignment_best_fit_repacker", - srcs = ["memory_space_assignment_best_fit_repacker.cc"], - hdrs = ["memory_space_assignment_best_fit_repacker.h"], - visibility = ["//visibility:public"], - deps = [ - ":heap_simulator", - ":memory_space_assignment_repacking", - ], -) - -xla_cc_test( - name = "memory_space_assignment_best_fit_repacker_test", - srcs = ["memory_space_assignment_best_fit_repacker_test.cc"], - deps = [ - ":memory_space_assignment_best_fit_repacker", - "//xla/tests:xla_internal_test_main", - "@local_tsl//tsl/platform:test", - ], -) - -cc_library( - name = "memory_space_assignment", - srcs = ["memory_space_assignment.cc"], - hdrs = ["memory_space_assignment.h"], - visibility = ["//visibility:public"], - deps = [ - ":heap_simulator", - ":hlo_cost_analysis", - ":hlo_proto_cc", - ":hlo_value", - 
":memory_space_assignment_proto_cc", - ":memory_space_assignment_repacking", - ":memory_space_assignment_tuning_utils", - ":memory_space_assignment_utils", - ":tuple_util", - "//xla:debug_options_flags", - "//xla:shape_util", - "//xla:status", - "//xla:statusor", - "//xla:util", - "//xla/hlo/ir:hlo", - "//xla/hlo/utils:hlo_live_range", - "@com_google_absl//absl/algorithm:container", - "@com_google_absl//absl/container:btree", - "@com_google_absl//absl/container:flat_hash_map", - "@com_google_absl//absl/functional:function_ref", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/types:span", - "@local_tsl//tsl/platform:casts", - "@local_tsl//tsl/platform:errors", - "@local_tsl//tsl/platform:logging", - "@local_tsl//tsl/platform:statusor", - ], -) - -xla_cc_test( - name = "memory_space_assignment_test", - srcs = ["memory_space_assignment_test.cc"], - deps = [ - ":heap_simulator", - ":hlo_cost_analysis", - ":hlo_value", - ":instruction_hoister", - ":memory_space_assignment", - ":memory_space_assignment_proto_cc", - ":memory_space_assignment_repacking", - "//xla:shape_util", - "//xla:status", - "//xla:util", - "//xla:xla_data_proto_cc", - "//xla/hlo/ir:hlo", - "//xla/hlo/utils:hlo_matchers", - "//xla/tests:hlo_test_base", - "//xla/tests:xla_internal_test_main", - "@com_google_absl//absl/algorithm:container", - "@com_google_absl//absl/container:flat_hash_map", - "@com_google_absl//absl/container:flat_hash_set", - "@com_google_absl//absl/functional:any_invocable", - "@com_google_absl//absl/log", - "@com_google_absl//absl/status:statusor", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/types:span", - "@com_google_googletest//:gtest", - "@local_tsl//tsl/lib/core:status_test_util", - "@local_tsl//tsl/platform:errors", - "@local_tsl//tsl/platform:status", - "@local_tsl//tsl/platform:statusor", - "@local_tsl//tsl/platform:test", - ], -) - cc_library( name = "memory_space_propagation", srcs = 
["memory_space_propagation.cc"], @@ -7251,11 +7123,3 @@ xla_cc_test( "@local_tsl//tsl/platform:test", ]), ) - -tf_proto_library( - name = "memory_space_assignment_proto", - srcs = ["memory_space_assignment.proto"], - cc_api_version = 2, - make_default_target_header_only = True, - visibility = ["//visibility:public"], -) diff --git a/third_party/xla/xla/service/buffer_assignment.h b/third_party/xla/xla/service/buffer_assignment.h index c77ffc4d895213..8c43b47fe9ded5 100644 --- a/third_party/xla/xla/service/buffer_assignment.h +++ b/third_party/xla/xla/service/buffer_assignment.h @@ -34,7 +34,7 @@ limitations under the License. #include "xla/service/hlo_alias_analysis.h" #include "xla/service/hlo_dataflow_analysis.h" #include "xla/service/logical_buffer.h" -#include "xla/service/memory_space_assignment.h" +#include "xla/service/memory_space_assignment/memory_space_assignment.h" #include "xla/service/tuple_points_to_analysis.h" #include "xla/statusor.h" #include "xla/types.h" diff --git a/third_party/xla/xla/service/heap_simulator.cc b/third_party/xla/xla/service/heap_simulator.cc index 44d7b87a804245..d5e099d587397d 100644 --- a/third_party/xla/xla/service/heap_simulator.cc +++ b/third_party/xla/xla/service/heap_simulator.cc @@ -43,7 +43,7 @@ limitations under the License. #include "xla/hlo/ir/hlo_schedule.h" #include "xla/hlo/utils/hlo_live_range.h" #include "xla/map_util.h" -#include "xla/service/memory_space_assignment_repacking.h" +#include "xla/service/memory_space_assignment/memory_space_assignment_repacking.h" #include "xla/status.h" #include "xla/util.h" diff --git a/third_party/xla/xla/service/heap_simulator.h b/third_party/xla/xla/service/heap_simulator.h index 755c735608ebf8..94cb22fd5e272b 100644 --- a/third_party/xla/xla/service/heap_simulator.h +++ b/third_party/xla/xla/service/heap_simulator.h @@ -46,7 +46,7 @@ limitations under the License. 
#include "xla/service/hlo_buffer.h" #include "xla/service/hlo_dataflow_analysis.h" #include "xla/service/hlo_ordering.h" -#include "xla/service/memory_space_assignment_repacking.h" +#include "xla/service/memory_space_assignment/memory_space_assignment_repacking.h" #include "xla/service/tuple_points_to_analysis.h" #include "xla/statusor.h" diff --git a/third_party/xla/xla/service/memory_space_assignment/BUILD b/third_party/xla/xla/service/memory_space_assignment/BUILD new file mode 100644 index 00000000000000..48ff95b0a13f4e --- /dev/null +++ b/third_party/xla/xla/service/memory_space_assignment/BUILD @@ -0,0 +1,160 @@ +# Description: +# Memory Space Assignment service implementation. + +load( + "//xla:xla.bzl", + "xla_cc_test", +) +load( + "@local_tsl//tsl/platform:build_config.bzl", + "tf_proto_library", +) +load("@local_tsl//tsl/platform:rules_cc.bzl", "cc_library") + +package( + default_visibility = ["//visibility:public"], + licenses = ["notice"], +) + +package_group( + name = "friends", + includes = [ + "//xla:friends", + ], +) + +tf_proto_library( + name = "memory_space_assignment_proto", + srcs = ["memory_space_assignment.proto"], + cc_api_version = 2, + make_default_target_header_only = True, + visibility = ["//visibility:public"], +) + +cc_library( + name = "memory_space_assignment", + srcs = ["memory_space_assignment.cc"], + hdrs = ["memory_space_assignment.h"], + visibility = ["//visibility:public"], + deps = [ + ":memory_space_assignment_proto_cc", + ":memory_space_assignment_repacking", + ":memory_space_assignment_tuning_utils", + ":memory_space_assignment_utils", + "//xla:debug_options_flags", + "//xla:shape_util", + "//xla:status", + "//xla:statusor", + "//xla:util", + "//xla/hlo/ir:hlo", + "//xla/hlo/utils:hlo_live_range", + "//xla/service:heap_simulator", + "//xla/service:hlo_cost_analysis", + "//xla/service:hlo_proto_cc", + "//xla/service:hlo_value", + "//xla/service:tuple_util", + "@com_google_absl//absl/algorithm:container", + 
"@com_google_absl//absl/container:btree", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/functional:function_ref", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:span", + "@local_tsl//tsl/platform:casts", + "@local_tsl//tsl/platform:errors", + "@local_tsl//tsl/platform:logging", + "@local_tsl//tsl/platform:statusor", + ], +) + +xla_cc_test( + name = "memory_space_assignment_test", + srcs = ["memory_space_assignment_test.cc"], + deps = [ + ":memory_space_assignment", + ":memory_space_assignment_proto_cc", + ":memory_space_assignment_repacking", + "//xla:shape_util", + "//xla:status", + "//xla:util", + "//xla:xla_data_proto_cc", + "//xla/hlo/ir:hlo", + "//xla/hlo/utils:hlo_matchers", + "//xla/service:heap_simulator", + "//xla/service:hlo_cost_analysis", + "//xla/service:hlo_value", + "//xla/service:instruction_hoister", + "//xla/tests:hlo_test_base", + "//xla/tests:xla_internal_test_main", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/functional:any_invocable", + "@com_google_absl//absl/log", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:span", + "@com_google_googletest//:gtest", + "@local_tsl//tsl/lib/core:status_test_util", + "@local_tsl//tsl/platform:errors", + "@local_tsl//tsl/platform:status", + "@local_tsl//tsl/platform:statusor", + "@local_tsl//tsl/platform:test", + ], +) + +cc_library( + name = "memory_space_assignment_repacking", + hdrs = ["memory_space_assignment_repacking.h"], + visibility = ["//visibility:public"], + deps = [ + "//xla:statusor", + "//xla:types", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:span", + ], +) + +cc_library( + name = "memory_space_assignment_best_fit_repacker", + srcs = ["memory_space_assignment_best_fit_repacker.cc"], + hdrs 
= ["memory_space_assignment_best_fit_repacker.h"], + visibility = ["//visibility:public"], + deps = [ + ":memory_space_assignment_repacking", + "//xla/service:heap_simulator", + ], +) + +cc_library( + name = "memory_space_assignment_utils", + srcs = ["memory_space_assignment_utils.cc"], + hdrs = ["memory_space_assignment_utils.h"], + visibility = ["//visibility:public"], + deps = [ + "//xla/hlo/ir:hlo", + "//xla/service:heap_simulator", + ], +) + +cc_library( + name = "memory_space_assignment_tuning_utils", + srcs = ["memory_space_assignment_tuning_utils.cc"], + hdrs = ["memory_space_assignment_tuning_utils.h"], + visibility = ["//visibility:public"], + deps = [ + ":memory_space_assignment_utils", + "//xla/hlo/ir:hlo", + "//xla/service:heap_simulator", + ], +) + +xla_cc_test( + name = "memory_space_assignment_best_fit_repacker_test", + srcs = ["memory_space_assignment_best_fit_repacker_test.cc"], + deps = [ + ":memory_space_assignment_best_fit_repacker", + "//xla/tests:xla_internal_test_main", + "@local_tsl//tsl/platform:test", + ], +) diff --git a/third_party/xla/xla/service/memory_space_assignment.cc b/third_party/xla/xla/service/memory_space_assignment/memory_space_assignment.cc similarity index 99% rename from third_party/xla/xla/service/memory_space_assignment.cc rename to third_party/xla/xla/service/memory_space_assignment/memory_space_assignment.cc index 2a28441e9961d8..317162b005a125 100644 --- a/third_party/xla/xla/service/memory_space_assignment.cc +++ b/third_party/xla/xla/service/memory_space_assignment/memory_space_assignment.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "xla/service/memory_space_assignment.h" +#include "xla/service/memory_space_assignment/memory_space_assignment.h" #include #include @@ -46,9 +46,9 @@ limitations under the License. 
#include "xla/hlo/utils/hlo_live_range.h" #include "xla/service/heap_simulator.h" #include "xla/service/hlo_value.h" -#include "xla/service/memory_space_assignment_repacking.h" -#include "xla/service/memory_space_assignment_tuning_utils.h" -#include "xla/service/memory_space_assignment_utils.h" +#include "xla/service/memory_space_assignment/memory_space_assignment_repacking.h" +#include "xla/service/memory_space_assignment/memory_space_assignment_tuning_utils.h" +#include "xla/service/memory_space_assignment/memory_space_assignment_utils.h" #include "xla/service/tuple_util.h" #include "xla/shape.h" #include "xla/shape_util.h" diff --git a/third_party/xla/xla/service/memory_space_assignment.h b/third_party/xla/xla/service/memory_space_assignment/memory_space_assignment.h similarity index 99% rename from third_party/xla/xla/service/memory_space_assignment.h rename to third_party/xla/xla/service/memory_space_assignment/memory_space_assignment.h index b39e14170e9ca8..e988d598332b6c 100644 --- a/third_party/xla/xla/service/memory_space_assignment.h +++ b/third_party/xla/xla/service/memory_space_assignment/memory_space_assignment.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_H_ -#define XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_H_ +#ifndef XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_MEMORY_SPACE_ASSIGNMENT_H_ +#define XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_MEMORY_SPACE_ASSIGNMENT_H_ #include #include @@ -42,8 +42,8 @@ limitations under the License. 
#include "xla/service/hlo.pb.h" #include "xla/service/hlo_cost_analysis.h" #include "xla/service/hlo_value.h" -#include "xla/service/memory_space_assignment.pb.h" -#include "xla/service/memory_space_assignment_repacking.h" +#include "xla/service/memory_space_assignment/memory_space_assignment.pb.h" +#include "xla/service/memory_space_assignment/memory_space_assignment_repacking.h" #include "xla/shape.h" #include "xla/statusor.h" @@ -2667,4 +2667,4 @@ class AlternateMemoryBestFitHeap } // namespace memory_space_assignment } // namespace xla -#endif // XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_H_ +#endif // XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_MEMORY_SPACE_ASSIGNMENT_H_ diff --git a/third_party/xla/xla/service/memory_space_assignment.proto b/third_party/xla/xla/service/memory_space_assignment/memory_space_assignment.proto similarity index 100% rename from third_party/xla/xla/service/memory_space_assignment.proto rename to third_party/xla/xla/service/memory_space_assignment/memory_space_assignment.proto diff --git a/third_party/xla/xla/service/memory_space_assignment_best_fit_repacker.cc b/third_party/xla/xla/service/memory_space_assignment/memory_space_assignment_best_fit_repacker.cc similarity index 97% rename from third_party/xla/xla/service/memory_space_assignment_best_fit_repacker.cc rename to third_party/xla/xla/service/memory_space_assignment/memory_space_assignment_best_fit_repacker.cc index c5354e2d6bbc8b..6eaf744f5a9e84 100644 --- a/third_party/xla/xla/service/memory_space_assignment_best_fit_repacker.cc +++ b/third_party/xla/xla/service/memory_space_assignment/memory_space_assignment_best_fit_repacker.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "xla/service/memory_space_assignment_best_fit_repacker.h" +#include "xla/service/memory_space_assignment/memory_space_assignment_best_fit_repacker.h" #include #include diff --git a/third_party/xla/xla/service/memory_space_assignment_best_fit_repacker.h b/third_party/xla/xla/service/memory_space_assignment/memory_space_assignment_best_fit_repacker.h similarity index 79% rename from third_party/xla/xla/service/memory_space_assignment_best_fit_repacker.h rename to third_party/xla/xla/service/memory_space_assignment/memory_space_assignment_best_fit_repacker.h index 71742efd428701..220c19350b5925 100644 --- a/third_party/xla/xla/service/memory_space_assignment_best_fit_repacker.h +++ b/third_party/xla/xla/service/memory_space_assignment/memory_space_assignment_best_fit_repacker.h @@ -13,11 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_BEST_FIT_REPACKER_H_ -#define XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_BEST_FIT_REPACKER_H_ +#ifndef XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_MEMORY_SPACE_ASSIGNMENT_BEST_FIT_REPACKER_H_ +#define XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_MEMORY_SPACE_ASSIGNMENT_BEST_FIT_REPACKER_H_ #include "xla/service/heap_simulator.h" -#include "xla/service/memory_space_assignment_repacking.h" +#include "xla/service/memory_space_assignment/memory_space_assignment_repacking.h" namespace xla { @@ -41,4 +41,4 @@ class MemorySpaceAssignmentBestFitRepacker } // namespace xla -#endif // XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_BEST_FIT_REPACKER_H_ +#endif // XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_MEMORY_SPACE_ASSIGNMENT_BEST_FIT_REPACKER_H_ diff --git a/third_party/xla/xla/service/memory_space_assignment_best_fit_repacker_test.cc 
b/third_party/xla/xla/service/memory_space_assignment/memory_space_assignment_best_fit_repacker_test.cc similarity index 97% rename from third_party/xla/xla/service/memory_space_assignment_best_fit_repacker_test.cc rename to third_party/xla/xla/service/memory_space_assignment/memory_space_assignment_best_fit_repacker_test.cc index 60e79cdfeba059..52b87ffebb3c28 100644 --- a/third_party/xla/xla/service/memory_space_assignment_best_fit_repacker_test.cc +++ b/third_party/xla/xla/service/memory_space_assignment/memory_space_assignment_best_fit_repacker_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "xla/service/memory_space_assignment_best_fit_repacker.h" +#include "xla/service/memory_space_assignment/memory_space_assignment_best_fit_repacker.h" #include "tsl/platform/test.h" diff --git a/third_party/xla/xla/service/memory_space_assignment_repacking.h b/third_party/xla/xla/service/memory_space_assignment/memory_space_assignment_repacking.h similarity index 95% rename from third_party/xla/xla/service/memory_space_assignment_repacking.h rename to third_party/xla/xla/service/memory_space_assignment/memory_space_assignment_repacking.h index a5d34e844bf35b..9d0be634ddda01 100644 --- a/third_party/xla/xla/service/memory_space_assignment_repacking.h +++ b/third_party/xla/xla/service/memory_space_assignment/memory_space_assignment_repacking.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_REPACKING_H_ -#define XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_REPACKING_H_ +#ifndef XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_MEMORY_SPACE_ASSIGNMENT_REPACKING_H_ +#define XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_MEMORY_SPACE_ASSIGNMENT_REPACKING_H_ #include #include @@ -138,4 +138,4 @@ class MemorySpaceAssignmentRepacker { } // namespace xla -#endif // XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_REPACKING_H_ +#endif // XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_MEMORY_SPACE_ASSIGNMENT_REPACKING_H_ diff --git a/third_party/xla/xla/service/memory_space_assignment_test.cc b/third_party/xla/xla/service/memory_space_assignment/memory_space_assignment_test.cc similarity index 99% rename from third_party/xla/xla/service/memory_space_assignment_test.cc rename to third_party/xla/xla/service/memory_space_assignment/memory_space_assignment_test.cc index ecb93020c529f5..4032ec52bc7f36 100644 --- a/third_party/xla/xla/service/memory_space_assignment_test.cc +++ b/third_party/xla/xla/service/memory_space_assignment/memory_space_assignment_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "xla/service/memory_space_assignment.h" +#include "xla/service/memory_space_assignment/memory_space_assignment.h" #include #include @@ -53,8 +53,8 @@ limitations under the License. 
#include "xla/service/hlo_cost_analysis.h" #include "xla/service/hlo_value.h" #include "xla/service/instruction_hoister.h" -#include "xla/service/memory_space_assignment.pb.h" -#include "xla/service/memory_space_assignment_repacking.h" +#include "xla/service/memory_space_assignment/memory_space_assignment.pb.h" +#include "xla/service/memory_space_assignment/memory_space_assignment_repacking.h" #include "xla/shape.h" #include "xla/shape_util.h" #include "xla/status.h" @@ -11678,8 +11678,8 @@ class MockRepacker : public MemorySpaceAssignmentRepacker { // - With repacking, we are able to prefetch p4. // - When repacking occurs, we expect p2 and p3 to have been allocated chunks. // We are only proposing slices for f32[32, 16] and not f32[16,16]; thus, we -// expect slicing metdata to be attached to the repacking block for p2 but not -// p3. +// expect slicing metadata to be attached to the repacking block for p2 but +// not p3. // - We make the repacker assign the first slice (in time) of p2 the larger // offset. After MSA, we check to make sure the fist slice is using the // larger slicing parameters diff --git a/third_party/xla/xla/service/memory_space_assignment_tuning_utils.cc b/third_party/xla/xla/service/memory_space_assignment/memory_space_assignment_tuning_utils.cc similarity index 92% rename from third_party/xla/xla/service/memory_space_assignment_tuning_utils.cc rename to third_party/xla/xla/service/memory_space_assignment/memory_space_assignment_tuning_utils.cc index 32db6becff9dba..2e48eaeb0f670f 100644 --- a/third_party/xla/xla/service/memory_space_assignment_tuning_utils.cc +++ b/third_party/xla/xla/service/memory_space_assignment/memory_space_assignment_tuning_utils.cc @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "xla/service/memory_space_assignment_tuning_utils.h" +#include "xla/service/memory_space_assignment/memory_space_assignment_tuning_utils.h" -#include "xla/service/memory_space_assignment_utils.h" +#include "xla/service/memory_space_assignment/memory_space_assignment_utils.h" namespace xla { namespace memory_space_assignment { diff --git a/third_party/xla/xla/service/memory_space_assignment_tuning_utils.h b/third_party/xla/xla/service/memory_space_assignment/memory_space_assignment_tuning_utils.h similarity index 83% rename from third_party/xla/xla/service/memory_space_assignment_tuning_utils.h rename to third_party/xla/xla/service/memory_space_assignment/memory_space_assignment_tuning_utils.h index 749b4445e4be9a..ce6230982bd5b0 100644 --- a/third_party/xla/xla/service/memory_space_assignment_tuning_utils.h +++ b/third_party/xla/xla/service/memory_space_assignment/memory_space_assignment_tuning_utils.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_TUNING_UTILS_H_ -#define XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_TUNING_UTILS_H_ +#ifndef XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_MEMORY_SPACE_ASSIGNMENT_TUNING_UTILS_H_ +#define XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_MEMORY_SPACE_ASSIGNMENT_TUNING_UTILS_H_ #include "xla/hlo/ir/hlo_module.h" #include "xla/service/heap_simulator.h" @@ -35,4 +35,4 @@ void CustomizeSortedBufferInterval( } // namespace memory_space_assignment } // namespace xla -#endif // XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_TUNING_UTILS_H_ +#endif // XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_MEMORY_SPACE_ASSIGNMENT_TUNING_UTILS_H_ diff --git a/third_party/xla/xla/service/memory_space_assignment_utils.cc b/third_party/xla/xla/service/memory_space_assignment/memory_space_assignment_utils.cc similarity index 97% rename from third_party/xla/xla/service/memory_space_assignment_utils.cc rename to third_party/xla/xla/service/memory_space_assignment/memory_space_assignment_utils.cc index c9e84955c19bc9..03f1621dfbaea9 100644 --- a/third_party/xla/xla/service/memory_space_assignment_utils.cc +++ b/third_party/xla/xla/service/memory_space_assignment/memory_space_assignment_utils.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "xla/service/memory_space_assignment_utils.h" +#include "xla/service/memory_space_assignment/memory_space_assignment_utils.h" #include "xla/hlo/ir/hlo_casting_utils.h" #include "xla/hlo/ir/hlo_instructions.h" diff --git a/third_party/xla/xla/service/memory_space_assignment_utils.h b/third_party/xla/xla/service/memory_space_assignment/memory_space_assignment_utils.h similarity index 84% rename from third_party/xla/xla/service/memory_space_assignment_utils.h rename to third_party/xla/xla/service/memory_space_assignment/memory_space_assignment_utils.h index 10b486983ee253..e4cf6f45b51517 100644 --- a/third_party/xla/xla/service/memory_space_assignment_utils.h +++ b/third_party/xla/xla/service/memory_space_assignment/memory_space_assignment_utils.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_UTILS_H_ -#define XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_UTILS_H_ +#ifndef XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_MEMORY_SPACE_ASSIGNMENT_UTILS_H_ +#define XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_MEMORY_SPACE_ASSIGNMENT_UTILS_H_ #include "xla/service/heap_simulator.h" @@ -35,4 +35,4 @@ class MemorySpaceAssignmentUtils { } // namespace xla -#endif // XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_UTILS_H_ +#endif // XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_MEMORY_SPACE_ASSIGNMENT_UTILS_H_ diff --git a/third_party/xla/xla/xla.bzl b/third_party/xla/xla/xla.bzl index 79a9ea075a7080..0cfaa67192e898 100644 --- a/third_party/xla/xla/xla.bzl +++ b/third_party/xla/xla/xla.bzl @@ -59,7 +59,7 @@ def xla_cc_binary(deps = None, copts = tsl_copts(), **kwargs): "//xla:xla_proto_cc_impl", "//xla:xla_data_proto_cc_impl", "//xla/service:hlo_proto_cc_impl", - "//xla/service:memory_space_assignment_proto_cc_impl", + 
"//xla/service/memory_space_assignment:memory_space_assignment_proto_cc_impl", "//xla/service/gpu:backend_configs_cc_impl", "//xla/service/gpu:hlo_op_profile_proto_cc_impl", "//xla/stream_executor:dnn_proto_cc_impl", @@ -91,7 +91,7 @@ def xla_cc_test( clean_dep("//xla:xla_proto_cc_impl"), clean_dep("//xla:xla_data_proto_cc_impl"), clean_dep("//xla/service:hlo_proto_cc_impl"), - clean_dep("//xla/service:memory_space_assignment_proto_cc_impl"), + clean_dep("//xla/service/memory_space_assignment:memory_space_assignment_proto_cc_impl"), clean_dep("//xla/service/gpu:backend_configs_cc_impl"), clean_dep("//xla/service/gpu:hlo_op_profile_proto_cc_impl"), clean_dep("//xla/stream_executor:dnn_proto_cc_impl"), From 44378dfdc0b10dea9a44c86b3d7210a83c304419 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 20 Sep 2023 17:07:13 -0700 Subject: [PATCH 065/567] Allow `std::nullopt` in `xla::ifrt::MemoryKind` constructor. PiperOrigin-RevId: 567123142 --- third_party/xla/xla/python/ifrt/memory.cc | 3 +++ third_party/xla/xla/python/ifrt/memory_test.cc | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/third_party/xla/xla/python/ifrt/memory.cc b/third_party/xla/xla/python/ifrt/memory.cc index 05821e488f61f6..27a7ad8d6d551e 100644 --- a/third_party/xla/xla/python/ifrt/memory.cc +++ b/third_party/xla/xla/python/ifrt/memory.cc @@ -39,6 +39,9 @@ struct MemoryKindsSet { MemoryKind::MemoryKind(std::optional memory_kind) { static auto* const global_set = new MemoryKindsSet(); + if (!memory_kind.has_value()) { + return; + } absl::MutexLock lock(&global_set->mu); auto it = global_set->memory_kinds_set.find(*memory_kind); if (it == global_set->memory_kinds_set.end()) { diff --git a/third_party/xla/xla/python/ifrt/memory_test.cc b/third_party/xla/xla/python/ifrt/memory_test.cc index 0e7e291bd9f945..bb223471c44776 100644 --- a/third_party/xla/xla/python/ifrt/memory_test.cc +++ b/third_party/xla/xla/python/ifrt/memory_test.cc @@ -16,6 +16,7 @@ limitations under the 
License. #include "xla/python/ifrt/memory.h" #include +#include #include #include @@ -72,6 +73,12 @@ TEST(MemoryKindTest, MemorySafety) { EXPECT_THAT(memory_kind.memory_kind(), Optional(absl::string_view("abc"))); } +TEST(MemoryKindTest, EqualityForUnspecifiedAndNullopt) { + MemoryKind memory_kind1; + MemoryKind memory_kind2(std::nullopt); + EXPECT_EQ(memory_kind1, memory_kind2); +} + } // namespace } // namespace ifrt } // namespace xla From e6da72d8c1c7969e409488b843fe66cdf85e341f Mon Sep 17 00:00:00 2001 From: Edward Schwartz Date: Wed, 20 Sep 2023 17:54:35 -0700 Subject: [PATCH 066/567] 1) Fix: * test_dense_input_ragged_weights_fails, * test_sparse_input_ragged_weights_fails. sparse_ops.sparse_bincount is intended to handle inputs of dense tensors (including non-TensorFlow types that can be converted to dense tensors) and sparse tensors only. Ragged tensor inputs are expected to use other software via dispatch. `weights` are not considered by dispatch. Previously, if `weights` is not a sparse tensor, it is assumed to be something that can be converted to a dense tensor (e.g. a numpy array or Python list) but this conversion fails for ragged tensors. This CL only converts `weights` if it is not any type of composite tensor, allowing error checking to detect any mismatch in the type of tensor between the input and `weights`. 2) Enable running ragged bincount tests. 
PiperOrigin-RevId: 567132514 --- tensorflow/python/ops/BUILD | 1 + tensorflow/python/ops/ragged/BUILD | 1 + tensorflow/python/ops/ragged/ragged_bincount_ops_test.py | 4 ++++ tensorflow/python/ops/sparse_ops.py | 7 ++++++- 4 files changed, 12 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/ops/BUILD b/tensorflow/python/ops/BUILD index 36a41014cba5ca..00679cbf1d36ec 100644 --- a/tensorflow/python/ops/BUILD +++ b/tensorflow/python/ops/BUILD @@ -2601,6 +2601,7 @@ py_strict_library( ":math_ops_gen", ":sparse_ops_gen", ":special_math_ops", + "//tensorflow/python/framework:composite_tensor", "//tensorflow/python/framework:constant_op", "//tensorflow/python/framework:dtypes", "//tensorflow/python/framework:ops", diff --git a/tensorflow/python/ops/ragged/BUILD b/tensorflow/python/ops/ragged/BUILD index 27b6f311519c82..ccb6c9dcd63826 100644 --- a/tensorflow/python/ops/ragged/BUILD +++ b/tensorflow/python/ops/ragged/BUILD @@ -165,6 +165,7 @@ cuda_py_strict_test( "//tensorflow/python/framework:test_lib", "//tensorflow/python/ops:bincount_ops", "//tensorflow/python/ops:sparse_ops", + "//tensorflow/python/platform:client_testlib", "@absl_py//absl/testing:parameterized", ], ) diff --git a/tensorflow/python/ops/ragged/ragged_bincount_ops_test.py b/tensorflow/python/ops/ragged/ragged_bincount_ops_test.py index ec7fa539c83a75..bda3dc32c6eedd 100644 --- a/tensorflow/python/ops/ragged/ragged_bincount_ops_test.py +++ b/tensorflow/python/ops/ragged/ragged_bincount_ops_test.py @@ -24,6 +24,7 @@ from tensorflow.python.ops import sparse_ops from tensorflow.python.ops.ragged import ragged_factory_ops from tensorflow.python.ops.ragged import ragged_tensor +from tensorflow.python.platform import test def _ragged_factory(x): @@ -551,3 +552,6 @@ def test_ragged_input_different_shape_fails(self): with self.assertRaisesRegex(errors.InvalidArgumentError, "must have the same row splits"): self.evaluate(sparse_ops.sparse_bincount(x, weights=weights, axis=-1)) + +if __name__ == 
"__main__": + test.main() diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py index db11f005f94d39..82cee2d96e0866 100644 --- a/tensorflow/python/ops/sparse_ops.py +++ b/tensorflow/python/ops/sparse_ops.py @@ -25,6 +25,7 @@ import numpy as np +from tensorflow.python.framework import composite_tensor from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -3351,7 +3352,11 @@ def sparse_bincount(values, values = tensor_conversion.convert_to_tensor_v2_with_dispatch( values, name="values") if weights is not None: - if not isinstance(weights, sparse_tensor.SparseTensor): + # Note that `weights` is not used for dispatch and if there is a type + # mismatch between `values` and `weights`, `weights` can be a RaggedTensor + # (or potentially some other kind of CompositeTensor) where conversion + # to a dense tensor fails. + if not isinstance(weights, composite_tensor.CompositeTensor): weights = tensor_conversion.convert_to_tensor_v2_with_dispatch( weights, name="weights") From 21249b3f3a157d38b25ae954a13b0ef3057e06e6 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Wed, 20 Sep 2023 18:04:37 -0700 Subject: [PATCH 067/567] [stream_executor] NFC: Restrict visibility of stream_executor_internal target https://github.com/openxla/xla/issues/5761 PiperOrigin-RevId: 567134621 --- third_party/xla/xla/stream_executor/BUILD | 65 +++++++++++-------- .../xla/xla/stream_executor/build_defs.bzl | 3 + .../stream_executor_internal.h | 17 ++--- 3 files changed, 47 insertions(+), 38 deletions(-) diff --git a/third_party/xla/xla/stream_executor/BUILD b/third_party/xla/xla/stream_executor/BUILD index 14348bd2a6e9e5..f4b13f734c8301 100644 --- a/third_party/xla/xla/stream_executor/BUILD +++ b/third_party/xla/xla/stream_executor/BUILD @@ -5,7 +5,7 @@ # do not link against restricted binary blobs. 
load("//xla:xla.bzl", "xla_cc_test") -load("//xla/stream_executor:build_defs.bzl", "stream_executor_friends") +load("//xla/stream_executor:build_defs.bzl", "stream_executor_friends", "stream_executor_internal") load("@local_tsl//tsl:tsl.bzl", "set_external_visibility", "transitive_hdrs") load("@local_tsl//tsl:tsl.default.bzl", "filegroup") load("@local_tsl//tsl/platform:build_config.bzl", "tf_proto_library") @@ -17,15 +17,51 @@ package( licenses = ["notice"], ) +# StreamExecutor clients that can depend on targets defined in stream_executor package. package_group( name = "friends", packages = stream_executor_friends(), ) +# StreamExecutor platform-dependent implementations. We restrict visibility of all internal +# implementation interfaces to internal users (everything in `stream_executor::internal` namespace). +package_group( + name = "internal", + packages = stream_executor_internal(), +) + +#===--------------------------------------------------------------------------------------------===# +# StreamExecutor platform-dependent implementation details +#===--------------------------------------------------------------------------------------------===# + +# Only platform-dependent StreamExecutor implementations (e.g. StreamExecutor for GPUs) and targets +# defined by StreamExecutor itself (e.g. `event`, `kernel`, etc.) can depend on internal +# implementation details (interfaces that define platform-specific API). +# +# External clients of StreamExecutor should depend on `stream_executor` target (links StreamExecutor +# implementation in static build configuration), or a header only `stream_executor_headers`. 
+ +cc_library( + name = "stream_executor_internal", + hdrs = ["stream_executor_internal.h"], + visibility = ["//visibility:public"], + deps = [ + ":stream_executor_headers", + "//xla/stream_executor/platform", + "@com_google_absl//absl/functional:any_invocable", + "@com_google_absl//absl/status", + "@local_tsl//tsl/platform:status", + "@local_tsl//tsl/platform:statusor", + ], +) + +#===--------------------------------------------------------------------------------------------===# + # The stream_executor_headers target does not prescribe an implementation. cc_library( name = "stream_executor_headers", textual_hdrs = [ + "allocator_stats.h", "blas.h", "command_buffer.h", "device_description.h", @@ -368,33 +404,6 @@ cc_library( ], ) -cc_library( - name = "stream_executor_internal", - hdrs = [ - "stream_executor_internal.h", - ], - visibility = ["//visibility:public"], - deps = [ - ":allocator_stats", - ":device_description", - ":device_memory", - ":device_options", - ":kernel", - ":kernel_cache_config", - ":kernel_spec", - ":launch_dim", - ":module_spec", - ":plugin_registry", - "//xla/stream_executor/platform", - "@com_google_absl//absl/functional:any_invocable", - "@com_google_absl//absl/status", - "@com_google_absl//absl/types:optional", - "@local_tsl//tsl/platform:errors", - "@local_tsl//tsl/platform:status", - "@local_tsl//tsl/platform:statusor", - ], -) - cc_library( name = "stream_executor_pimpl_header", hdrs = [ diff --git a/third_party/xla/xla/stream_executor/build_defs.bzl b/third_party/xla/xla/stream_executor/build_defs.bzl index c76ff0bdea1074..8e75a55324a063 100644 --- a/third_party/xla/xla/stream_executor/build_defs.bzl +++ b/third_party/xla/xla/stream_executor/build_defs.bzl @@ -4,6 +4,9 @@ load("@local_config_rocm//rocm:build_defs.bzl", "if_rocm_is_configured") def stream_executor_friends(): return ["//..."] +def stream_executor_internal(): + return ["//..."] + def tf_additional_cuda_platform_deps(): return [] diff --git 
a/third_party/xla/xla/stream_executor/stream_executor_internal.h b/third_party/xla/xla/stream_executor/stream_executor_internal.h index afb32653eaa222..9acd28dc84deb5 100644 --- a/third_party/xla/xla/stream_executor/stream_executor_internal.h +++ b/third_party/xla/xla/stream_executor/stream_executor_internal.h @@ -31,7 +31,6 @@ limitations under the License. #include "absl/functional/any_invocable.h" #include "absl/status/status.h" -#include "absl/types/optional.h" #include "xla/stream_executor/allocator_stats.h" #include "xla/stream_executor/device_description.h" #include "xla/stream_executor/device_memory.h" @@ -46,7 +45,6 @@ limitations under the License. #include "xla/stream_executor/platform/port.h" #include "xla/stream_executor/plugin_registry.h" #include "xla/stream_executor/trace_listener.h" -#include "tsl/platform/errors.h" #include "tsl/platform/status.h" #include "tsl/platform/statusor.h" @@ -186,21 +184,21 @@ class StreamExecutorInterface { virtual tsl::Status GetKernel(const MultiKernelLoaderSpec& spec, KernelBase* kernel) { - return tsl::errors::Unimplemented("Not Implemented"); + return absl::UnimplementedError("Not Implemented"); } virtual bool UnloadModule(ModuleHandle module_handle) { return false; } virtual tsl::Status LoadModule(const MultiModuleLoaderSpec& spec, ModuleHandle* module_handle) { - return tsl::errors::Unimplemented("Not Implemented"); + return absl::UnimplementedError("Not Implemented"); } virtual tsl::StatusOr> CreateOrShareConstant(Stream* stream, const std::vector& content) { - return tsl::errors::Unimplemented("Not Implemented"); + return absl::UnimplementedError("Not Implemented"); } virtual tsl::Status Launch(Stream* stream, const ThreadDim& thread_dims, const BlockDim& block_dims, const KernelBase& k, const KernelArgsArrayBase& args) { - return tsl::errors::Unimplemented("Not Implemented"); + return absl::UnimplementedError("Not Implemented"); } // Releases any state associated with the kernel. 
@@ -262,8 +260,7 @@ class StreamExecutorInterface { virtual tsl::Status WaitForEvent(Stream* stream, Event* event) = 0; virtual tsl::Status WaitForEventOnExternalStream(std::intptr_t stream, Event* event) { - return tsl::Status( - absl::StatusCode::kUnimplemented, + return absl::UnimplementedError( "WaitForEventOnExternalStream not supported on this executor."); } virtual Event::Status PollForEventStatus(Event* event) = 0; @@ -272,8 +269,8 @@ class StreamExecutorInterface { virtual bool CreateStreamDependency(Stream* dependent, Stream* other) = 0; virtual tsl::Status BlockHostUntilDone(Stream* stream) = 0; virtual tsl::Status GetStatus(Stream* stream) { - return tsl::Status(absl::StatusCode::kUnimplemented, - "GetStatus is not supported on this executor."); + return absl::UnimplementedError( + "GetStatus is not supported on this executor."); } virtual tsl::Status EnablePeerAccessTo(StreamExecutorInterface* other) = 0; virtual bool CanEnablePeerAccessTo(StreamExecutorInterface* other) = 0; From cb83963df2ed9192a596c234eb4ac7f18879477b Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 20 Sep 2023 18:36:01 -0700 Subject: [PATCH 068/567] Integrate LLVM at llvm/llvm-project@afd7db48c55c Updates LLVM usage to match [afd7db48c55c](https://github.com/llvm/llvm-project/commit/afd7db48c55c) PiperOrigin-RevId: 567141554 --- third_party/llvm/generated.patch | 1668 ++++++++++++++++++++++++++++++ third_party/llvm/workspace.bzl | 4 +- 2 files changed, 1670 insertions(+), 2 deletions(-) diff --git a/third_party/llvm/generated.patch b/third_party/llvm/generated.patch index 509398da979e83..450540fcebd5c7 100644 --- a/third_party/llvm/generated.patch +++ b/third_party/llvm/generated.patch @@ -1 +1,1669 @@ Auto generated patch. Do not edit or delete it, even if empty. 
+diff -ruN --strip-trailing-cr a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp ++++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +@@ -271,10 +271,7 @@ + bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, + IRBuilder<> &Builder); + +- bool hoistCommonCodeFromSuccessors(BasicBlock *BB, bool EqTermsOnly); +- bool hoistSuccIdenticalTerminatorToSwitchOrIf( +- Instruction *TI, Instruction *I1, +- SmallVectorImpl &OtherSuccTIs); ++ bool HoistThenElseCodeToIf(BranchInst *BI, bool EqTermsOnly); + bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB); + bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond, + BasicBlock *TrueBB, BasicBlock *FalseBB, +@@ -1411,9 +1408,8 @@ + } + + // If we would need to insert a select that uses the value of this invoke +-// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would +-// need to do this), we can't hoist the invoke, as there is nowhere to put the +-// select in this case. ++// (comments in HoistThenElseCodeToIf explain why we would need to do this), we ++// can't hoist the invoke, as there is nowhere to put the select in this case. + static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, + Instruction *I1, Instruction *I2) { + for (BasicBlock *Succ : successors(BB1)) { +@@ -1428,9 +1424,9 @@ + return true; + } + +-// Get interesting characteristics of instructions that +-// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of +-// instructions can be reordered across. ++// Get interesting characteristics of instructions that `HoistThenElseCodeToIf` ++// didn't hoist. They restrict what kind of instructions can be reordered ++// across. 
+ enum SkipFlags { + SkipReadMem = 1, + SkipSideEffect = 2, +@@ -1488,7 +1484,7 @@ + + static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false); + +-/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical ++/// Helper function for HoistThenElseCodeToIf. Return true if identical + /// instructions \p I1 and \p I2 can and should be hoisted. + static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, + const TargetTransformInfo &TTI) { +@@ -1519,51 +1515,62 @@ + return true; + } + +-/// Hoist any common code in the successor blocks up into the block. This +-/// function guarantees that BB dominates all successors. If EqTermsOnly is +-/// given, only perform hoisting in case both blocks only contain a terminator. +-/// In that case, only the original BI will be replaced and selects for PHIs are +-/// added. +-bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB, +- bool EqTermsOnly) { ++/// Given a conditional branch that goes to BB1 and BB2, hoist any common code ++/// in the two blocks up into the branch block. The caller of this function ++/// guarantees that BI's block dominates BB1 and BB2. If EqTermsOnly is given, ++/// only perform hoisting in case both blocks only contain a terminator. In that ++/// case, only the original BI will be replaced and selects for PHIs are added. ++bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, bool EqTermsOnly) { + // This does very trivial matching, with limited scanning, to find identical +- // instructions in the two blocks. In particular, we don't want to get into +- // O(N1*N2*...) situations here where Ni are the sizes of these successors. As ++ // instructions in the two blocks. In particular, we don't want to get into ++ // O(M*N) situations here where M and N are the sizes of BB1 and BB2. 
As + // such, we currently just scan for obviously identical instructions in an + // identical order, possibly separated by the same number of non-identical + // instructions. +- unsigned int SuccSize = succ_size(BB); +- if (SuccSize < 2) +- return false; ++ BasicBlock *BB1 = BI->getSuccessor(0); // The true destination. ++ BasicBlock *BB2 = BI->getSuccessor(1); // The false destination + + // If either of the blocks has it's address taken, then we can't do this fold, + // because the code we'd hoist would no longer run when we jump into the block + // by it's address. +- for (auto *Succ : successors(BB)) +- if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor()) +- return false; ++ if (BB1->hasAddressTaken() || BB2->hasAddressTaken()) ++ return false; + +- auto *TI = BB->getTerminator(); ++ BasicBlock::iterator BB1_Itr = BB1->begin(); ++ BasicBlock::iterator BB2_Itr = BB2->begin(); + +- // The second of pair is a SkipFlags bitmask. +- using SuccIterPair = std::pair; +- SmallVector SuccIterPairs; +- for (auto *Succ : successors(BB)) { +- BasicBlock::iterator SuccItr = Succ->begin(); +- if (isa(*SuccItr)) +- return false; +- SuccIterPairs.push_back(SuccIterPair(SuccItr, 0)); ++ Instruction *I1 = &*BB1_Itr++, *I2 = &*BB2_Itr++; ++ // Skip debug info if it is not identical. ++ DbgInfoIntrinsic *DBI1 = dyn_cast(I1); ++ DbgInfoIntrinsic *DBI2 = dyn_cast(I2); ++ if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { ++ while (isa(I1)) ++ I1 = &*BB1_Itr++; ++ while (isa(I2)) ++ I2 = &*BB2_Itr++; + } ++ if (isa(I1)) ++ return false; ++ ++ BasicBlock *BIParent = BI->getParent(); ++ ++ bool Changed = false; ++ ++ auto _ = make_scope_exit([&]() { ++ if (Changed) ++ ++NumHoistCommonCode; ++ }); + + // Check if only hoisting terminators is allowed. This does not add new + // instructions to the hoist location. + if (EqTermsOnly) { + // Skip any debug intrinsics, as they are free to hoist. 
+- for (auto &SuccIter : make_first_range(SuccIterPairs)) { +- auto *INonDbg = &*skipDebugIntrinsics(SuccIter); +- if (!INonDbg->isTerminator()) +- return false; +- } ++ auto *I1NonDbg = &*skipDebugIntrinsics(I1->getIterator()); ++ auto *I2NonDbg = &*skipDebugIntrinsics(I2->getIterator()); ++ if (!I1NonDbg->isIdenticalToWhenDefined(I2NonDbg)) ++ return false; ++ if (!I1NonDbg->isTerminator()) ++ return false; + // Now we know that we only need to hoist debug intrinsics and the + // terminator. Let the loop below handle those 2 cases. + } +@@ -1572,234 +1579,154 @@ + // many instructions we skip, serving as a compilation time control as well as + // preventing excessive increase of life ranges. + unsigned NumSkipped = 0; +- // If we find an unreachable instruction at the beginning of a basic block, we +- // can still hoist instructions from the rest of the basic blocks. +- if (SuccIterPairs.size() > 2) { +- erase_if(SuccIterPairs, +- [](const auto &Pair) { return isa(Pair.first); }); +- if (SuccIterPairs.size() < 2) +- return false; +- } + +- bool Changed = false; ++ // Record any skipped instuctions that may read memory, write memory or have ++ // side effects, or have implicit control flow. ++ unsigned SkipFlagsBB1 = 0; ++ unsigned SkipFlagsBB2 = 0; + + for (;;) { +- auto *SuccIterPairBegin = SuccIterPairs.begin(); +- auto &BB1ItrPair = *SuccIterPairBegin++; +- auto OtherSuccIterPairRange = +- iterator_range(SuccIterPairBegin, SuccIterPairs.end()); +- auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange); +- +- Instruction *I1 = &*BB1ItrPair.first; +- auto *BB1 = I1->getParent(); +- +- // Skip debug info if it is not identical. 
+- bool AllDbgInstsAreIdentical = all_of(OtherSuccIterRange, [I1](auto &Iter) { +- Instruction *I2 = &*Iter; +- return I1->isIdenticalToWhenDefined(I2); +- }); +- if (!AllDbgInstsAreIdentical) { +- while (isa(I1)) +- I1 = &*++BB1ItrPair.first; +- for (auto &SuccIter : OtherSuccIterRange) { +- Instruction *I2 = &*SuccIter; +- while (isa(I2)) +- I2 = &*++SuccIter; +- } +- } +- +- bool AllInstsAreIdentical = true; +- bool HasTerminator = I1->isTerminator(); +- for (auto &SuccIter : OtherSuccIterRange) { +- Instruction *I2 = &*SuccIter; +- HasTerminator |= I2->isTerminator(); +- if (AllInstsAreIdentical && !I1->isIdenticalToWhenDefined(I2)) +- AllInstsAreIdentical = false; +- } +- + // If we are hoisting the terminator instruction, don't move one (making a + // broken BB), instead clone it, and remove BI. +- if (HasTerminator) { ++ if (I1->isTerminator() || I2->isTerminator()) { + // If any instructions remain in the block, we cannot hoist terminators. +- if (NumSkipped || SuccSize != SuccIterPairs.size() || +- !AllInstsAreIdentical) ++ if (NumSkipped || !I1->isIdenticalToWhenDefined(I2)) + return Changed; +- SmallVector Insts; +- for (auto &SuccIter : OtherSuccIterRange) +- Insts.push_back(&*SuccIter); +- return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, Insts) || Changed; +- } +- +- if (AllInstsAreIdentical) { +- unsigned SkipFlagsBB1 = BB1ItrPair.second; +- AllInstsAreIdentical = +- isSafeToHoistInstr(I1, SkipFlagsBB1) && +- all_of(OtherSuccIterPairRange, [=](const auto &Pair) { +- Instruction *I2 = &*Pair.first; +- unsigned SkipFlagsBB2 = Pair.second; +- // Even if the instructions are identical, it may not +- // be safe to hoist them if we have skipped over +- // instructions with side effects or their operands +- // weren't hoisted. 
+- return isSafeToHoistInstr(I2, SkipFlagsBB2) && +- shouldHoistCommonInstructions(I1, I2, TTI); +- }); +- } +- +- if (AllInstsAreIdentical) { +- BB1ItrPair.first++; +- if (isa(I1)) { ++ goto HoistTerminator; ++ } ++ ++ if (I1->isIdenticalToWhenDefined(I2) && ++ // Even if the instructions are identical, it may not be safe to hoist ++ // them if we have skipped over instructions with side effects or their ++ // operands weren't hoisted. ++ isSafeToHoistInstr(I1, SkipFlagsBB1) && ++ isSafeToHoistInstr(I2, SkipFlagsBB2) && ++ shouldHoistCommonInstructions(I1, I2, TTI)) { ++ if (isa(I1) || isa(I2)) { ++ assert(isa(I1) && isa(I2)); + // The debug location is an integral part of a debug info intrinsic + // and can't be separated from it or replaced. Instead of attempting + // to merge locations, simply hoist both copies of the intrinsic. +- I1->moveBeforePreserving(TI); +- for (auto &SuccIter : OtherSuccIterRange) { +- auto *I2 = &*SuccIter++; +- assert(isa(I2)); +- I2->moveBeforePreserving(TI); +- } ++ I1->moveBeforePreserving(BI); ++ I2->moveBeforePreserving(BI); ++ Changed = true; + } else { + // For a normal instruction, we just move one to right before the + // branch, then replace all uses of the other with the first. Finally, + // we remove the now redundant second instruction. +- I1->moveBeforePreserving(TI); +- BB->splice(TI->getIterator(), BB1, I1->getIterator()); +- for (auto &SuccIter : OtherSuccIterRange) { +- Instruction *I2 = &*SuccIter++; +- assert(I2 != I1); +- if (!I2->use_empty()) +- I2->replaceAllUsesWith(I1); +- I1->andIRFlags(I2); +- combineMetadataForCSE(I1, I2, true); +- // I1 and I2 are being combined into a single instruction. Its debug +- // location is the merged locations of the original instructions. 
+- I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()); +- I2->eraseFromParent(); +- } ++ I1->moveBeforePreserving(BI); ++ if (!I2->use_empty()) ++ I2->replaceAllUsesWith(I1); ++ I1->andIRFlags(I2); ++ combineMetadataForCSE(I1, I2, true); ++ ++ // I1 and I2 are being combined into a single instruction. Its debug ++ // location is the merged locations of the original instructions. ++ I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()); ++ ++ I2->eraseFromParent(); + } +- if (!Changed) +- NumHoistCommonCode += SuccIterPairs.size(); + Changed = true; +- NumHoistCommonInstrs += SuccIterPairs.size(); ++ ++NumHoistCommonInstrs; + } else { + if (NumSkipped >= HoistCommonSkipLimit) + return Changed; + // We are about to skip over a pair of non-identical instructions. Record + // if any have characteristics that would prevent reordering instructions + // across them. +- for (auto &SuccIterPair : SuccIterPairs) { +- Instruction *I = &*SuccIterPair.first++; +- SuccIterPair.second |= skippedInstrFlags(I); +- } ++ SkipFlagsBB1 |= skippedInstrFlags(I1); ++ SkipFlagsBB2 |= skippedInstrFlags(I2); + ++NumSkipped; + } +- } +-} +- +-bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf( +- Instruction *TI, Instruction *I1, +- SmallVectorImpl &OtherSuccTIs) { +- +- auto *BI = dyn_cast(TI); +- +- bool Changed = false; +- BasicBlock *TIParent = TI->getParent(); +- BasicBlock *BB1 = I1->getParent(); + +- // Use only for an if statement. +- auto *I2 = *OtherSuccTIs.begin(); +- auto *BB2 = I2->getParent(); +- if (BI) { +- assert(OtherSuccTIs.size() == 1); +- assert(BI->getSuccessor(0) == I1->getParent()); +- assert(BI->getSuccessor(1) == I2->getParent()); ++ I1 = &*BB1_Itr++; ++ I2 = &*BB2_Itr++; ++ // Skip debug info if it is not identical. 
++ DbgInfoIntrinsic *DBI1 = dyn_cast(I1); ++ DbgInfoIntrinsic *DBI2 = dyn_cast(I2); ++ if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { ++ while (isa(I1)) ++ I1 = &*BB1_Itr++; ++ while (isa(I2)) ++ I2 = &*BB2_Itr++; ++ } + } + +- // In the case of an if statement, we try to hoist an invoke. ++ return Changed; ++ ++HoistTerminator: ++ // It may not be possible to hoist an invoke. + // FIXME: Can we define a safety predicate for CallBr? +- // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll +- // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit? +- if (isa(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2))) +- return false; ++ if (isa(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)) ++ return Changed; + + // TODO: callbr hoisting currently disabled pending further study. + if (isa(I1)) +- return false; ++ return Changed; + + for (BasicBlock *Succ : successors(BB1)) { + for (PHINode &PN : Succ->phis()) { + Value *BB1V = PN.getIncomingValueForBlock(BB1); +- for (Instruction *OtherSuccTI : OtherSuccTIs) { +- Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent()); +- if (BB1V == BB2V) +- continue; ++ Value *BB2V = PN.getIncomingValueForBlock(BB2); ++ if (BB1V == BB2V) ++ continue; + +- // In the case of an if statement, check for +- // passingValueIsAlwaysUndefined here because we would rather eliminate +- // undefined control flow then converting it to a select. +- if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) || +- passingValueIsAlwaysUndefined(BB2V, &PN)) +- return false; +- } ++ // Check for passingValueIsAlwaysUndefined here because we would rather ++ // eliminate undefined control flow then converting it to a select. ++ if (passingValueIsAlwaysUndefined(BB1V, &PN) || ++ passingValueIsAlwaysUndefined(BB2V, &PN)) ++ return Changed; + } + } + + // Okay, it is safe to hoist the terminator. 
+ Instruction *NT = I1->clone(); +- NT->insertInto(TIParent, TI->getIterator()); ++ NT->insertInto(BIParent, BI->getIterator()); + if (!NT->getType()->isVoidTy()) { + I1->replaceAllUsesWith(NT); +- for (Instruction *OtherSuccTI : OtherSuccTIs) +- OtherSuccTI->replaceAllUsesWith(NT); ++ I2->replaceAllUsesWith(NT); + NT->takeName(I1); + } + Changed = true; +- NumHoistCommonInstrs += OtherSuccTIs.size() + 1; ++ ++NumHoistCommonInstrs; + + // Ensure terminator gets a debug location, even an unknown one, in case + // it involves inlinable calls. +- SmallVector Locs; +- Locs.push_back(I1->getDebugLoc()); +- for (auto *OtherSuccTI : OtherSuccTIs) +- Locs.push_back(OtherSuccTI->getDebugLoc()); +- NT->setDebugLoc(DILocation::getMergedLocations(Locs)); ++ NT->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()); + + // PHIs created below will adopt NT's merged DebugLoc. + IRBuilder Builder(NT); + +- // In the case of an if statement, hoisting one of the terminators from our +- // successor is a great thing. Unfortunately, the successors of the if/else +- // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2 +- // must agree for all PHI nodes, so we insert select instruction to compute +- // the final result. +- if (BI) { +- std::map, SelectInst *> InsertedSelects; +- for (BasicBlock *Succ : successors(BB1)) { +- for (PHINode &PN : Succ->phis()) { +- Value *BB1V = PN.getIncomingValueForBlock(BB1); +- Value *BB2V = PN.getIncomingValueForBlock(BB2); +- if (BB1V == BB2V) +- continue; +- +- // These values do not agree. Insert a select instruction before NT +- // that determines the right value. +- SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)]; +- if (!SI) { +- // Propagate fast-math-flags from phi node to its replacement select. 
+- IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); +- if (isa(PN)) +- Builder.setFastMathFlags(PN.getFastMathFlags()); +- +- SI = cast(Builder.CreateSelect( +- BI->getCondition(), BB1V, BB2V, +- BB1V->getName() + "." + BB2V->getName(), BI)); +- } ++ // Hoisting one of the terminators from our successor is a great thing. ++ // Unfortunately, the successors of the if/else blocks may have PHI nodes in ++ // them. If they do, all PHI entries for BB1/BB2 must agree for all PHI ++ // nodes, so we insert select instruction to compute the final result. ++ std::map, SelectInst *> InsertedSelects; ++ for (BasicBlock *Succ : successors(BB1)) { ++ for (PHINode &PN : Succ->phis()) { ++ Value *BB1V = PN.getIncomingValueForBlock(BB1); ++ Value *BB2V = PN.getIncomingValueForBlock(BB2); ++ if (BB1V == BB2V) ++ continue; + +- // Make the PHI node use the select for all incoming values for BB1/BB2 +- for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) +- if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2) +- PN.setIncomingValue(i, SI); +- } ++ // These values do not agree. Insert a select instruction before NT ++ // that determines the right value. ++ SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)]; ++ if (!SI) { ++ // Propagate fast-math-flags from phi node to its replacement select. ++ IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); ++ if (isa(PN)) ++ Builder.setFastMathFlags(PN.getFastMathFlags()); ++ ++ SI = cast( ++ Builder.CreateSelect(BI->getCondition(), BB1V, BB2V, ++ BB1V->getName() + "." + BB2V->getName(), BI)); ++ } ++ ++ // Make the PHI node use the select for all incoming values for BB1/BB2 ++ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) ++ if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2) ++ PN.setIncomingValue(i, SI); + } + } + +@@ -1807,16 +1734,16 @@ + + // Update any PHI nodes in our new successors. 
+ for (BasicBlock *Succ : successors(BB1)) { +- AddPredecessorToBlock(Succ, TIParent, BB1); ++ AddPredecessorToBlock(Succ, BIParent, BB1); + if (DTU) +- Updates.push_back({DominatorTree::Insert, TIParent, Succ}); ++ Updates.push_back({DominatorTree::Insert, BIParent, Succ}); + } + + if (DTU) +- for (BasicBlock *Succ : successors(TI)) +- Updates.push_back({DominatorTree::Delete, TIParent, Succ}); ++ for (BasicBlock *Succ : successors(BI)) ++ Updates.push_back({DominatorTree::Delete, BIParent, Succ}); + +- EraseTerminatorAndDCECond(TI); ++ EraseTerminatorAndDCECond(BI); + if (DTU) + DTU->applyUpdates(Updates); + return Changed; +@@ -2850,8 +2777,8 @@ + Value *OrigV = PN.getIncomingValueForBlock(BB); + Value *ThenV = PN.getIncomingValueForBlock(ThenBB); + +- // FIXME: Try to remove some of the duplication with +- // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial. ++ // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf. ++ // Skip PHIs which are trivial. + if (ThenV == OrigV) + continue; + +@@ -6888,10 +6815,6 @@ + if (ReduceSwitchRange(SI, Builder, DL, TTI)) + return requestResimplify(); + +- if (HoistCommon && +- hoistCommonCodeFromSuccessors(SI->getParent(), !Options.HoistCommonInsts)) +- return requestResimplify(); +- + return false; + } + +@@ -7158,8 +7081,7 @@ + // can hoist it up to the branching block. 
+ if (BI->getSuccessor(0)->getSinglePredecessor()) { + if (BI->getSuccessor(1)->getSinglePredecessor()) { +- if (HoistCommon && hoistCommonCodeFromSuccessors( +- BI->getParent(), !Options.HoistCommonInsts)) ++ if (HoistCommon && HoistThenElseCodeToIf(BI, !Options.HoistCommonInsts)) + return requestResimplify(); + } else { + // If Successor #1 has multiple preds, we may be able to conditionally +diff -ruN --strip-trailing-cr a/llvm/test/CodeGen/AArch64/patchable-function-entry-bti.ll b/llvm/test/CodeGen/AArch64/patchable-function-entry-bti.ll +--- a/llvm/test/CodeGen/AArch64/patchable-function-entry-bti.ll ++++ b/llvm/test/CodeGen/AArch64/patchable-function-entry-bti.ll +@@ -70,7 +70,7 @@ + i64 4, label %sw.bb4 + ] + sw.bb0: +- call void asm sideeffect "nop", ""() ++ call void asm sideeffect "", ""() + ret void + sw.bb1: + call void asm sideeffect "", ""() +diff -ruN --strip-trailing-cr a/llvm/test/Transforms/SimplifyCFG/HoistCode.ll b/llvm/test/Transforms/SimplifyCFG/HoistCode.ll +--- a/llvm/test/Transforms/SimplifyCFG/HoistCode.ll ++++ b/llvm/test/Transforms/SimplifyCFG/HoistCode.ll +@@ -19,9 +19,21 @@ + + define void @foo_switch(i64 %C, ptr %P) { + ; CHECK-LABEL: @foo_switch( +-; CHECK-NEXT: common.ret: +-; CHECK-NEXT: store i32 7, ptr [[P:%.*]], align 4 ++; CHECK-NEXT: switch i64 [[C:%.*]], label [[BB0:%.*]] [ ++; CHECK-NEXT: i64 1, label [[BB1:%.*]] ++; CHECK-NEXT: i64 2, label [[BB2:%.*]] ++; CHECK-NEXT: ] ++; CHECK: common.ret: + ; CHECK-NEXT: ret void ++; CHECK: bb0: ++; CHECK-NEXT: store i32 7, ptr [[P:%.*]], align 4 ++; CHECK-NEXT: br label [[COMMON_RET:%.*]] ++; CHECK: bb1: ++; CHECK-NEXT: store i32 7, ptr [[P]], align 4 ++; CHECK-NEXT: br label [[COMMON_RET]] ++; CHECK: bb2: ++; CHECK-NEXT: store i32 7, ptr [[P]], align 4 ++; CHECK-NEXT: br label [[COMMON_RET]] + ; + switch i64 %C, label %bb0 [ + i64 1, label %bb1 +diff -ruN --strip-trailing-cr a/llvm/test/Transforms/SimplifyCFG/hoist-common-code.ll b/llvm/test/Transforms/SimplifyCFG/hoist-common-code.ll 
+--- a/llvm/test/Transforms/SimplifyCFG/hoist-common-code.ll ++++ b/llvm/test/Transforms/SimplifyCFG/hoist-common-code.ll +@@ -26,11 +26,27 @@ + + define void @test_switch(i64 %i, ptr %Q) { + ; CHECK-LABEL: @test_switch( +-; CHECK-NEXT: common.ret: ++; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ ++; CHECK-NEXT: i64 1, label [[BB1:%.*]] ++; CHECK-NEXT: i64 2, label [[BB2:%.*]] ++; CHECK-NEXT: ] ++; CHECK: common.ret: ++; CHECK-NEXT: ret void ++; CHECK: bb0: + ; CHECK-NEXT: store i32 1, ptr [[Q:%.*]], align 4 + ; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[Q]], align 4 + ; CHECK-NEXT: call void @bar(i32 [[A]]) +-; CHECK-NEXT: ret void ++; CHECK-NEXT: br label [[COMMON_RET:%.*]] ++; CHECK: bb1: ++; CHECK-NEXT: store i32 1, ptr [[Q]], align 4 ++; CHECK-NEXT: [[B:%.*]] = load i32, ptr [[Q]], align 4 ++; CHECK-NEXT: call void @bar(i32 [[B]]) ++; CHECK-NEXT: br label [[COMMON_RET]] ++; CHECK: bb2: ++; CHECK-NEXT: store i32 1, ptr [[Q]], align 4 ++; CHECK-NEXT: [[C:%.*]] = load i32, ptr [[Q]], align 4 ++; CHECK-NEXT: call void @bar(i32 [[C]]) ++; CHECK-NEXT: br label [[COMMON_RET]] + ; + switch i64 %i, label %bb0 [ + i64 1, label %bb1 +@@ -53,41 +69,25 @@ + ret void + } + +-; We ensure that we examine all instructions during each iteration to confirm the presence of a terminating one. 
+-define void @test_switch_reach_terminator(i64 %i, ptr %p) { +-; CHECK-LABEL: @test_switch_reach_terminator( +-; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ +-; CHECK-NEXT: i64 1, label [[BB1:%.*]] +-; CHECK-NEXT: i64 2, label [[COMMON_RET:%.*]] +-; CHECK-NEXT: ] +-; CHECK: common.ret: +-; CHECK-NEXT: ret void +-; CHECK: bb0: +-; CHECK-NEXT: store i32 1, ptr [[P:%.*]], align 4 +-; CHECK-NEXT: br label [[COMMON_RET]] +-; CHECK: bb1: +-; CHECK-NEXT: store i32 2, ptr [[P]], align 4 +-; CHECK-NEXT: br label [[COMMON_RET]] +-; +- switch i64 %i, label %bb0 [ +- i64 1, label %bb1 +- i64 2, label %bb2 +- ] +-bb0: ; preds = %0 +- store i32 1, ptr %p +- ret void +-bb1: ; preds = %0 +- store i32 2, ptr %p +- ret void +-bb2: ; preds = %0 +- ret void +-} +- + define i1 @common_instr_on_switch(i64 %a, i64 %b, i64 %c) unnamed_addr { + ; CHECK-LABEL: @common_instr_on_switch( + ; CHECK-NEXT: start: ++; CHECK-NEXT: switch i64 [[A:%.*]], label [[BB0:%.*]] [ ++; CHECK-NEXT: i64 1, label [[BB1:%.*]] ++; CHECK-NEXT: i64 2, label [[BB2:%.*]] ++; CHECK-NEXT: ] ++; CHECK: bb0: + ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[B:%.*]], [[C:%.*]] +-; CHECK-NEXT: ret i1 [[TMP0]] ++; CHECK-NEXT: br label [[EXIT:%.*]] ++; CHECK: bb1: ++; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[B]], [[C]] ++; CHECK-NEXT: br label [[EXIT]] ++; CHECK: bb2: ++; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[B]], [[C]] ++; CHECK-NEXT: br label [[EXIT]] ++; CHECK: exit: ++; CHECK-NEXT: [[RESULT:%.*]] = phi i1 [ [[TMP0]], [[BB0]] ], [ [[TMP1]], [[BB1]] ], [ [[TMP2]], [[BB2]] ] ++; CHECK-NEXT: ret i1 [[RESULT]] + ; + start: + switch i64 %a, label %bb0 [ +diff -ruN --strip-trailing-cr a/llvm/test/Transforms/SimplifyCFG/hoist-common-code-with-unreachable.ll b/llvm/test/Transforms/SimplifyCFG/hoist-common-code-with-unreachable.ll +--- a/llvm/test/Transforms/SimplifyCFG/hoist-common-code-with-unreachable.ll ++++ b/llvm/test/Transforms/SimplifyCFG/hoist-common-code-with-unreachable.ll +@@ -4,8 +4,25 @@ + define i1 
@common_instr_with_unreachable(i64 %a, i64 %b, i64 %c) { + ; CHECK-LABEL: @common_instr_with_unreachable( + ; CHECK-NEXT: start: ++; CHECK-NEXT: switch i64 [[A:%.*]], label [[UNREACHABLE:%.*]] [ ++; CHECK-NEXT: i64 0, label [[BB0:%.*]] ++; CHECK-NEXT: i64 1, label [[BB1:%.*]] ++; CHECK-NEXT: i64 2, label [[BB2:%.*]] ++; CHECK-NEXT: ] ++; CHECK: unreachable: ++; CHECK-NEXT: unreachable ++; CHECK: bb0: + ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[B:%.*]], [[C:%.*]] +-; CHECK-NEXT: ret i1 [[TMP0]] ++; CHECK-NEXT: br label [[EXIT:%.*]] ++; CHECK: bb1: ++; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[B]], [[C]] ++; CHECK-NEXT: br label [[EXIT]] ++; CHECK: bb2: ++; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[B]], [[C]] ++; CHECK-NEXT: br label [[EXIT]] ++; CHECK: exit: ++; CHECK-NEXT: [[RESULT:%.*]] = phi i1 [ [[TMP0]], [[BB0]] ], [ [[TMP1]], [[BB1]] ], [ [[TMP2]], [[BB2]] ] ++; CHECK-NEXT: ret i1 [[RESULT]] + ; + start: + switch i64 %a, label %unreachable [ +@@ -37,90 +54,43 @@ + define i1 @common_instr_with_unreachable_2(i64 %a, i64 %b, i64 %c) { + ; CHECK-LABEL: @common_instr_with_unreachable_2( + ; CHECK-NEXT: start: +-; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[B:%.*]], [[C:%.*]] +-; CHECK-NEXT: ret i1 [[TMP0]] +-; +-start: +- switch i64 %a, label %bb1 [ +- i64 0, label %bb0 +- i64 1, label %unreachable +- i64 2, label %bb2 +- ] +- +-unreachable: +- unreachable +- +-bb0: ; preds = %start +- %0 = icmp eq i64 %b, %c +- br label %exit +- +-bb1: ; preds = %start +- %1 = icmp eq i64 %b, %c +- br label %exit +- +-bb2: ; preds = %start +- %2 = icmp eq i64 %b, %c +- br label %exit +- +-exit: ; preds = %bb2, %bb1, %bb0 +- %result = phi i1 [ %0, %bb0 ], [ %1, %bb1 ], [ %2, %bb2 ] +- ret i1 %result +-} +- +-declare void @no_return() +-declare void @foo() +- +-define i1 @not_only_unreachable(i64 %a, i64 %b, i64 %c) { +-; CHECK-LABEL: @not_only_unreachable( +-; CHECK-NEXT: start: +-; CHECK-NEXT: switch i64 [[A:%.*]], label [[UNREACHABLE:%.*]] [ ++; CHECK-NEXT: switch i64 [[A:%.*]], 
label [[BB1:%.*]] [ + ; CHECK-NEXT: i64 0, label [[BB0:%.*]] +-; CHECK-NEXT: i64 1, label [[BB1:%.*]] + ; CHECK-NEXT: i64 2, label [[BB2:%.*]] + ; CHECK-NEXT: ] +-; CHECK: unreachable: +-; CHECK-NEXT: call void @no_return() +-; CHECK-NEXT: unreachable + ; CHECK: bb0: + ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[B:%.*]], [[C:%.*]] +-; CHECK-NEXT: call void @foo() + ; CHECK-NEXT: br label [[EXIT:%.*]] + ; CHECK: bb1: + ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[B]], [[C]] +-; CHECK-NEXT: call void @foo() + ; CHECK-NEXT: br label [[EXIT]] + ; CHECK: bb2: + ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[B]], [[C]] +-; CHECK-NEXT: call void @foo() + ; CHECK-NEXT: br label [[EXIT]] + ; CHECK: exit: + ; CHECK-NEXT: [[RESULT:%.*]] = phi i1 [ [[TMP0]], [[BB0]] ], [ [[TMP1]], [[BB1]] ], [ [[TMP2]], [[BB2]] ] + ; CHECK-NEXT: ret i1 [[RESULT]] + ; + start: +- switch i64 %a, label %unreachable [ ++ switch i64 %a, label %bb1 [ + i64 0, label %bb0 +- i64 1, label %bb1 ++ i64 1, label %unreachable + i64 2, label %bb2 + ] + + unreachable: +- call void @no_return() + unreachable + + bb0: ; preds = %start + %0 = icmp eq i64 %b, %c +- call void @foo() + br label %exit + + bb1: ; preds = %start + %1 = icmp eq i64 %b, %c +- call void @foo() + br label %exit + + bb2: ; preds = %start + %2 = icmp eq i64 %b, %c +- call void @foo() + br label %exit + + exit: ; preds = %bb2, %bb1, %bb0 +diff -ruN --strip-trailing-cr a/llvm/test/Transforms/SimplifyCFG/hoist-common-skip.ll b/llvm/test/Transforms/SimplifyCFG/hoist-common-skip.ll +--- a/llvm/test/Transforms/SimplifyCFG/hoist-common-skip.ll ++++ b/llvm/test/Transforms/SimplifyCFG/hoist-common-skip.ll +@@ -48,68 +48,6 @@ + ret void + } + +-define void @f0_switch(i64 %i, ptr nocapture noundef %d, ptr nocapture noundef readonly %m, ptr nocapture noundef readonly %b) { +-; CHECK-LABEL: @f0_switch( +-; CHECK-NEXT: entry: +-; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[B:%.*]], align 2 +-; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[M:%.*]], align 2 +-; 
CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ +-; CHECK-NEXT: i64 1, label [[BB1:%.*]] +-; CHECK-NEXT: i64 2, label [[BB2:%.*]] +-; CHECK-NEXT: ] +-; CHECK: bb0: +-; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[TMP0]], 1 +-; CHECK-NEXT: [[U:%.*]] = add i16 [[ADD]], [[TMP1]] +-; CHECK-NEXT: br label [[END:%.*]] +-; CHECK: bb1: +-; CHECK-NEXT: [[SUB:%.*]] = sub nsw i16 [[TMP0]], 1 +-; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[SUB]], 3 +-; CHECK-NEXT: [[V:%.*]] = add i16 [[SUB]], [[TMP2]] +-; CHECK-NEXT: br label [[END]] +-; CHECK: bb2: +-; CHECK-NEXT: [[SUB2:%.*]] = sub nsw i16 [[TMP0]], 1 +-; CHECK-NEXT: [[TMP3:%.*]] = add i16 [[SUB2]], 3 +-; CHECK-NEXT: [[W:%.*]] = add i16 [[SUB2]], [[TMP3]] +-; CHECK-NEXT: br label [[END]] +-; CHECK: end: +-; CHECK-NEXT: [[UV:%.*]] = phi i16 [ [[U]], [[BB0]] ], [ [[V]], [[BB1]] ], [ [[W]], [[BB2]] ] +-; CHECK-NEXT: store i16 [[UV]], ptr [[D:%.*]], align 2 +-; CHECK-NEXT: ret void +-; +-entry: +- switch i64 %i, label %bb0 [ +- i64 1, label %bb1 +- i64 2, label %bb2 +- ] +- +-bb0: +- %0 = load i16, ptr %b, align 2 +- %add = add nsw i16 %0, 1 +- %1 = load i16, ptr %m, align 2 +- %u = add i16 %add, %1 +- br label %end +- +-bb1: +- %2 = load i16, ptr %b, align 2 +- %sub = sub nsw i16 %2, 1 +- %3 = load i16, ptr %m, align 2 +- %4 = add i16 %sub, 3 +- %v = add i16 %sub, %4 +- br label %end +- +-bb2: +- %5 = load i16, ptr %b, align 2 +- %sub2 = sub nsw i16 %5, 1 +- %6 = load i16, ptr %m, align 2 +- %7 = add i16 %sub2, 3 +- %w = add i16 %sub2, %7 +- br label %end +- +-end: +- %uv = phi i16 [ %u, %bb0 ], [ %v, %bb1 ], [ %w, %bb2 ] +- store i16 %uv, ptr %d, align 2 +- ret void +-} + + ;; Check some instructions (e.g. add) can be reordered across instructions with side + ;; effects, while others (e.g. load) can't. 
+@@ -159,70 +97,6 @@ + ret void + } + +-define void @f2_switch(i64 %i, ptr nocapture noundef %d, ptr nocapture noundef readonly %m, ptr nocapture noundef readonly %b) { +-; CHECK-LABEL: @f2_switch( +-; CHECK-NEXT: entry: +-; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[B:%.*]], align 2 +-; CHECK-NEXT: [[ADD_0:%.*]] = add nsw i16 [[TMP0]], 1 +-; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ +-; CHECK-NEXT: i64 1, label [[BB1:%.*]] +-; CHECK-NEXT: i64 2, label [[BB2:%.*]] +-; CHECK-NEXT: ] +-; CHECK: bb0: +-; CHECK-NEXT: call void @side_effects0() +-; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[M:%.*]], align 2 +-; CHECK-NEXT: [[U:%.*]] = add i16 [[ADD_0]], [[TMP1]] +-; CHECK-NEXT: br label [[END:%.*]] +-; CHECK: bb1: +-; CHECK-NEXT: call void @no_side_effects0() +-; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[M]], align 2 +-; CHECK-NEXT: [[V:%.*]] = add i16 [[ADD_0]], [[TMP2]] +-; CHECK-NEXT: br label [[END]] +-; CHECK: bb2: +-; CHECK-NEXT: call void @no_side_effects0() +-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[M]], align 2 +-; CHECK-NEXT: [[W:%.*]] = add i16 [[ADD_0]], [[TMP3]] +-; CHECK-NEXT: br label [[END]] +-; CHECK: end: +-; CHECK-NEXT: [[UV:%.*]] = phi i16 [ [[U]], [[BB0]] ], [ [[V]], [[BB1]] ], [ [[W]], [[BB2]] ] +-; CHECK-NEXT: store i16 [[UV]], ptr [[D:%.*]], align 2 +-; CHECK-NEXT: ret void +-; +-entry: +- switch i64 %i, label %bb0 [ +- i64 1, label %bb1 +- i64 2, label %bb2 +- ] +- +-bb0: +- %0 = load i16, ptr %b, align 2 +- call void @side_effects0() +- %add.0 = add nsw i16 %0, 1 +- %1 = load i16, ptr %m, align 2 +- %u = add i16 %add.0, %1 +- br label %end +- +-bb1: +- %2 = load i16, ptr %b, align 2 +- call void @no_side_effects0() +- %add.1 = add nsw i16 %2, 1 +- %3 = load i16, ptr %m, align 2 +- %v = add i16 %add.1, %3 +- br label %end +- +-bb2: +- %4 = load i16, ptr %b, align 2 +- call void @no_side_effects0() +- %add.2 = add nsw i16 %4, 1 +- %5 = load i16, ptr %m, align 2 +- %w = add i16 %add.2, %5 +- br label %end +- +-end: +- %uv = phi 
i16 [ %u, %bb0 ], [ %v, %bb1 ], [ %w, %bb2 ] +- store i16 %uv, ptr %d, align 2 +- ret void +-} + + ;; Check indeed it was the side effects that prevented hoisting the load + ;; in the previous test. +@@ -269,67 +143,6 @@ + ret void + } + +-define void @f3_switch(i64 %i, ptr nocapture noundef %d, ptr nocapture noundef readonly %m, ptr nocapture noundef readonly %b) { +-; CHECK-LABEL: @f3_switch( +-; CHECK-NEXT: entry: +-; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[B:%.*]], align 2 +-; CHECK-NEXT: [[ADD_0:%.*]] = add nsw i16 [[TMP0]], 1 +-; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[M:%.*]], align 2 +-; CHECK-NEXT: [[U:%.*]] = add i16 [[ADD_0]], [[TMP1]] +-; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ +-; CHECK-NEXT: i64 1, label [[BB1:%.*]] +-; CHECK-NEXT: i64 2, label [[BB2:%.*]] +-; CHECK-NEXT: ] +-; CHECK: bb0: +-; CHECK-NEXT: call void @no_side_effects0() +-; CHECK-NEXT: br label [[END:%.*]] +-; CHECK: bb1: +-; CHECK-NEXT: call void @no_side_effects1() +-; CHECK-NEXT: br label [[END]] +-; CHECK: bb2: +-; CHECK-NEXT: call void @no_side_effects1() +-; CHECK-NEXT: br label [[END]] +-; CHECK: end: +-; CHECK-NEXT: [[UV:%.*]] = phi i16 [ [[U]], [[BB0]] ], [ [[U]], [[BB1]] ], [ [[U]], [[BB2]] ] +-; CHECK-NEXT: store i16 [[UV]], ptr [[D:%.*]], align 2 +-; CHECK-NEXT: ret void +-; +-entry: +- switch i64 %i, label %bb0 [ +- i64 1, label %bb1 +- i64 2, label %bb2 +- ] +- +-bb0: +- %0 = load i16, ptr %b, align 2 +- call void @no_side_effects0() +- %add.0 = add nsw i16 %0, 1 +- %1 = load i16, ptr %m, align 2 +- %u = add i16 %add.0, %1 +- br label %end +- +-bb1: +- %2 = load i16, ptr %b, align 2 +- call void @no_side_effects1() +- %add.1 = add nsw i16 %2, 1 +- %3 = load i16, ptr %m, align 2 +- %v = add i16 %add.1, %3 +- br label %end +- +-bb2: +- %4 = load i16, ptr %b, align 2 +- call void @no_side_effects1() +- %add.2 = add nsw i16 %4, 1 +- %5 = load i16, ptr %m, align 2 +- %w = add i16 %add.2, %5 +- br label %end +- +-end: +- %uv = phi i16 [ %u, %bb0 ], [ %v, %bb1 
], [ %w, %bb2 ] +- store i16 %uv, ptr %d, align 2 +- ret void +-} +- + ;; Check some instructions (e.g. sdiv) are not speculatively executed. + + ;; Division by non-zero constant OK to speculate ... +@@ -373,63 +186,6 @@ + ret void + } + +-define void @f4_switch(i64 %i, ptr nocapture noundef %d, ptr nocapture noundef readonly %m, ptr nocapture noundef readonly %b) { +-; CHECK-LABEL: @f4_switch( +-; CHECK-NEXT: entry: +-; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[B:%.*]], align 2 +-; CHECK-NEXT: [[DIV_0:%.*]] = sdiv i16 [[TMP0]], 2 +-; CHECK-NEXT: [[U:%.*]] = add i16 [[DIV_0]], [[TMP0]] +-; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ +-; CHECK-NEXT: i64 1, label [[BB1:%.*]] +-; CHECK-NEXT: i64 2, label [[BB2:%.*]] +-; CHECK-NEXT: ] +-; CHECK: bb0: +-; CHECK-NEXT: call void @side_effects0() +-; CHECK-NEXT: br label [[IF_END:%.*]] +-; CHECK: bb1: +-; CHECK-NEXT: call void @side_effects1() +-; CHECK-NEXT: br label [[IF_END]] +-; CHECK: bb2: +-; CHECK-NEXT: call void @side_effects1() +-; CHECK-NEXT: br label [[IF_END]] +-; CHECK: if.end: +-; CHECK-NEXT: [[UV:%.*]] = phi i16 [ [[U]], [[BB0]] ], [ [[U]], [[BB1]] ], [ [[U]], [[BB2]] ] +-; CHECK-NEXT: store i16 [[UV]], ptr [[D:%.*]], align 2 +-; CHECK-NEXT: ret void +-; +-entry: +- switch i64 %i, label %bb0 [ +- i64 1, label %bb1 +- i64 2, label %bb2 +- ] +- +-bb0: +- %0 = load i16, ptr %b, align 2 +- call void @side_effects0() +- %div.0 = sdiv i16 %0, 2 +- %u = add i16 %div.0, %0 +- br label %if.end +- +-bb1: +- %1 = load i16, ptr %b, align 2 +- call void @side_effects1() +- %div.1 = sdiv i16 %1, 2 +- %v = add i16 %div.1, %1 +- br label %if.end +- +-bb2: +- %2 = load i16, ptr %b, align 2 +- call void @side_effects1() +- %div.2 = sdiv i16 %2, 2 +- %w = add i16 %div.2, %2 +- br label %if.end +- +-if.end: +- %uv = phi i16 [ %u, %bb0 ], [ %v, %bb1 ], [ %w, %bb2 ] +- store i16 %uv, ptr %d, align 2 +- ret void +-} +- + ;; ... but not a general division ... 
+ define void @f5(i1 %c, ptr nocapture noundef %d, ptr nocapture noundef readonly %m, ptr nocapture noundef readonly %b) { + ; CHECK-LABEL: @f5( +@@ -474,67 +230,6 @@ + ret void + } + +-define void @f5_switch(i64 %i, ptr nocapture noundef %d, ptr nocapture noundef readonly %m, ptr nocapture noundef readonly %b) { +-; CHECK-LABEL: @f5_switch( +-; CHECK-NEXT: entry: +-; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[B:%.*]], align 2 +-; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ +-; CHECK-NEXT: i64 1, label [[BB1:%.*]] +-; CHECK-NEXT: i64 2, label [[BB2:%.*]] +-; CHECK-NEXT: ] +-; CHECK: bb0: +-; CHECK-NEXT: call void @side_effects0() +-; CHECK-NEXT: [[DIV_0:%.*]] = sdiv i16 211, [[TMP0]] +-; CHECK-NEXT: [[U:%.*]] = add i16 [[DIV_0]], [[TMP0]] +-; CHECK-NEXT: br label [[END:%.*]] +-; CHECK: bb1: +-; CHECK-NEXT: call void @side_effects1() +-; CHECK-NEXT: [[DIV_1:%.*]] = sdiv i16 211, [[TMP0]] +-; CHECK-NEXT: [[V:%.*]] = add i16 [[DIV_1]], [[TMP0]] +-; CHECK-NEXT: br label [[END]] +-; CHECK: bb2: +-; CHECK-NEXT: call void @side_effects1() +-; CHECK-NEXT: [[DIV_2:%.*]] = sdiv i16 211, [[TMP0]] +-; CHECK-NEXT: [[W:%.*]] = add i16 [[DIV_2]], [[TMP0]] +-; CHECK-NEXT: br label [[END]] +-; CHECK: end: +-; CHECK-NEXT: [[UV:%.*]] = phi i16 [ [[U]], [[BB0]] ], [ [[V]], [[BB1]] ], [ [[W]], [[BB2]] ] +-; CHECK-NEXT: store i16 [[UV]], ptr [[D:%.*]], align 2 +-; CHECK-NEXT: ret void +-; +-entry: +- switch i64 %i, label %bb0 [ +- i64 1, label %bb1 +- i64 2, label %bb2 +- ] +- +-bb0: +- %0 = load i16, ptr %b, align 2 +- call void @side_effects0() +- %div.0 = sdiv i16 211, %0 +- %u = add i16 %div.0, %0 +- br label %end +- +-bb1: +- %1 = load i16, ptr %b, align 2 +- call void @side_effects1() +- %div.1 = sdiv i16 211, %1 +- %v = add i16 %div.1, %1 +- br label %end +- +-bb2: +- %2 = load i16, ptr %b, align 2 +- call void @side_effects1() +- %div.2 = sdiv i16 211, %2 +- %w = add i16 %div.2, %2 +- br label %end +- +-end: +- %uv = phi i16 [ %u, %bb0 ], [ %v, %bb1 ], [ %w, %bb2 ] +- 
store i16 %uv, ptr %d, align 2 +- ret void +-} +- + ;; ... and it's also OK to hoist the division when there's no speculation happening. + define void @f6(i1 %c, ptr nocapture noundef %d, ptr nocapture noundef readonly %m, ptr nocapture noundef readonly %b) { + ; CHECK-LABEL: @f6( +@@ -576,63 +271,6 @@ + ret void + } + +-define void @f6_switch(i64 %i, ptr nocapture noundef %d, ptr nocapture noundef readonly %m, ptr nocapture noundef readonly %b) { +-; CHECK-LABEL: @f6_switch( +-; CHECK-NEXT: entry: +-; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[B:%.*]], align 2 +-; CHECK-NEXT: [[DIV_0:%.*]] = sdiv i16 211, [[TMP0]] +-; CHECK-NEXT: [[U:%.*]] = add i16 [[DIV_0]], [[TMP0]] +-; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ +-; CHECK-NEXT: i64 1, label [[BB1:%.*]] +-; CHECK-NEXT: i64 2, label [[BB2:%.*]] +-; CHECK-NEXT: ] +-; CHECK: bb0: +-; CHECK-NEXT: call void @no_side_effects0() +-; CHECK-NEXT: br label [[END:%.*]] +-; CHECK: bb1: +-; CHECK-NEXT: call void @no_side_effects1() +-; CHECK-NEXT: br label [[END]] +-; CHECK: bb2: +-; CHECK-NEXT: call void @no_side_effects1() +-; CHECK-NEXT: br label [[END]] +-; CHECK: end: +-; CHECK-NEXT: [[UV:%.*]] = phi i16 [ [[U]], [[BB0]] ], [ [[U]], [[BB1]] ], [ [[U]], [[BB2]] ] +-; CHECK-NEXT: store i16 [[UV]], ptr [[D:%.*]], align 2 +-; CHECK-NEXT: ret void +-; +-entry: +- switch i64 %i, label %bb0 [ +- i64 1, label %bb1 +- i64 2, label %bb2 +- ] +- +-bb0: +- %0 = load i16, ptr %b, align 2 +- call void @no_side_effects0() +- %div.0 = sdiv i16 211, %0 +- %u = add i16 %div.0, %0 +- br label %end +- +-bb1: +- %1 = load i16, ptr %b, align 2 +- call void @no_side_effects1() +- %div.1 = sdiv i16 211, %1 +- %v = add i16 %div.1, %1 +- br label %end +- +-bb2: +- %2 = load i16, ptr %b, align 2 +- call void @no_side_effects1() +- %div.2 = sdiv i16 211, %2 +- %w = add i16 %div.2, %2 +- br label %end +- +-end: +- %uv = phi i16 [ %u, %bb0 ], [ %v, %bb1 ], [ %w, %bb2 ] +- store i16 %uv, ptr %d, align 2 +- ret void +-} +- + ;; No reorder 
of store over a load. + define i16 @f7(i1 %c, ptr %a, ptr %b) { + ; CHECK-LABEL: @f7( +@@ -668,55 +306,6 @@ + ret i16 %v + } + +-define i16 @f7_switch(i64 %i, ptr %a, ptr %b) { +-; CHECK-LABEL: @f7_switch( +-; CHECK-NEXT: entry: +-; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ +-; CHECK-NEXT: i64 1, label [[BB1:%.*]] +-; CHECK-NEXT: i64 2, label [[BB2:%.*]] +-; CHECK-NEXT: ] +-; CHECK: bb0: +-; CHECK-NEXT: [[VA:%.*]] = load i16, ptr [[A:%.*]], align 2 +-; CHECK-NEXT: store i16 0, ptr [[B:%.*]], align 2 +-; CHECK-NEXT: br label [[END:%.*]] +-; CHECK: bb1: +-; CHECK-NEXT: [[VB:%.*]] = load i16, ptr [[B]], align 2 +-; CHECK-NEXT: store i16 0, ptr [[B]], align 2 +-; CHECK-NEXT: br label [[END]] +-; CHECK: bb2: +-; CHECK-NEXT: [[VC:%.*]] = load i16, ptr [[B]], align 2 +-; CHECK-NEXT: store i16 0, ptr [[B]], align 2 +-; CHECK-NEXT: br label [[END]] +-; CHECK: end: +-; CHECK-NEXT: [[V:%.*]] = phi i16 [ [[VA]], [[BB0]] ], [ [[VB]], [[BB1]] ], [ [[VC]], [[BB2]] ] +-; CHECK-NEXT: ret i16 [[V]] +-; +-entry: +- switch i64 %i, label %bb0 [ +- i64 1, label %bb1 +- i64 2, label %bb2 +- ] +- +-bb0: +- %va = load i16, ptr %a, align 2 +- store i16 0, ptr %b, align 2 +- br label %end +- +-bb1: +- %vb = load i16, ptr %b, align 2 +- store i16 0, ptr %b, align 2 +- br label %end +- +-bb2: +- %vc = load i16, ptr %b, align 2 +- store i16 0, ptr %b, align 2 +- br label %end +- +-end: +- %v = phi i16 [ %va, %bb0 ], [ %vb, %bb1 ], [ %vc, %bb2 ] +- ret i16 %v +-} +- + ;; Can reorder load over another load + define i16 @f8(i1 %cond, ptr %a, ptr %b, ptr %c) { + ; CHECK-LABEL: @f8( +@@ -757,59 +346,6 @@ + ret i16 %w + } + +-define i16 @f8_switch(i64 %i, ptr %a, ptr %b, ptr %c) { +-; CHECK-LABEL: @f8_switch( +-; CHECK-NEXT: entry: +-; CHECK-NEXT: [[C_0:%.*]] = load i16, ptr [[C:%.*]], align 2 +-; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ +-; CHECK-NEXT: i64 1, label [[BB1:%.*]] +-; CHECK-NEXT: i64 2, label [[BB2:%.*]] +-; CHECK-NEXT: ] +-; CHECK: bb0: +-; CHECK-NEXT: 
[[VA:%.*]] = load i16, ptr [[A:%.*]], align 2 +-; CHECK-NEXT: br label [[END:%.*]] +-; CHECK: bb1: +-; CHECK-NEXT: [[VB:%.*]] = load i16, ptr [[B:%.*]], align 2 +-; CHECK-NEXT: br label [[END]] +-; CHECK: bb2: +-; CHECK-NEXT: [[VC:%.*]] = load i16, ptr [[B]], align 2 +-; CHECK-NEXT: br label [[END]] +-; CHECK: end: +-; CHECK-NEXT: [[V:%.*]] = phi i16 [ [[VA]], [[BB0]] ], [ [[VB]], [[BB1]] ], [ [[VC]], [[BB2]] ] +-; CHECK-NEXT: [[U:%.*]] = phi i16 [ [[C_0]], [[BB0]] ], [ [[C_0]], [[BB1]] ], [ [[C_0]], [[BB2]] ] +-; CHECK-NEXT: [[W:%.*]] = add i16 [[V]], [[U]] +-; CHECK-NEXT: ret i16 [[W]] +-; +-entry: +- switch i64 %i, label %bb0 [ +- i64 1, label %bb1 +- i64 2, label %bb2 +- ] +- +-bb0: +- %va = load i16, ptr %a, align 2 +- %c.0 = load i16, ptr %c +- br label %end +- +-bb1: +- %vb = load i16, ptr %b, align 2 +- %c.1 = load i16, ptr %c +- br label %end +- +-bb2: +- %vc = load i16, ptr %b, align 2 +- %c.2 = load i16, ptr %c +- br label %end +- +-end: +- %v = phi i16 [ %va, %bb0 ], [ %vb, %bb1 ], [ %vc, %bb2 ] +- %u = phi i16 [ %c.0, %bb0 ], [ %c.1, %bb1 ], [ %c.2, %bb2 ] +- +- %w = add i16 %v, %u +- +- ret i16 %w +-} +- + ;; Currently won't reorder volatile and non-volatile loads. 
+ define i16 @f9(i1 %cond, ptr %a, ptr %b, ptr %c) { + ; CHECK-LABEL: @f9( +@@ -851,61 +387,6 @@ + ret i16 %w + } + +-define i16 @f9_switch(i64 %i, ptr %a, ptr %b, ptr %c) { +-; CHECK-LABEL: @f9_switch( +-; CHECK-NEXT: entry: +-; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ +-; CHECK-NEXT: i64 1, label [[BB1:%.*]] +-; CHECK-NEXT: i64 2, label [[BB2:%.*]] +-; CHECK-NEXT: ] +-; CHECK: bb0: +-; CHECK-NEXT: [[VA:%.*]] = load volatile i16, ptr [[A:%.*]], align 2 +-; CHECK-NEXT: [[C_0:%.*]] = load i16, ptr [[C:%.*]], align 2 +-; CHECK-NEXT: br label [[END:%.*]] +-; CHECK: bb1: +-; CHECK-NEXT: [[VB:%.*]] = load i16, ptr [[B:%.*]], align 2 +-; CHECK-NEXT: [[C_1:%.*]] = load i16, ptr [[C]], align 2 +-; CHECK-NEXT: br label [[END]] +-; CHECK: bb2: +-; CHECK-NEXT: [[VC:%.*]] = load i16, ptr [[B]], align 2 +-; CHECK-NEXT: [[C_2:%.*]] = load i16, ptr [[C]], align 2 +-; CHECK-NEXT: br label [[END]] +-; CHECK: end: +-; CHECK-NEXT: [[V:%.*]] = phi i16 [ [[VA]], [[BB0]] ], [ [[VB]], [[BB1]] ], [ [[VC]], [[BB2]] ] +-; CHECK-NEXT: [[U:%.*]] = phi i16 [ [[C_0]], [[BB0]] ], [ [[C_1]], [[BB1]] ], [ [[C_2]], [[BB2]] ] +-; CHECK-NEXT: [[W:%.*]] = add i16 [[V]], [[U]] +-; CHECK-NEXT: ret i16 [[W]] +-; +-entry: +- switch i64 %i, label %bb0 [ +- i64 1, label %bb1 +- i64 2, label %bb2 +- ] +- +-bb0: +- %va = load volatile i16, ptr %a, align 2 +- %c.0 = load i16, ptr %c +- br label %end +- +-bb1: +- %vb = load i16, ptr %b, align 2 +- %c.1 = load i16, ptr %c +- br label %end +- +-bb2: +- %vc = load i16, ptr %b, align 2 +- %c.2 = load i16, ptr %c +- br label %end +- +-end: +- %v = phi i16 [ %va, %bb0 ], [ %vb, %bb1 ], [ %vc, %bb2 ] +- %u = phi i16 [ %c.0, %bb0 ], [ %c.1, %bb1 ], [ %c.2, %bb2 ] +- +- %w = add i16 %v, %u +- +- ret i16 %w +-} +- + ;; Don't hoist stacksaves across inalloca allocas + define void @f10(i1 %cond) { + ; CHECK-LABEL: @f10( +@@ -953,79 +434,6 @@ + br label %end + + end: +- call void @llvm.stackrestore(ptr %ss) +- ret void +-} +- +-define void @f10_switch(i64 %i) { 
+-; CHECK-LABEL: @f10_switch( +-; CHECK-NEXT: [[SS:%.*]] = call ptr @llvm.stacksave.p0() +-; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ +-; CHECK-NEXT: i64 1, label [[BB1:%.*]] +-; CHECK-NEXT: i64 2, label [[BB2:%.*]] +-; CHECK-NEXT: ] +-; CHECK: bb0: +-; CHECK-NEXT: [[I1:%.*]] = alloca inalloca i32, align 4 +-; CHECK-NEXT: [[SS2:%.*]] = call ptr @llvm.stacksave.p0() +-; CHECK-NEXT: [[I2:%.*]] = alloca inalloca i64, align 8 +-; CHECK-NEXT: call void @inalloca_i64(ptr inalloca(i64) [[I2]]) +-; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[SS2]]) +-; CHECK-NEXT: call void @inalloca_i32(ptr inalloca(i32) [[I1]]) +-; CHECK-NEXT: br label [[END:%.*]] +-; CHECK: bb1: +-; CHECK-NEXT: [[I3:%.*]] = alloca inalloca i64, align 8 +-; CHECK-NEXT: [[SS3:%.*]] = call ptr @llvm.stacksave.p0() +-; CHECK-NEXT: [[I4:%.*]] = alloca inalloca i64, align 8 +-; CHECK-NEXT: [[TMP1:%.*]] = call ptr @inalloca_i64(ptr inalloca(i64) [[I4]]) +-; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[SS3]]) +-; CHECK-NEXT: [[TMP2:%.*]] = call ptr @inalloca_i64(ptr inalloca(i64) [[I3]]) +-; CHECK-NEXT: br label [[END]] +-; CHECK: bb2: +-; CHECK-NEXT: [[I5:%.*]] = alloca inalloca i64, align 8 +-; CHECK-NEXT: [[SS4:%.*]] = call ptr @llvm.stacksave.p0() +-; CHECK-NEXT: [[I6:%.*]] = alloca inalloca i64, align 8 +-; CHECK-NEXT: [[TMP3:%.*]] = call ptr @inalloca_i64(ptr inalloca(i64) [[I6]]) +-; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[SS4]]) +-; CHECK-NEXT: [[TMP4:%.*]] = call ptr @inalloca_i64(ptr inalloca(i64) [[I5]]) +-; CHECK-NEXT: br label [[END]] +-; CHECK: end: +-; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[SS]]) +-; CHECK-NEXT: ret void +-; +- %ss = call ptr @llvm.stacksave() +- switch i64 %i, label %bb0 [ +- i64 1, label %bb1 +- i64 2, label %bb2 +- ] +- +-bb0: +- %i1 = alloca inalloca i32 +- %ss2 = call ptr @llvm.stacksave() +- %i2 = alloca inalloca i64 +- call void @inalloca_i64(ptr inalloca(i64) %i2) +- call void @llvm.stackrestore(ptr %ss2) +- call void 
@inalloca_i32(ptr inalloca(i32) %i1) +- br label %end +- +-bb1: +- %i3 = alloca inalloca i64 +- %ss3 = call ptr @llvm.stacksave() +- %i4 = alloca inalloca i64 +- call ptr @inalloca_i64(ptr inalloca(i64) %i4) +- call void @llvm.stackrestore(ptr %ss3) +- call ptr @inalloca_i64(ptr inalloca(i64) %i3) +- br label %end +- +-bb2: +- %i5 = alloca inalloca i64 +- %ss4 = call ptr @llvm.stacksave() +- %i6 = alloca inalloca i64 +- call ptr @inalloca_i64(ptr inalloca(i64) %i6) +- call void @llvm.stackrestore(ptr %ss4) +- call ptr @inalloca_i64(ptr inalloca(i64) %i5) +- br label %end +- +-end: + call void @llvm.stackrestore(ptr %ss) + ret void + } +diff -ruN --strip-trailing-cr a/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll b/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll +--- a/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll ++++ b/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll +@@ -21,8 +21,20 @@ + + define void @hoist_range_switch(i64 %i, ptr %p) { + ; CHECK-LABEL: @hoist_range_switch( +-; CHECK-NEXT: out: ++; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ ++; CHECK-NEXT: i64 1, label [[BB1:%.*]] ++; CHECK-NEXT: i64 2, label [[BB2:%.*]] ++; CHECK-NEXT: ] ++; CHECK: bb0: + ; CHECK-NEXT: [[T:%.*]] = load i8, ptr [[P:%.*]], align 1, !range [[RNG1:![0-9]+]] ++; CHECK-NEXT: br label [[OUT:%.*]] ++; CHECK: bb1: ++; CHECK-NEXT: [[E:%.*]] = load i8, ptr [[P]], align 1, !range [[RNG2:![0-9]+]] ++; CHECK-NEXT: br label [[OUT]] ++; CHECK: bb2: ++; CHECK-NEXT: [[F:%.*]] = load i8, ptr [[P]], align 1, !range [[RNG3:![0-9]+]] ++; CHECK-NEXT: br label [[OUT]] ++; CHECK: out: + ; CHECK-NEXT: ret void + ; + switch i64 %i, label %bb0 [ +@@ -45,7 +57,7 @@ + define void @hoist_both_noundef(i1 %c, ptr %p) { + ; CHECK-LABEL: @hoist_both_noundef( + ; CHECK-NEXT: if: +-; CHECK-NEXT: [[T:%.*]] = load i8, ptr [[P:%.*]], align 1, !noundef !2 ++; CHECK-NEXT: [[T:%.*]] = load i8, ptr [[P:%.*]], align 1, !noundef !4 + ; CHECK-NEXT: ret void + ; + if: +@@ -66,8 
+78,20 @@ + + define void @hoist_both_noundef_switch(i64 %i, ptr %p) { + ; CHECK-LABEL: @hoist_both_noundef_switch( +-; CHECK-NEXT: out: +-; CHECK-NEXT: [[T:%.*]] = load i8, ptr [[P:%.*]], align 1, !noundef !2 ++; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ ++; CHECK-NEXT: i64 1, label [[BB1:%.*]] ++; CHECK-NEXT: i64 2, label [[BB2:%.*]] ++; CHECK-NEXT: ] ++; CHECK: bb0: ++; CHECK-NEXT: [[T:%.*]] = load i8, ptr [[P:%.*]], align 1, !noundef !4 ++; CHECK-NEXT: br label [[OUT:%.*]] ++; CHECK: bb1: ++; CHECK-NEXT: [[E:%.*]] = load i8, ptr [[P]], align 1, !noundef !4 ++; CHECK-NEXT: br label [[OUT]] ++; CHECK: bb2: ++; CHECK-NEXT: [[F:%.*]] = load i8, ptr [[P]], align 1, !noundef !4 ++; CHECK-NEXT: br label [[OUT]] ++; CHECK: out: + ; CHECK-NEXT: ret void + ; + switch i64 %i, label %bb0 [ +@@ -110,8 +134,20 @@ + + define void @hoist_one_noundef_switch(i64 %i, ptr %p) { + ; CHECK-LABEL: @hoist_one_noundef_switch( +-; CHECK-NEXT: out: +-; CHECK-NEXT: [[T:%.*]] = load i8, ptr [[P:%.*]], align 1 ++; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ ++; CHECK-NEXT: i64 1, label [[BB1:%.*]] ++; CHECK-NEXT: i64 2, label [[BB2:%.*]] ++; CHECK-NEXT: ] ++; CHECK: bb0: ++; CHECK-NEXT: [[T:%.*]] = load i8, ptr [[P:%.*]], align 1, !noundef !4 ++; CHECK-NEXT: br label [[OUT:%.*]] ++; CHECK: bb1: ++; CHECK-NEXT: [[E:%.*]] = load i8, ptr [[P]], align 1 ++; CHECK-NEXT: br label [[OUT]] ++; CHECK: bb2: ++; CHECK-NEXT: [[F:%.*]] = load i8, ptr [[P]], align 1, !noundef !4 ++; CHECK-NEXT: br label [[OUT]] ++; CHECK: out: + ; CHECK-NEXT: ret void + ; + switch i64 %i, label %bb0 [ +@@ -134,7 +170,7 @@ + define void @hoist_dereferenceable(i1 %c, ptr %p) { + ; CHECK-LABEL: @hoist_dereferenceable( + ; CHECK-NEXT: if: +-; CHECK-NEXT: [[T:%.*]] = load ptr, ptr [[P:%.*]], align 8, !dereferenceable !3 ++; CHECK-NEXT: [[T:%.*]] = load ptr, ptr [[P:%.*]], align 8, !dereferenceable !5 + ; CHECK-NEXT: ret void + ; + if: +@@ -151,8 +187,20 @@ + + define void 
@hoist_dereferenceable_switch(i64 %i, ptr %p) { + ; CHECK-LABEL: @hoist_dereferenceable_switch( +-; CHECK-NEXT: out: +-; CHECK-NEXT: [[T:%.*]] = load ptr, ptr [[P:%.*]], align 8, !dereferenceable !3 ++; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ ++; CHECK-NEXT: i64 1, label [[BB1:%.*]] ++; CHECK-NEXT: i64 2, label [[BB2:%.*]] ++; CHECK-NEXT: ] ++; CHECK: bb0: ++; CHECK-NEXT: [[T:%.*]] = load ptr, ptr [[P:%.*]], align 8, !dereferenceable !5 ++; CHECK-NEXT: br label [[OUT:%.*]] ++; CHECK: bb1: ++; CHECK-NEXT: [[E:%.*]] = load ptr, ptr [[P]], align 8, !dereferenceable !6 ++; CHECK-NEXT: br label [[OUT]] ++; CHECK: bb2: ++; CHECK-NEXT: [[F:%.*]] = load ptr, ptr [[P]], align 8, !dereferenceable !7 ++; CHECK-NEXT: br label [[OUT]] ++; CHECK: out: + ; CHECK-NEXT: ret void + ; + switch i64 %i, label %bb0 [ +@@ -175,7 +223,7 @@ + define void @hoist_dereferenceable_or_null(i1 %c, ptr %p) { + ; CHECK-LABEL: @hoist_dereferenceable_or_null( + ; CHECK-NEXT: if: +-; CHECK-NEXT: [[T:%.*]] = load ptr, ptr [[P:%.*]], align 8, !dereferenceable_or_null !3 ++; CHECK-NEXT: [[T:%.*]] = load ptr, ptr [[P:%.*]], align 8, !dereferenceable_or_null !5 + ; CHECK-NEXT: ret void + ; + if: +@@ -192,8 +240,20 @@ + + define void @hoist_dereferenceable_or_null_switch(i64 %i, ptr %p) { + ; CHECK-LABEL: @hoist_dereferenceable_or_null_switch( +-; CHECK-NEXT: out: +-; CHECK-NEXT: [[T:%.*]] = load ptr, ptr [[P:%.*]], align 8, !dereferenceable_or_null !3 ++; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ ++; CHECK-NEXT: i64 1, label [[BB1:%.*]] ++; CHECK-NEXT: i64 2, label [[BB2:%.*]] ++; CHECK-NEXT: ] ++; CHECK: bb0: ++; CHECK-NEXT: [[T:%.*]] = load ptr, ptr [[P:%.*]], align 8, !dereferenceable_or_null !6 ++; CHECK-NEXT: br label [[OUT:%.*]] ++; CHECK: bb1: ++; CHECK-NEXT: [[E:%.*]] = load ptr, ptr [[P]], align 8, !dereferenceable_or_null !5 ++; CHECK-NEXT: br label [[OUT]] ++; CHECK: bb2: ++; CHECK-NEXT: [[F:%.*]] = load ptr, ptr [[P]], align 8, !dereferenceable_or_null !7 ++; 
CHECK-NEXT: br label [[OUT]] ++; CHECK: out: + ; CHECK-NEXT: ret void + ; + switch i64 %i, label %bb0 [ +@@ -217,7 +277,7 @@ + define i32 @speculate_range(i1 %c, ptr dereferenceable(8) align 8 %p) { + ; CHECK-LABEL: @speculate_range( + ; CHECK-NEXT: entry: +-; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[P:%.*]], align 4, !range [[RNG4:![0-9]+]] ++; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[P:%.*]], align 4, !range [[RNG8:![0-9]+]] + ; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[C:%.*]], i32 [[V]], i32 0 + ; CHECK-NEXT: ret i32 [[SPEC_SELECT]] + ; +@@ -238,7 +298,7 @@ + define ptr @speculate_nonnull(i1 %c, ptr dereferenceable(8) align 8 %p) { + ; CHECK-LABEL: @speculate_nonnull( + ; CHECK-NEXT: entry: +-; CHECK-NEXT: [[V:%.*]] = load ptr, ptr [[P:%.*]], align 8, !nonnull !2 ++; CHECK-NEXT: [[V:%.*]] = load ptr, ptr [[P:%.*]], align 8, !nonnull !4 + ; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[C:%.*]], ptr [[V]], ptr null + ; CHECK-NEXT: ret ptr [[SPEC_SELECT]] + ; +@@ -259,7 +319,7 @@ + define ptr @speculate_align(i1 %c, ptr dereferenceable(8) align 8 %p) { + ; CHECK-LABEL: @speculate_align( + ; CHECK-NEXT: entry: +-; CHECK-NEXT: [[V:%.*]] = load ptr, ptr [[P:%.*]], align 8, !align !5 ++; CHECK-NEXT: [[V:%.*]] = load ptr, ptr [[P:%.*]], align 8, !align !9 + ; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[C:%.*]], ptr [[V]], ptr null + ; CHECK-NEXT: ret ptr [[SPEC_SELECT]] + ; +@@ -278,7 +338,7 @@ + define void @hoist_fpmath(i1 %c, double %x) { + ; CHECK-LABEL: @hoist_fpmath( + ; CHECK-NEXT: if: +-; CHECK-NEXT: [[T:%.*]] = fadd double [[X:%.*]], 1.000000e+00, !fpmath !6 ++; CHECK-NEXT: [[T:%.*]] = fadd double [[X:%.*]], 1.000000e+00, !fpmath !10 + ; CHECK-NEXT: ret void + ; + if: +@@ -295,8 +355,20 @@ + + define void @hoist_fpmath_switch(i64 %i, double %x) { + ; CHECK-LABEL: @hoist_fpmath_switch( +-; CHECK-NEXT: out: +-; CHECK-NEXT: [[T:%.*]] = fadd double [[X:%.*]], 1.000000e+00, !fpmath !6 ++; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ ++; CHECK-NEXT: i64 
1, label [[BB1:%.*]] ++; CHECK-NEXT: i64 2, label [[BB2:%.*]] ++; CHECK-NEXT: ] ++; CHECK: bb0: ++; CHECK-NEXT: [[T:%.*]] = fadd double [[X:%.*]], 1.000000e+00, !fpmath !10 ++; CHECK-NEXT: br label [[OUT:%.*]] ++; CHECK: bb1: ++; CHECK-NEXT: [[E:%.*]] = fadd double [[X]], 1.000000e+00, !fpmath !11 ++; CHECK-NEXT: br label [[OUT]] ++; CHECK: bb2: ++; CHECK-NEXT: [[F:%.*]] = fadd double [[X]], 1.000000e+00, !fpmath !12 ++; CHECK-NEXT: br label [[OUT]] ++; CHECK: out: + ; CHECK-NEXT: ret void + ; + switch i64 %i, label %bb0 [ +@@ -322,10 +394,16 @@ + !3 = !{ i8 7, i8 9 } + ;. + ; CHECK: [[RNG0]] = !{i8 0, i8 1, i8 3, i8 5} +-; CHECK: [[RNG1]] = !{i8 0, i8 1, i8 3, i8 5, i8 7, i8 9} +-; CHECK: [[META2:![0-9]+]] = !{} +-; CHECK: [[META3:![0-9]+]] = !{i64 10} +-; CHECK: [[RNG4]] = !{i32 0, i32 10} +-; CHECK: [[META5:![0-9]+]] = !{i64 4} +-; CHECK: [[META6:![0-9]+]] = !{float 2.500000e+00} ++; CHECK: [[RNG1]] = !{i8 0, i8 1} ++; CHECK: [[RNG2]] = !{i8 3, i8 5} ++; CHECK: [[RNG3]] = !{i8 7, i8 9} ++; CHECK: [[META4:![0-9]+]] = !{} ++; CHECK: [[META5:![0-9]+]] = !{i64 10} ++; CHECK: [[META6:![0-9]+]] = !{i64 20} ++; CHECK: [[META7:![0-9]+]] = !{i64 30} ++; CHECK: [[RNG8]] = !{i32 0, i32 10} ++; CHECK: [[META9:![0-9]+]] = !{i64 4} ++; CHECK: [[META10:![0-9]+]] = !{float 2.500000e+00} ++; CHECK: [[META11:![0-9]+]] = !{float 5.000000e+00} ++; CHECK: [[META12:![0-9]+]] = !{float 7.500000e+00} + ;. 
diff --git a/third_party/llvm/workspace.bzl b/third_party/llvm/workspace.bzl index a1abdc50e811c6..6b039a5a6e9efe 100644 --- a/third_party/llvm/workspace.bzl +++ b/third_party/llvm/workspace.bzl @@ -4,8 +4,8 @@ load("//third_party:repo.bzl", "tf_http_archive") def repo(name): """Imports LLVM.""" - LLVM_COMMIT = "2baf4a06ef06c51c2ef09f981f204983b0f8082c" - LLVM_SHA256 = "0ce881f09d65b27810160d02842d42259209506fac98f3f9389059c8b8429d69" + LLVM_COMMIT = "afd7db48c55cb87566758e961f1ebac8af16b8bc" + LLVM_SHA256 = "64f1436eb824ee7f6125ae06c7c337c8edfa8763767f38d7fd218ca02b0311c3" tf_http_archive( name = name, From 463c8cd5de83752f9785f5eced74d1adaac7ddc1 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Wed, 20 Sep 2023 19:07:39 -0700 Subject: [PATCH 069/567] [stream_executor] NFC: Remove platform/logging.h from StreamExecutor https://github.com/openxla/xla/issues/5761 PiperOrigin-RevId: 567148608 --- third_party/xla/xla/stream_executor/BUILD | 5 ++++ .../xla/xla/stream_executor/cuda/cuda_blas.cc | 2 +- .../stream_executor/cuda/cuda_diagnostics.cc | 6 ++-- .../xla/xla/stream_executor/cuda/cuda_dnn.cc | 2 +- .../xla/stream_executor/cuda/cuda_driver.cc | 2 +- .../xla/xla/stream_executor/cuda/cuda_fft.cc | 2 +- .../xla/xla/stream_executor/device_options.h | 2 +- third_party/xla/xla/stream_executor/dnn.h | 2 +- third_party/xla/xla/stream_executor/gpu/BUILD | 1 + .../xla/xla/stream_executor/gpu/gpu_kernel.h | 2 +- third_party/xla/xla/stream_executor/kernel.cc | 2 +- .../xla/xla/stream_executor/kernel_spec.h | 2 +- .../xla/xla/stream_executor/module_spec.h | 2 +- .../xla/xla/stream_executor/platform.cc | 2 +- .../xla/xla/stream_executor/platform/BUILD | 1 - .../xla/stream_executor/platform/logging.h | 30 ------------------- .../xla/xla/stream_executor/rocm/rocm_blas.cc | 2 +- .../stream_executor/rocm/rocm_diagnostics.cc | 2 +- .../xla/xla/stream_executor/rocm/rocm_dnn.cc | 2 +- .../xla/stream_executor/rocm/rocm_driver.cc | 2 +- .../xla/xla/stream_executor/rocm/rocm_fft.cc | 
5 ++-- .../stream_executor/rocm/rocm_gpu_executor.cc | 4 +-- third_party/xla/xla/stream_executor/stream.cc | 8 ++--- .../temporary_memory_manager.cc | 2 +- 24 files changed, 32 insertions(+), 60 deletions(-) delete mode 100644 third_party/xla/xla/stream_executor/platform/logging.h diff --git a/third_party/xla/xla/stream_executor/BUILD b/third_party/xla/xla/stream_executor/BUILD index f4b13f734c8301..5d40427a31f392 100644 --- a/third_party/xla/xla/stream_executor/BUILD +++ b/third_party/xla/xla/stream_executor/BUILD @@ -177,6 +177,7 @@ cc_library( "//xla/stream_executor/platform", "@com_google_absl//absl/strings", "@com_google_absl//absl/synchronization", + "@local_tsl//tsl/platform:logging", ], ) @@ -193,6 +194,7 @@ cc_library( deps = [ "//xla/stream_executor/platform", "@com_google_absl//absl/types:span", + "@local_tsl//tsl/platform:logging", ], ) @@ -249,6 +251,7 @@ cc_library( deps = [ "//xla/stream_executor/platform", "@com_google_absl//absl/strings", + "@local_tsl//tsl/platform:logging", ], ) @@ -281,6 +284,7 @@ cc_library( "//xla/stream_executor/platform", "@com_google_absl//absl/strings", "@local_tsl//tsl/platform:errors", + "@local_tsl//tsl/platform:logging", "@local_tsl//tsl/platform:status", "@local_tsl//tsl/platform:statusor", ], @@ -699,6 +703,7 @@ cc_library( "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/synchronization", + "@local_tsl//tsl/platform:logging", "@local_tsl//tsl/platform:status", "@local_tsl//tsl/platform:statusor", ], diff --git a/third_party/xla/xla/stream_executor/cuda/cuda_blas.cc b/third_party/xla/xla/stream_executor/cuda/cuda_blas.cc index f9a8ceef02bbf2..d9d5451eb912a7 100644 --- a/third_party/xla/xla/stream_executor/cuda/cuda_blas.cc +++ b/third_party/xla/xla/stream_executor/cuda/cuda_blas.cc @@ -37,11 +37,11 @@ limitations under the License. 
#include "xla/stream_executor/gpu/gpu_types.h" #include "xla/stream_executor/numeric_options.h" #include "xla/stream_executor/platform/initialize.h" -#include "xla/stream_executor/platform/logging.h" #include "xla/stream_executor/platform/port.h" #include "xla/stream_executor/plugin_registry.h" #include "xla/stream_executor/scratch_allocator.h" #include "xla/stream_executor/stream_executor.h" +#include "tsl/platform/logging.h" #include "tsl/platform/status.h" #include "tsl/platform/tensor_float_32_utils.h" diff --git a/third_party/xla/xla/stream_executor/cuda/cuda_diagnostics.cc b/third_party/xla/xla/stream_executor/cuda/cuda_diagnostics.cc index 4a5395f7a8c8e1..ad1aa615f4cce5 100644 --- a/third_party/xla/xla/stream_executor/cuda/cuda_diagnostics.cc +++ b/third_party/xla/xla/stream_executor/cuda/cuda_diagnostics.cc @@ -45,8 +45,8 @@ limitations under the License. #include "absl/strings/str_format.h" #include "absl/strings/str_split.h" #include "absl/strings/strip.h" -#include "xla/stream_executor/platform/logging.h" #include "tsl/platform/host_info.h" +#include "tsl/platform/logging.h" #include "tsl/platform/status.h" namespace stream_executor { @@ -245,8 +245,8 @@ tsl::StatusOr Diagnostician::FindDsoVersion() { #if !defined(PLATFORM_WINDOWS) && !defined(ANDROID_TEGRA) // Callback used when iterating through DSOs. Looks for the driver-interfacing // DSO and yields its version number into the callback data, when found. 
- auto iterate_phdr = - [](struct dl_phdr_info *info, size_t size, void *data) -> int { + auto iterate_phdr = [](struct dl_phdr_info *info, size_t size, + void *data) -> int { if (strstr(info->dlpi_name, "libcuda.so.1")) { VLOG(1) << "found DLL info with name: " << info->dlpi_name; char resolved_path[PATH_MAX] = {0}; diff --git a/third_party/xla/xla/stream_executor/cuda/cuda_dnn.cc b/third_party/xla/xla/stream_executor/cuda/cuda_dnn.cc index 3a174e92377600..c6496ca684ab85 100644 --- a/third_party/xla/xla/stream_executor/cuda/cuda_dnn.cc +++ b/third_party/xla/xla/stream_executor/cuda/cuda_dnn.cc @@ -46,7 +46,6 @@ limitations under the License. #include "xla/stream_executor/gpu/gpu_timer.h" #include "xla/stream_executor/numeric_options.h" #include "xla/stream_executor/platform/initialize.h" -#include "xla/stream_executor/platform/logging.h" #include "xla/stream_executor/plugin_registry.h" #include "xla/stream_executor/scratch_allocator.h" #include "xla/stream_executor/stream.h" @@ -54,6 +53,7 @@ limitations under the License. #include "xla/stream_executor/stream_executor_pimpl.h" #include "tsl/cuda/cudnn_version.h" #include "tsl/platform/errors.h" +#include "tsl/platform/logging.h" #include "tsl/platform/tensor_float_32_utils.h" #include "tsl/util/env_var.h" diff --git a/third_party/xla/xla/stream_executor/cuda/cuda_driver.cc b/third_party/xla/xla/stream_executor/cuda/cuda_driver.cc index b1e217caab9bfd..bf180cb86486d1 100644 --- a/third_party/xla/xla/stream_executor/cuda/cuda_driver.cc +++ b/third_party/xla/xla/stream_executor/cuda/cuda_driver.cc @@ -40,10 +40,10 @@ limitations under the License. 
#include "third_party/gpus/cuda/include/driver_types.h" #include "xla/stream_executor/cuda/cuda_diagnostics.h" #include "xla/stream_executor/gpu/gpu_driver.h" -#include "xla/stream_executor/platform/logging.h" #include "xla/stream_executor/platform/port.h" #include "tsl/platform/env.h" #include "tsl/platform/errors.h" +#include "tsl/platform/logging.h" #include "tsl/platform/stacktrace.h" #include "tsl/platform/static_threadlocal.h" #include "tsl/platform/status.h" diff --git a/third_party/xla/xla/stream_executor/cuda/cuda_fft.cc b/third_party/xla/xla/stream_executor/cuda/cuda_fft.cc index 8e27874af32f04..f7eec72b14e3a5 100644 --- a/third_party/xla/xla/stream_executor/cuda/cuda_fft.cc +++ b/third_party/xla/xla/stream_executor/cuda/cuda_fft.cc @@ -27,12 +27,12 @@ limitations under the License. #include "xla/stream_executor/cuda/cuda_stream.h" #include "xla/stream_executor/device_memory.h" #include "xla/stream_executor/platform/initialize.h" -#include "xla/stream_executor/platform/logging.h" #include "xla/stream_executor/platform/port.h" #include "xla/stream_executor/plugin_registry.h" #include "xla/stream_executor/stream.h" #include "xla/stream_executor/stream_executor_internal.h" #include "tsl/platform/errors.h" +#include "tsl/platform/logging.h" #include "tsl/platform/status.h" namespace stream_executor { diff --git a/third_party/xla/xla/stream_executor/device_options.h b/third_party/xla/xla/stream_executor/device_options.h index d89c2e96b3e86b..5fc86b25e1c97a 100644 --- a/third_party/xla/xla/stream_executor/device_options.h +++ b/third_party/xla/xla/stream_executor/device_options.h @@ -22,8 +22,8 @@ limitations under the License. 
#include #include "absl/strings/str_join.h" -#include "xla/stream_executor/platform/logging.h" #include "xla/stream_executor/platform/port.h" +#include "tsl/platform/logging.h" namespace stream_executor { diff --git a/third_party/xla/xla/stream_executor/dnn.h b/third_party/xla/xla/stream_executor/dnn.h index b07a6a1819fef6..1cf598e7f37141 100644 --- a/third_party/xla/xla/stream_executor/dnn.h +++ b/third_party/xla/xla/stream_executor/dnn.h @@ -43,8 +43,8 @@ limitations under the License. #include "xla/stream_executor/device_memory.h" #include "xla/stream_executor/dnn.pb.h" #include "xla/stream_executor/numeric_options.h" -#include "xla/stream_executor/platform/logging.h" #include "xla/stream_executor/platform/port.h" +#include "tsl/platform/logging.h" #include "tsl/platform/status.h" #include "tsl/platform/statusor.h" diff --git a/third_party/xla/xla/stream_executor/gpu/BUILD b/third_party/xla/xla/stream_executor/gpu/BUILD index 0f19b4852f5235..a790045a3e42f0 100644 --- a/third_party/xla/xla/stream_executor/gpu/BUILD +++ b/third_party/xla/xla/stream_executor/gpu/BUILD @@ -199,6 +199,7 @@ cc_library( "//xla/stream_executor:stream_executor_internal", "//xla/stream_executor:stream_executor_pimpl_header", "//xla/stream_executor/platform", + "@local_tsl//tsl/platform:logging", ], ) diff --git a/third_party/xla/xla/stream_executor/gpu/gpu_kernel.h b/third_party/xla/xla/stream_executor/gpu/gpu_kernel.h index e8eb1f1c9eee31..b7fa33c87e5ace 100644 --- a/third_party/xla/xla/stream_executor/gpu/gpu_kernel.h +++ b/third_party/xla/xla/stream_executor/gpu/gpu_kernel.h @@ -24,9 +24,9 @@ limitations under the License. 
#include "xla/stream_executor/gpu/gpu_driver.h" #include "xla/stream_executor/kernel_cache_config.h" -#include "xla/stream_executor/platform/logging.h" #include "xla/stream_executor/platform/port.h" #include "xla/stream_executor/stream_executor_internal.h" +#include "tsl/platform/logging.h" namespace stream_executor { namespace gpu { diff --git a/third_party/xla/xla/stream_executor/kernel.cc b/third_party/xla/xla/stream_executor/kernel.cc index b4a9f9aa7e1618..d09f25873c8555 100644 --- a/third_party/xla/xla/stream_executor/kernel.cc +++ b/third_party/xla/xla/stream_executor/kernel.cc @@ -22,10 +22,10 @@ limitations under the License. #include "absl/strings/string_view.h" #include "absl/strings/strip.h" #include "xla/stream_executor/platform.h" -#include "xla/stream_executor/platform/logging.h" #include "xla/stream_executor/platform/port.h" #include "xla/stream_executor/stream_executor.h" #include "tsl/platform/demangle.h" +#include "tsl/platform/logging.h" namespace stream_executor { diff --git a/third_party/xla/xla/stream_executor/kernel_spec.h b/third_party/xla/xla/stream_executor/kernel_spec.h index 8b33a5fe8dd7f7..70ee8b607faa00 100644 --- a/third_party/xla/xla/stream_executor/kernel_spec.h +++ b/third_party/xla/xla/stream_executor/kernel_spec.h @@ -53,8 +53,8 @@ limitations under the License. #include "absl/strings/string_view.h" #include "absl/synchronization/mutex.h" -#include "xla/stream_executor/platform/logging.h" #include "xla/stream_executor/platform/port.h" +#include "tsl/platform/logging.h" namespace stream_executor { diff --git a/third_party/xla/xla/stream_executor/module_spec.h b/third_party/xla/xla/stream_executor/module_spec.h index c797df1559e18c..fe34a49626ecc9 100644 --- a/third_party/xla/xla/stream_executor/module_spec.h +++ b/third_party/xla/xla/stream_executor/module_spec.h @@ -19,8 +19,8 @@ limitations under the License. 
#include #include "absl/types/span.h" -#include "xla/stream_executor/platform/logging.h" #include "xla/stream_executor/platform/port.h" +#include "tsl/platform/logging.h" namespace stream_executor { diff --git a/third_party/xla/xla/stream_executor/platform.cc b/third_party/xla/xla/stream_executor/platform.cc index 04ddc16a602bc8..9f2c5c0978f128 100644 --- a/third_party/xla/xla/stream_executor/platform.cc +++ b/third_party/xla/xla/stream_executor/platform.cc @@ -16,10 +16,10 @@ limitations under the License. #include "xla/stream_executor/platform.h" #include "absl/strings/str_cat.h" -#include "xla/stream_executor/platform/logging.h" #include "xla/stream_executor/platform/port.h" #include "xla/stream_executor/stream_executor_pimpl.h" #include "tsl/platform/errors.h" +#include "tsl/platform/logging.h" namespace stream_executor { diff --git a/third_party/xla/xla/stream_executor/platform/BUILD b/third_party/xla/xla/stream_executor/platform/BUILD index 85a9641f51ce7d..084de6c15f4ebb 100644 --- a/third_party/xla/xla/stream_executor/platform/BUILD +++ b/third_party/xla/xla/stream_executor/platform/BUILD @@ -17,7 +17,6 @@ cc_library( name = "platform", textual_hdrs = [ "initialize.h", - "logging.h", "platform.h", "port.h", ], diff --git a/third_party/xla/xla/stream_executor/platform/logging.h b/third_party/xla/xla/stream_executor/platform/logging.h deleted file mode 100644 index 53121fc4165fe7..00000000000000 --- a/third_party/xla/xla/stream_executor/platform/logging.h +++ /dev/null @@ -1,30 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef XLA_STREAM_EXECUTOR_PLATFORM_LOGGING_H_ -#define XLA_STREAM_EXECUTOR_PLATFORM_LOGGING_H_ - -#include "xla/stream_executor/platform/port.h" -#include "tsl/platform/logging.h" - -#if !(defined(PLATFORM_GOOGLE) || defined(PLATFORM_GOOGLE_ANDROID) || \ - defined(PLATFORM_GOOGLE_IOS) || defined(GOOGLE_LOGGING) || \ - defined(__EMSCRIPTEN__) || defined(PLATFORM_CHROMIUMOS)) - -#define PCHECK(invocation) CHECK(invocation) - -#endif - -#endif // XLA_STREAM_EXECUTOR_PLATFORM_LOGGING_H_ diff --git a/third_party/xla/xla/stream_executor/rocm/rocm_blas.cc b/third_party/xla/xla/stream_executor/rocm/rocm_blas.cc index aa9929d77ceaa5..b426cd810ba407 100644 --- a/third_party/xla/xla/stream_executor/rocm/rocm_blas.cc +++ b/third_party/xla/xla/stream_executor/rocm/rocm_blas.cc @@ -34,12 +34,12 @@ limitations under the License. 
#include "xla/stream_executor/gpu/gpu_timer.h" #include "xla/stream_executor/platform/dso_loader.h" #include "xla/stream_executor/platform/initialize.h" -#include "xla/stream_executor/platform/logging.h" #include "xla/stream_executor/platform/port.h" #include "xla/stream_executor/plugin_registry.h" #include "xla/stream_executor/rocm/rocm_platform_id.h" #include "xla/stream_executor/scratch_allocator.h" #include "xla/stream_executor/stream_executor.h" +#include "tsl/platform/logging.h" #include "tsl/util/determinism.h" using tsl::OpDeterminismRequired; diff --git a/third_party/xla/xla/stream_executor/rocm/rocm_diagnostics.cc b/third_party/xla/xla/stream_executor/rocm/rocm_diagnostics.cc index 555ce1cb7a293b..91ef8cf92c0113 100644 --- a/third_party/xla/xla/stream_executor/rocm/rocm_diagnostics.cc +++ b/third_party/xla/xla/stream_executor/rocm/rocm_diagnostics.cc @@ -35,9 +35,9 @@ limitations under the License. #include "absl/strings/str_format.h" #include "absl/strings/str_split.h" #include "absl/strings/strip.h" -#include "xla/stream_executor/platform/logging.h" #include "tsl/platform/errors.h" #include "tsl/platform/host_info.h" +#include "tsl/platform/logging.h" namespace stream_executor { namespace rocm { diff --git a/third_party/xla/xla/stream_executor/rocm/rocm_dnn.cc b/third_party/xla/xla/stream_executor/rocm/rocm_dnn.cc index dfdb95b690e2e1..b987cdc6b55988 100644 --- a/third_party/xla/xla/stream_executor/rocm/rocm_dnn.cc +++ b/third_party/xla/xla/stream_executor/rocm/rocm_dnn.cc @@ -36,7 +36,6 @@ limitations under the License. #include "xla/stream_executor/gpu/gpu_timer.h" #include "xla/stream_executor/platform/dso_loader.h" #include "xla/stream_executor/platform/initialize.h" -#include "xla/stream_executor/platform/logging.h" #include "xla/stream_executor/plugin_registry.h" #include "xla/stream_executor/rocm/rocm_diagnostics.h" #include "xla/stream_executor/rocm/rocm_platform_id.h" @@ -46,6 +45,7 @@ limitations under the License. 
#include "tsl/platform/env.h" #include "tsl/platform/errors.h" #include "tsl/platform/hash.h" +#include "tsl/platform/logging.h" #include "tsl/util/determinism.h" #include "tsl/util/env_var.h" diff --git a/third_party/xla/xla/stream_executor/rocm/rocm_driver.cc b/third_party/xla/xla/stream_executor/rocm/rocm_driver.cc index 752fc93fbd93b2..bde91ca30582c7 100644 --- a/third_party/xla/xla/stream_executor/rocm/rocm_driver.cc +++ b/third_party/xla/xla/stream_executor/rocm/rocm_driver.cc @@ -31,11 +31,11 @@ limitations under the License. #include "absl/synchronization/notification.h" #include "xla/stream_executor/gpu/gpu_diagnostics.h" #include "xla/stream_executor/gpu/gpu_driver.h" -#include "xla/stream_executor/platform/logging.h" #include "xla/stream_executor/platform/port.h" #include "xla/stream_executor/rocm/rocm_driver_wrapper.h" #include "tsl/platform/env.h" #include "tsl/platform/errors.h" +#include "tsl/platform/logging.h" #include "tsl/platform/numbers.h" #include "tsl/platform/stacktrace.h" #include "tsl/platform/static_threadlocal.h" diff --git a/third_party/xla/xla/stream_executor/rocm/rocm_fft.cc b/third_party/xla/xla/stream_executor/rocm/rocm_fft.cc index 3131b74858248f..5dc336e996e30c 100644 --- a/third_party/xla/xla/stream_executor/rocm/rocm_fft.cc +++ b/third_party/xla/xla/stream_executor/rocm/rocm_fft.cc @@ -24,12 +24,12 @@ limitations under the License. 
#include "xla/stream_executor/gpu/gpu_stream.h" #include "xla/stream_executor/platform/dso_loader.h" #include "xla/stream_executor/platform/initialize.h" -#include "xla/stream_executor/platform/logging.h" #include "xla/stream_executor/platform/port.h" #include "xla/stream_executor/plugin_registry.h" #include "xla/stream_executor/rocm/rocm_platform_id.h" #include "xla/stream_executor/stream_executor_internal.h" #include "tsl/platform/env.h" +#include "tsl/platform/logging.h" namespace stream_executor { namespace gpu { @@ -533,8 +533,7 @@ bool ROCMFft::DoFftInternal(Stream *stream, fft::Plan *plan, FuncT hipfftExec, if (allocator) { auto allocated = allocator->AllocateBytes(input.size()); if (allocated.ok()) { - if (stream->ThenMemcpy(&allocated.value(), input, input.size()) - .ok()) { + if (stream->ThenMemcpy(&allocated.value(), input, input.size()).ok()) { input_maybe_copy = DeviceMemory(allocated.value()); } else { LOG(ERROR) << "failed to copy input buffer for rocFFT."; diff --git a/third_party/xla/xla/stream_executor/rocm/rocm_gpu_executor.cc b/third_party/xla/xla/stream_executor/rocm/rocm_gpu_executor.cc index 5ad9feb70e5652..9c0dd98791a6a5 100644 --- a/third_party/xla/xla/stream_executor/rocm/rocm_gpu_executor.cc +++ b/third_party/xla/xla/stream_executor/rocm/rocm_gpu_executor.cc @@ -35,7 +35,6 @@ limitations under the License. #include "xla/stream_executor/platform.h" #include "xla/stream_executor/platform/dso_loader.h" #include "xla/stream_executor/platform/initialize.h" -#include "xla/stream_executor/platform/logging.h" #include "xla/stream_executor/platform/port.h" #include "xla/stream_executor/plugin_registry.h" #include "xla/stream_executor/rocm/rocm_diagnostics.h" @@ -45,6 +44,7 @@ limitations under the License. 
#include "xla/stream_executor/stream_executor_pimpl.h" #include "tsl/platform/env.h" #include "tsl/platform/errors.h" +#include "tsl/platform/logging.h" #ifdef PLATFORMS_GPUS_ROCM_DYNAMIC_LIBROCM_DYNAMIC_LIBROCM_H_ #error \ @@ -182,7 +182,7 @@ tsl::Status GpuExecutor::Init(int device_ordinal, // would return /usr/bin. static string GetBinaryDir(bool strip_exe) { char exe_path[PATH_MAX] = {0}; - PCHECK(readlink("/proc/self/exe", exe_path, sizeof(exe_path) - 1) != -1); + CHECK_NE(readlink("/proc/self/exe", exe_path, sizeof(exe_path) - 1), -1); // Make sure it's null-terminated: exe_path[sizeof(exe_path) - 1] = 0; diff --git a/third_party/xla/xla/stream_executor/stream.cc b/third_party/xla/xla/stream_executor/stream.cc index fe1b4158ec6e5a..d8cfa7bb793cd4 100644 --- a/third_party/xla/xla/stream_executor/stream.cc +++ b/third_party/xla/xla/stream_executor/stream.cc @@ -28,10 +28,10 @@ limitations under the License. #include "xla/stream_executor/blas.h" #include "xla/stream_executor/numeric_options.h" #include "xla/stream_executor/platform.h" -#include "xla/stream_executor/platform/logging.h" #include "xla/stream_executor/platform/port.h" #include "xla/stream_executor/stream_executor_internal.h" #include "xla/stream_executor/stream_executor_pimpl.h" +#include "tsl/platform/logging.h" #include "tsl/platform/stacktrace.h" namespace stream_executor { @@ -238,10 +238,8 @@ std::string CallStr(const char *function_name, Stream *stream, // Use this macro to avoid having to type every parameter twice to log // it with VLOG and CallStr. -#define PARAM(parameter) \ - { \ -#parameter, ToVlogString(parameter) \ - } +#define PARAM(parameter) \ + { #parameter, ToVlogString(parameter) } // Use this macro to avoid having to type out the name of each // function and to save some boilerplate. 
Intended to be used like this: diff --git a/third_party/xla/xla/stream_executor/temporary_memory_manager.cc b/third_party/xla/xla/stream_executor/temporary_memory_manager.cc index c0b6bd2f53d1d8..690f64225611a9 100644 --- a/third_party/xla/xla/stream_executor/temporary_memory_manager.cc +++ b/third_party/xla/xla/stream_executor/temporary_memory_manager.cc @@ -19,9 +19,9 @@ limitations under the License. #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" -#include "xla/stream_executor/platform/logging.h" #include "xla/stream_executor/stream.h" #include "xla/stream_executor/stream_executor_pimpl.h" +#include "tsl/platform/logging.h" namespace stream_executor { namespace internal { From 67b2bb20ea256517cc514b83cae36aaa79f93fc4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 20 Sep 2023 19:22:42 -0700 Subject: [PATCH 070/567] Update TFRT dependency to use revision http://github.com/tensorflow/runtime/commit/6a3e3ece9c01f3e5742a297c73357d463a2fe151. PiperOrigin-RevId: 567151719 --- third_party/tf_runtime/workspace.bzl | 4 ++-- third_party/xla/third_party/tf_runtime/workspace.bzl | 4 ++-- .../xla/third_party/tsl/third_party/tf_runtime/workspace.bzl | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/third_party/tf_runtime/workspace.bzl b/third_party/tf_runtime/workspace.bzl index ab7501924e78b2..a788df8889a6a7 100644 --- a/third_party/tf_runtime/workspace.bzl +++ b/third_party/tf_runtime/workspace.bzl @@ -6,8 +6,8 @@ def repo(): """Imports TFRT.""" # Attention: tools parse and update these lines. 
- TFRT_COMMIT = "752d6d83d403986227dffe42beb5014843cf2ddb" - TFRT_SHA256 = "818d9b3951c1da81a937a24c3875bfae38664ceb37909e5438bbebf708279a24" + TFRT_COMMIT = "6a3e3ece9c01f3e5742a297c73357d463a2fe151" + TFRT_SHA256 = "24477cd3e9ac93a1010542de308341f1c112df974f9510c18e4d4efb2de78e59" tf_http_archive( name = "tf_runtime", diff --git a/third_party/xla/third_party/tf_runtime/workspace.bzl b/third_party/xla/third_party/tf_runtime/workspace.bzl index ab7501924e78b2..a788df8889a6a7 100644 --- a/third_party/xla/third_party/tf_runtime/workspace.bzl +++ b/third_party/xla/third_party/tf_runtime/workspace.bzl @@ -6,8 +6,8 @@ def repo(): """Imports TFRT.""" # Attention: tools parse and update these lines. - TFRT_COMMIT = "752d6d83d403986227dffe42beb5014843cf2ddb" - TFRT_SHA256 = "818d9b3951c1da81a937a24c3875bfae38664ceb37909e5438bbebf708279a24" + TFRT_COMMIT = "6a3e3ece9c01f3e5742a297c73357d463a2fe151" + TFRT_SHA256 = "24477cd3e9ac93a1010542de308341f1c112df974f9510c18e4d4efb2de78e59" tf_http_archive( name = "tf_runtime", diff --git a/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl b/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl index ab7501924e78b2..a788df8889a6a7 100644 --- a/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl +++ b/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl @@ -6,8 +6,8 @@ def repo(): """Imports TFRT.""" # Attention: tools parse and update these lines. - TFRT_COMMIT = "752d6d83d403986227dffe42beb5014843cf2ddb" - TFRT_SHA256 = "818d9b3951c1da81a937a24c3875bfae38664ceb37909e5438bbebf708279a24" + TFRT_COMMIT = "6a3e3ece9c01f3e5742a297c73357d463a2fe151" + TFRT_SHA256 = "24477cd3e9ac93a1010542de308341f1c112df974f9510c18e4d4efb2de78e59" tf_http_archive( name = "tf_runtime", From 413c90e89c1e6d29eef4e4dc0e3b3fbb647addb1 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 20 Sep 2023 19:38:28 -0700 Subject: [PATCH 071/567] Update the logic of `PjRtArray::Reshard` after `PjRtBuffer::CopyToMemorySpace` was introduced. Users should use `PjRtBuffer::CopyToMemorySpace` instead of `PjRtBuffer::CopyToDevice` when memories are supported, since the semantics of the latter one is to always copy to the default memory space of the device. PiperOrigin-RevId: 567154400 --- .../xla/xla/python/pjrt_ifrt/pjrt_array.cc | 54 +++++++++++++------ third_party/xla/xla/python/xla_client.py | 2 +- 2 files changed, 39 insertions(+), 17 deletions(-) diff --git a/third_party/xla/xla/python/pjrt_ifrt/pjrt_array.cc b/third_party/xla/xla/python/pjrt_ifrt/pjrt_array.cc index 8678ff06b79f5a..205c376246ad38 100644 --- a/third_party/xla/xla/python/pjrt_ifrt/pjrt_array.cc +++ b/third_party/xla/xla/python/pjrt_ifrt/pjrt_array.cc @@ -22,6 +22,8 @@ limitations under the License. #include #include "absl/container/inlined_vector.h" +#include "absl/log/check.h" +#include "absl/strings/match.h" #include "absl/strings/str_join.h" #include "xla/literal.h" #include "xla/pjrt/pjrt_client.h" @@ -340,16 +342,29 @@ StatusOr> PjRtArray::Reshard( // permits device changes and nothing else. PjRtBuffers buffers; buffers.reserve(pjrt_buffers_.size()); + // TODO(yueshengys): Add a on-demand canonicalization when all users + // (e.g., ifrt proxy) support memories. + bool new_sharding_has_memory_kind = + new_sharding->memory_kind().memory_kind().has_value(); + // TODO(yueshengys): Remove the check on PjRt C API after `CopyToMemorySpace` + // is supported. 
+ CHECK_GT(new_sharding->devices().size(), 0); + bool using_c_api = absl::StrContains( + new_sharding->devices().front()->client()->platform_version(), + "PJRT C API"); for (int i = 0; i < pjrt_buffers_.size(); ++i) { + bool devices_equal = + pjrt_buffers_[i]->device() == new_sharding->devices()[i]; + bool memories_supported = + !using_c_api && pjrt_buffers_[i]->memory_space() != nullptr; bool memory_kind_equal = - !new_sharding->memory_kind().memory_kind().has_value() || - pjrt_buffers_[i]->memory_space() == nullptr || + new_sharding_has_memory_kind && memories_supported && pjrt_buffers_[i]->memory_space()->memory_space_kind() == new_sharding->memory_kind().memory_kind(); - bool devices_equal = - pjrt_buffers_[i]->device() == new_sharding->devices()[i]; - if (devices_equal && memory_kind_equal) { + // No need for data transfer. + if (devices_equal && (!new_sharding_has_memory_kind || + !memories_supported || memory_kind_equal)) { switch (semantics) { case ArrayCopySemantics::kAlwaysCopy: // TODO(hyeontaek): kAlwaysCopy should clone the buffer, but the PjRt @@ -373,25 +388,32 @@ StatusOr> PjRtArray::Reshard( "first fetched to the host and then sent to the destination " "device."); } - if (!devices_equal && memory_kind_equal) { + // Use `PjRtBuffer::CopyToMemorySpace` instead of + // `PjRtBuffer::CopyToDevice` when memories are supported. Because the + // semantics of the latter one is to copy to the default memory space of + // the device. 
+ if (new_sharding_has_memory_kind && memories_supported) { TF_ASSIGN_OR_RETURN( - std::unique_ptr copied_buffer, - pjrt_buffers_[i]->CopyToDevice(new_sharding->devices()[i])); + auto memory_space, + GetMemorySpaceFromMemoryKind(new_sharding->devices()[i], + new_sharding->memory_kind())); + TF_ASSIGN_OR_RETURN(std::unique_ptr copied_buffer, + pjrt_buffers_[i]->CopyToMemorySpace(memory_space)); if (semantics == ArrayCopySemantics::kDonateInput) { + if (!memory_kind_equal) { + return Unimplemented( + "Donation across different memory kinds is not implemented."); + } pjrt_buffers_[i] = nullptr; } buffers.push_back(std::shared_ptr(copied_buffer.release())); } else { - // memory_kind is not equal and devices can be equal or not equal. + // Use `PjRtBuffer::CopyToDevice` when memories are not supported. TF_ASSIGN_OR_RETURN( - auto memory_space, - GetMemorySpaceFromMemoryKind(new_sharding->devices()[i], - new_sharding->memory_kind())); - TF_ASSIGN_OR_RETURN(std::unique_ptr copied_buffer, - pjrt_buffers_[i]->CopyToMemorySpace(memory_space)); + std::unique_ptr copied_buffer, + pjrt_buffers_[i]->CopyToDevice(new_sharding->devices()[i])); if (semantics == ArrayCopySemantics::kDonateInput) { - return Unimplemented( - "Donation across different memory kinds is not implemented."); + pjrt_buffers_[i] = nullptr; } buffers.push_back(std::shared_ptr(copied_buffer.release())); } diff --git a/third_party/xla/xla/python/xla_client.py b/third_party/xla/xla/python/xla_client.py index 3de4938fdb1204..0c19224f787919 100644 --- a/third_party/xla/xla/python/xla_client.py +++ b/third_party/xla/xla/python/xla_client.py @@ -44,7 +44,7 @@ # Just an internal arbitrary increasing number to help with backward-compatible # changes. In JAX, reference this via jax._src.lib.xla_extension_version. -_version = 195 +_version = 196 # Version number for MLIR:Python components. 
mlir_api_version = 54 From 84c4dd54b92e2af00b4366f87b0bad7ad78a6c9d Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Wed, 20 Sep 2023 19:57:58 -0700 Subject: [PATCH 072/567] [stream_executor] NFC: Document exported device allocator headers Issue: #5761 PiperOrigin-RevId: 567157790 --- third_party/xla/xla/stream_executor/BUILD | 25 +++++++++++++++-------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/third_party/xla/xla/stream_executor/BUILD b/third_party/xla/xla/stream_executor/BUILD index 5d40427a31f392..f2c8cffb029550 100644 --- a/third_party/xla/xla/stream_executor/BUILD +++ b/third_party/xla/xla/stream_executor/BUILD @@ -55,6 +55,22 @@ cc_library( ], ) +#===--------------------------------------------------------------------------------------------===# +# Exporting headers for Tensorflow +#===--------------------------------------------------------------------------------------------===# + +# Tensorflow device memory allocators are aliases for StreamExecutor allocators, we export +# headers for Tensorflow to build shared libraries in OSS. + +filegroup( + name = "device_mem_allocator_headers", + srcs = [ + "device_host_allocator.h", + "device_mem_allocator.h", + ], + visibility = ["//visibility:public"], +) + #===--------------------------------------------------------------------------------------------===# # The stream_executor_headers target does not prescribe an implementation. @@ -318,15 +334,6 @@ cc_library( ], ) -filegroup( - name = "device_mem_allocator_headers", - srcs = [ - "device_host_allocator.h", - "device_mem_allocator.h", - ], - visibility = ["//visibility:public"], -) - cc_library( name = "executor_cache", srcs = [ From 9f8e4fa28ad556e1de4b3222c4e2f58bb331f774 Mon Sep 17 00:00:00 2001 From: Jieying Luo Date: Wed, 20 Sep 2023 21:10:11 -0700 Subject: [PATCH 073/567] Fix parsing visible_devices option in GPU plugin. 
PiperOrigin-RevId: 567173464 --- third_party/xla/xla/pjrt/c/pjrt_c_api_gpu_internal.cc | 2 +- third_party/xla/xla/pjrt/c/pjrt_c_api_gpu_test.cc | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/third_party/xla/xla/pjrt/c/pjrt_c_api_gpu_internal.cc b/third_party/xla/xla/pjrt/c/pjrt_c_api_gpu_internal.cc index 1cbc0dffe49148..75a14f42118b4d 100644 --- a/third_party/xla/xla/pjrt/c/pjrt_c_api_gpu_internal.cc +++ b/third_party/xla/xla/pjrt/c/pjrt_c_api_gpu_internal.cc @@ -86,7 +86,7 @@ PJRT_Error* PJRT_Client_Create(PJRT_Client_Create_Args* args) { if (auto it = create_options.find("visible_devices"); it != create_options.end()) { const auto& vec = std::get>(it->second); - visible_devices->insert(vec.begin(), vec.end()); + visible_devices.emplace(vec.begin(), vec.end()); } int node_id = 0; if (auto it = create_options.find("node_id"); it != create_options.end()) { diff --git a/third_party/xla/xla/pjrt/c/pjrt_c_api_gpu_test.cc b/third_party/xla/xla/pjrt/c/pjrt_c_api_gpu_test.cc index 0f3c089acb2c66..63102691defd0c 100644 --- a/third_party/xla/xla/pjrt/c/pjrt_c_api_gpu_test.cc +++ b/third_party/xla/xla/pjrt/c/pjrt_c_api_gpu_test.cc @@ -166,13 +166,14 @@ TEST(PjrtCApiGpuKVStoreTest, CreateClientWithKVCallback) { } } -TEST(PjrtCApiGpuAllocatorTest, ValidAllocatorOptionsParsing) { +TEST(PjrtCApiGpuAllocatorTest, ValidOptionsParsing) { auto api = GetPjrtApi(); std::vector allocator_options = {"default", "platform", "bfc", "cuda_async"}; for (const std::string& allocator_option : allocator_options) { absl::flat_hash_map options = { {"allocator", allocator_option}, + {"visible_devices", xla::PjRtValueType(std::vector{0, 1})}, }; if (allocator_option == "bfc" || allocator_option == "cuda_async") { options["memory_fraction"] = 0.5f; From b1995d4f73516a8330a0355b8b06193f44bc8e1f Mon Sep 17 00:00:00 2001 From: Jieying Luo Date: Wed, 20 Sep 2023 21:10:48 -0700 Subject: [PATCH 074/567] [PJRT C API] Fix PjRtCApiClient::ExecutableFingerprint. 
When the C API returns an empty fingerprint, PjRtCApiClient::ExecutableFingerprint should return a std::nullopt. PiperOrigin-RevId: 567173579 --- third_party/xla/xla/pjrt/pjrt_c_api_client.cc | 4 ++++ .../xla/xla/pjrt/pjrt_c_api_client_test.cc | 20 +++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/third_party/xla/xla/pjrt/pjrt_c_api_client.cc b/third_party/xla/xla/pjrt/pjrt_c_api_client.cc index a1a13020341e96..e6940affd26482 100644 --- a/third_party/xla/xla/pjrt/pjrt_c_api_client.cc +++ b/third_party/xla/xla/pjrt/pjrt_c_api_client.cc @@ -318,6 +318,10 @@ StatusOr> PjRtCApiClient::ExecutableFingerprint( xla::Status s = ::pjrt::PjrtErrorToStatus(error.get(), c_api_); return s; } + if (args.executable_fingerprint == nullptr || + args.executable_fingerprint_size == 0) { + return {std::nullopt}; + } std::string fingerprint = std::string(args.executable_fingerprint, args.executable_fingerprint_size); return {fingerprint}; diff --git a/third_party/xla/xla/pjrt/pjrt_c_api_client_test.cc b/third_party/xla/xla/pjrt/pjrt_c_api_client_test.cc index 7ce67c622f0640..d4477696f18d7a 100644 --- a/third_party/xla/xla/pjrt/pjrt_c_api_client_test.cc +++ b/third_party/xla/xla/pjrt/pjrt_c_api_client_test.cc @@ -99,5 +99,25 @@ TEST(PjRtCApiClientTest, PlatformId) { EXPECT_EQ(client->platform_id(), xla::CpuId()); } +TEST(PjRtCApiClientTest, EmptyExecutableFingerprint) { + SetUpCpuPjRtApi(); + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr client, + GetCApiClient("cpu")); + Shape shape = ShapeUtil::MakeShapeWithType({4}); + XlaBuilder builder("sum"); + auto inp_0 = Parameter(&builder, 0, shape, "input0"); + auto inp_1 = Parameter(&builder, 1, shape, "input1"); + auto sum = Add(inp_0, inp_1); + builder.SetUpAlias({}, 0, {}); + auto computation = builder.Build(sum).value(); + std::unique_ptr executable = + client->Compile(computation, CompileOptions()).value(); + + TF_ASSERT_OK_AND_ASSIGN(std::optional fingerprint, + client->ExecutableFingerprint(*executable)); + + 
EXPECT_FALSE(fingerprint.has_value()); +} + } // namespace } // namespace xla From 88c8f44c2860956460a4a1722a3d2e020533a6d9 Mon Sep 17 00:00:00 2001 From: "Jiyoun (Jen) Ha" Date: Wed, 20 Sep 2023 22:30:49 -0700 Subject: [PATCH 075/567] (2/N) Refactor stablehlo bridge namespaces to mlir::quant::stablehlo. PiperOrigin-RevId: 567187656 --- .../mlir/lite/stablehlo/transforms/transforms.cc | 2 +- tensorflow/compiler/mlir/python/mlir.cc | 2 +- .../passes/bridge/convert_mhlo_quant_to_int.cc | 8 +++----- .../passes/bridge/convert_tf_quant_ops_to_mhlo.cc | 15 ++++++--------- .../bridge/convert_tf_quant_to_mhlo_int_test.cc | 6 ++---- .../passes/bridge/convert_tf_quant_types.cc | 8 +++----- .../passes/bridge/convert_tf_quant_types_test.cc | 8 +++----- .../stablehlo/passes/bridge/passes.cc | 4 ++-- .../quantization/stablehlo/passes/bridge/passes.h | 4 ++-- .../stablehlo/passes/bridge/passes.td | 8 ++++---- .../passes/bridge/verify_quant_legalization.cc | 6 ++---- .../stablehlo/tools/stablehlo_quant_opt.cc | 2 +- .../quantization/tensorflow/quantize_passes.cc | 3 ++- .../tensorflow/quantize_preprocess.cc | 2 +- .../mlir/tf2xla/api/v1/compile_mlir_util.cc | 4 ++-- tensorflow/compiler/mlir/tf_mlir_opt_main.cc | 2 +- 16 files changed, 36 insertions(+), 48 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/transforms.cc b/tensorflow/compiler/mlir/lite/stablehlo/transforms/transforms.cc index 3b4e3a58bec738..562afd53f6f76c 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/transforms/transforms.cc +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/transforms.cc @@ -67,7 +67,7 @@ void AddTFToStablehloPasses(OpPassManager& pm, bool skip_resize, // Legalizes TF UniformQuantized types into MHLO. pm.addNestedPass( - mlir::stablehlo::CreateConvertTFQuantOpsToMHLOPass()); + mlir::quant::stablehlo::CreateConvertTFQuantOpsToMHLOPass()); pm.addPass(mlir::createCanonicalizerPass()); // TF -> StableHLO legalization. 
diff --git a/tensorflow/compiler/mlir/python/mlir.cc b/tensorflow/compiler/mlir/python/mlir.cc index 2fc6dcdd84cf24..4841c0ad85714f 100644 --- a/tensorflow/compiler/mlir/python/mlir.cc +++ b/tensorflow/compiler/mlir/python/mlir.cc @@ -97,7 +97,7 @@ static void RegisterPasses() { // passes. mlir::mhlo::registerTfXlaPasses(); mlir::mhlo::registerLegalizeTFPass(); - mlir::stablehlo::registerBridgePasses(); + mlir::quant::stablehlo::registerBridgePasses(); mlir::tosa::registerLegalizeTosaPasses(); mlir::tosa::registerTFtoTOSALegalizationPipeline(); mlir::tosa::registerTFLtoTOSALegalizationPipeline(); diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_mhlo_quant_to_int.cc b/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_mhlo_quant_to_int.cc index 4a26d6d70398b5..255c69573f08eb 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_mhlo_quant_to_int.cc +++ b/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_mhlo_quant_to_int.cc @@ -46,8 +46,7 @@ limitations under the License. 
#include "xla/mlir_hlo/mhlo/IR/hlo_ops.h" #include "xla/mlir_hlo/mhlo/transforms/rewriters.h" -namespace mlir { -namespace stablehlo { +namespace mlir::quant::stablehlo { namespace { #define GEN_PASS_DEF_CONVERTMHLOQUANTTOINT @@ -735,12 +734,11 @@ void ConvertMHLOQuantToInt::runOnOperation() { } } -} // end namespace +} // namespace std::unique_ptr> createConvertMHLOQuantToIntPass( bool legalize_chlo) { return std::make_unique(legalize_chlo); } -} // end namespace stablehlo -} // end namespace mlir +} // namespace mlir::quant::stablehlo diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_tf_quant_ops_to_mhlo.cc b/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_tf_quant_ops_to_mhlo.cc index 28b72634516b59..a24db896d79f1d 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_tf_quant_ops_to_mhlo.cc +++ b/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_tf_quant_ops_to_mhlo.cc @@ -58,9 +58,7 @@ limitations under the License. #include "tensorflow/core/util/quantization/uniform_quant_ops_attr.pb.h" #include "tensorflow/core/util/quantization/uniform_quant_ops_params.h" -// TODO: b/289560952 - Move to mlir::quant::stablehlo namespace. 
-namespace mlir { -namespace stablehlo { +namespace mlir::quant::stablehlo { namespace { using quant::tensorflow::GetDenseAttrFromTensorProtoAttr; @@ -138,7 +136,7 @@ FailureOr CreateConstantOrConvertOp(Operation *op, Value operand, } xla::ConvolutionDimensionNumbers ConvertConvolutionDimensionNumbers( - const tensorflow::UniformQuantizedConvolutionDimensionNumbersAttr + const ::tensorflow::UniformQuantizedConvolutionDimensionNumbersAttr &dnums_input) { xla::ConvolutionDimensionNumbers dnums; dnums.set_input_batch_dimension(dnums_input.input_batch_dimension()); @@ -207,11 +205,11 @@ FailureOr ConvertPaddingAttr( const int64_t stride = op.getWindowStridesAttr()[i].template cast().getInt(); const int64_t lhs_size_dilated = - tensorflow::UniformQuantizedConvolutionParams::DilatedSize( + ::tensorflow::UniformQuantizedConvolutionParams::DilatedSize( lhs_shape.getDimSize(dnums.input_spatial_dimensions(i)), op.getLhsDilationAttr()[i].template cast().getInt()); const int64_t rhs_size_dilated = - tensorflow::UniformQuantizedConvolutionParams::DilatedSize( + ::tensorflow::UniformQuantizedConvolutionParams::DilatedSize( rhs_shape.getDimSize(dnums.kernel_spatial_dimensions(i)), op.getRhsDilationAttr()[i].template cast().getInt()); @@ -238,7 +236,7 @@ FailureOr> ConvertToMhloConvolutionOpAttrs( UniformQuantizedConvolutionOp op, PatternRewriter &rewriter) { // TODO(b/261005147): Update the lowering logic after migration to mhlo // ConvolutionDimensionNumbers. 
- tensorflow::UniformQuantizedConvolutionDimensionNumbersAttr dnums_input; + ::tensorflow::UniformQuantizedConvolutionDimensionNumbersAttr dnums_input; if (!dnums_input.ParseFromString(std::string(op.getDimensionNumbers()))) { return op->emitError("Parse dimension_numbers failed."); } @@ -766,5 +764,4 @@ CreateConvertTFQuantOpsToMHLOPass() { return std::make_unique(); } -} // namespace stablehlo -} // namespace mlir +} // namespace mlir::quant::stablehlo diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_tf_quant_to_mhlo_int_test.cc b/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_tf_quant_to_mhlo_int_test.cc index e78ebbfb0ef961..5e80a9ce8d894e 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_tf_quant_to_mhlo_int_test.cc +++ b/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_tf_quant_to_mhlo_int_test.cc @@ -35,8 +35,7 @@ limitations under the License. #include "xla/statusor.h" #include "xla/tests/literal_test_util.h" -namespace mlir { -namespace stablehlo { +namespace mlir::quant::stablehlo { namespace { class ConvertTfQuantToMhloIntTest : public ::testing::Test { @@ -194,5 +193,4 @@ func.func @main(%input: tensor<1x2xf32>, %filter: tensor<2x3xf32>) -> tensor<1x3 } } // namespace -} // namespace stablehlo -} // end namespace mlir +} // namespace mlir::quant::stablehlo diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_tf_quant_types.cc b/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_tf_quant_types.cc index a60df89d58ed59..65192fc1117673 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_tf_quant_types.cc +++ b/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_tf_quant_types.cc @@ -43,8 +43,7 @@ limitations under the License. 
#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "tensorflow/core/lib/monitoring/counter.h" -namespace mlir { -namespace stablehlo { +namespace mlir::quant::stablehlo { namespace { using quant::tensorflow::GetDenseAttrFromTensorProtoAttr; @@ -56,7 +55,7 @@ using quant::tensorflow::IsTFUniformQuantizedOp; #include "tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/passes.h.inc" // TODO: b/290366702 - Temporarily added metrics for debugging. -auto *mlir_tf_quant_op_count = tensorflow::monitoring::Counter<1>::New( +auto *mlir_tf_quant_op_count = ::tensorflow::monitoring::Counter<1>::New( "/tensorflow/core/tf2xla/tf_quant_op_count" /*metric_name*/, "Counts the number of ops that has qint types" /*metric description*/, "op_name" /*metric label*/); @@ -326,5 +325,4 @@ std::unique_ptr> CreateConvertTFQuantTypesPass() { return std::make_unique(); } -} // namespace stablehlo -} // namespace mlir +} // namespace mlir::quant::stablehlo diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_tf_quant_types_test.cc b/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_tf_quant_types_test.cc index cc46304650b267..856bbd49930341 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_tf_quant_types_test.cc +++ b/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_tf_quant_types_test.cc @@ -30,8 +30,7 @@ limitations under the License. 
#include "tensorflow/core/lib/monitoring/cell_reader.h" #include "tsl/platform/statusor.h" -namespace mlir { -namespace stablehlo { +namespace mlir::quant::stablehlo { namespace { using ::mlir::DialectRegistry; @@ -54,7 +53,7 @@ class LegalizeTfTypesTest : public ::testing::Test { pm_ = std::make_unique(&context_); pm_->addNestedPass( - mlir::stablehlo::CreateConvertTFQuantTypesPass()); + quant::stablehlo::CreateConvertTFQuantTypesPass()); } mlir::LogicalResult Run() { return pm_->run(module_.get()); } @@ -105,5 +104,4 @@ TEST_F(LegalizeTfTypesTest, RecordsStreamzNoQuantOps) { } } // namespace -} // namespace stablehlo -} // namespace mlir +} // namespace mlir::quant::stablehlo diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/passes.cc b/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/passes.cc index 1aae78d6340645..aab45eae763e33 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/passes.cc +++ b/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/passes.cc @@ -21,7 +21,7 @@ limitations under the License. #include "mlir/Transforms/Passes.h" // from @llvm-project #include "xla/mlir_hlo/mhlo/transforms/passes.h" -namespace mlir::stablehlo { +namespace mlir::quant::stablehlo { void AddQuantizationLoweringPasses(mlir::OpPassManager& pm) { // These passes are grouped together and must run in this specific order. 
@@ -35,4 +35,4 @@ void AddQuantizationLoweringPasses(mlir::OpPassManager& pm) { pm.addNestedPass(CreateVerifyQuantLegalizationPass()); } -} // namespace mlir::stablehlo +} // namespace mlir::quant::stablehlo diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/passes.h b/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/passes.h index d03ef5837c670c..983339d813c7a0 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/passes.h +++ b/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/passes.h @@ -22,7 +22,7 @@ limitations under the License. #include "mlir/Dialect/Func/IR/FuncOps.h" // from @llvm-project #include "mlir/Pass/Pass.h" // from @llvm-project -namespace mlir::stablehlo { +namespace mlir::quant::stablehlo { // Legalizes from MHLO quantized ops with MHLO quant types to MHLO primitive ops // like int ops. @@ -58,6 +58,6 @@ void AddQuantizationLoweringPasses(mlir::OpPassManager &pm); #define GEN_PASS_DECL_CONVERTTFQUANTTYPES #define GEN_PASS_DECL_VERIFYQUANTLEGALIZATION #include "tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/passes.h.inc" -} // namespace mlir::stablehlo +} // namespace mlir::quant::stablehlo #endif // TENSORFLOW_COMPILER_MLIR_QUANTIZATION_STABLEHLO_PASSES_BRIDGE_PASSES_H_ diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/passes.td b/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/passes.td index cf0cfddd982979..af616129ccf785 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/passes.td +++ b/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/passes.td @@ -29,7 +29,7 @@ def ConvertMHLOQuantToInt : Pass<"convert-mhlo-quant-to-int", "mlir::func::FuncO "Legalizes intermediate chlo ops to hlo"> ]; - let constructor = "mlir::stablehlo::createConvertMHLOQuantToIntPass()"; + let constructor = "mlir::quant::stablehlo::createConvertMHLOQuantToIntPass()"; let dependentDialects = ["chlo::ChloDialect", 
"mhlo::MhloDialect", "quant::QuantizationDialect", "shape::ShapeDialect", @@ -43,7 +43,7 @@ def ConvertTFQuantOpsToMHLO : Pass<"quant-convert-tf-quant-ops-to-mhlo", "mlir:: Convert TF Quant ops to MHLO quant ops. }]; - let constructor = "mlir::stablehlo::CreateConvertTFQuantOpsToMHLOPass()"; + let constructor = "mlir::quant::stablehlo::CreateConvertTFQuantOpsToMHLOPass()"; let dependentDialects = ["TF::TensorFlowDialect", "chlo::ChloDialect", "mhlo::MhloDialect", "tf_type::TFTypeDialect", "quant::QuantizationDialect"]; @@ -58,7 +58,7 @@ def ConvertTFQuantTypes : Pass<"convert-tf-quant-types", "mlir::func::FuncOp"> { tf.Cast around the ops so that they are still valid. }]; - let constructor = "mlir::stablehlo::CreateConvertTFQuantTypesPass()"; + let constructor = "mlir::quant::stablehlo::CreateConvertTFQuantTypesPass()"; let dependentDialects = ["TF::TensorFlowDialect", "tf_type::TFTypeDialect"]; } @@ -70,7 +70,7 @@ def VerifyQuantLegalization : Pass<"verify-quant-legalization", "mlir::func::Fun and reports an error about which op failed to legalize. This pass does not transform any ops and is checking.}]; - let constructor = "mlir::stablehlo::CreateVerifyQuantLegalizationPass()"; + let constructor = "mlir::quant::stablehlo::CreateVerifyQuantLegalizationPass()"; let dependentDialects = ["tf_type::TFTypeDialect", "quant::QuantizationDialect"]; diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/verify_quant_legalization.cc b/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/verify_quant_legalization.cc index c8833ebd8d37d9..361d98c7775abe 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/verify_quant_legalization.cc +++ b/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/verify_quant_legalization.cc @@ -36,8 +36,7 @@ limitations under the License. 
#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "xla/mlir_hlo/mhlo/IR/hlo_ops.h" -namespace mlir { -namespace stablehlo { +namespace mlir::quant::stablehlo { namespace { using quant::tensorflow::IsTFQintType; @@ -90,5 +89,4 @@ CreateVerifyQuantLegalizationPass() { return std::make_unique(); } -} // namespace stablehlo -} // namespace mlir +} // namespace mlir::quant::stablehlo diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/tools/stablehlo_quant_opt.cc b/tensorflow/compiler/mlir/quantization/stablehlo/tools/stablehlo_quant_opt.cc index c72a75d1acbc22..d05bdb48643458 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/tools/stablehlo_quant_opt.cc +++ b/tensorflow/compiler/mlir/quantization/stablehlo/tools/stablehlo_quant_opt.cc @@ -34,7 +34,7 @@ int main(int argc, char **argv) { mlir::registerAllPasses(); mlir::registerTensorFlowPasses(); - mlir::stablehlo::registerBridgePasses(); + mlir::quant::stablehlo::registerBridgePasses(); mlir::DialectRegistry registry; registry.insert MHLO legalization. pm.addPass(mlir::mhlo::createStablehloLegalizeToHloPass()); pm.addNestedPass( - mlir::stablehlo::createConvertMHLOQuantToIntPass(/*legalize_chlo=*/true)); + mlir::quant::stablehlo::createConvertMHLOQuantToIntPass( + /*legalize_chlo=*/true)); pm.addNestedPass(mlir::createCanonicalizerPass()); pm.addPass(mlir::createSymbolDCEPass()); // MHLO -> StableHLO legalization. diff --git a/tensorflow/compiler/mlir/quantization/tensorflow/quantize_preprocess.cc b/tensorflow/compiler/mlir/quantization/tensorflow/quantize_preprocess.cc index 3954d90b48820f..c766a3c87ca75e 100644 --- a/tensorflow/compiler/mlir/quantization/tensorflow/quantize_preprocess.cc +++ b/tensorflow/compiler/mlir/quantization/tensorflow/quantize_preprocess.cc @@ -92,7 +92,7 @@ void AddTFToStablehloPasses(mlir::PassManager& pm) { // Legalizes TF UniformQuantized types into MHLO. Part of the official // TF/XLA bridge component. 
pm.addNestedPass( - mlir::stablehlo::CreateConvertTFQuantOpsToMHLOPass()); + mlir::quant::stablehlo::CreateConvertTFQuantOpsToMHLOPass()); pm.addPass(mlir::createCanonicalizerPass()); // TF -> StableHLO legalization. diff --git a/tensorflow/compiler/mlir/tf2xla/api/v1/compile_mlir_util.cc b/tensorflow/compiler/mlir/tf2xla/api/v1/compile_mlir_util.cc index 24dbf9c9d23a48..0c1c3b95b5f8d3 100644 --- a/tensorflow/compiler/mlir/tf2xla/api/v1/compile_mlir_util.cc +++ b/tensorflow/compiler/mlir/tf2xla/api/v1/compile_mlir_util.cc @@ -340,7 +340,7 @@ void AddLegalizationPasses(mlir::OpPassManager& pm, bool legalize_chlo, bool lower_to_xla_hlo) { if (lower_to_xla_hlo) { // Lower TF quant ops and types to MHLO int. - mlir::stablehlo::AddQuantizationLoweringPasses(pm); + mlir::quant::stablehlo::AddQuantizationLoweringPasses(pm); pm.addPass(mlir::mhlo::createLegalizeTFPass( legalize_chlo, @@ -449,7 +449,7 @@ void CreateConvertMlirToXlaHloPipeline( pm.addNestedPass(mlir::TF::CreateLowerQuantizedPass()); pm.addNestedPass( - mlir::stablehlo::CreateConvertTFQuantTypesPass()); + mlir::quant::stablehlo::CreateConvertTFQuantTypesPass()); if (lower_to_xla_hlo) { for (auto& target_pass : custom_legalization_passes) { diff --git a/tensorflow/compiler/mlir/tf_mlir_opt_main.cc b/tensorflow/compiler/mlir/tf_mlir_opt_main.cc index ed8106175e8ae0..433a05f396bf2f 100644 --- a/tensorflow/compiler/mlir/tf_mlir_opt_main.cc +++ b/tensorflow/compiler/mlir/tf_mlir_opt_main.cc @@ -53,7 +53,7 @@ int main(int argc, char **argv) { // These are in compiler/mlir/tf2xla and not part of the above MHLO passes. mlir::mhlo::registerLegalizeTfPasses(); mlir::mhlo::registerTfXlaPasses(); - mlir::stablehlo::registerBridgePasses(); + mlir::quant::stablehlo::registerBridgePasses(); mlir::tosa::registerLegalizeTosaPasses(); mlir::tosa::registerTFtoTOSALegalizationPipeline(); mlir::tosa::registerTFLtoTOSALegalizationPipeline(); From dd689216576d6af7fb6e49453909458e5dde519f Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 20 Sep 2023 22:55:09 -0700 Subject: [PATCH 076/567] Internal Code Change PiperOrigin-RevId: 567191671 --- tensorflow/core/common_runtime/eager/BUILD | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tensorflow/core/common_runtime/eager/BUILD b/tensorflow/core/common_runtime/eager/BUILD index ef3e32c49a6529..b3afde2fda2770 100644 --- a/tensorflow/core/common_runtime/eager/BUILD +++ b/tensorflow/core/common_runtime/eager/BUILD @@ -17,10 +17,7 @@ load( package( # copybara:uncomment default_applicable_licenses = ["//tensorflow:license"], - default_visibility = [ - "//tensorflow:internal", - "//tensorflow_models:__subpackages__", - ], + default_visibility = ["//tensorflow:internal"], licenses = ["notice"], ) From 6da396b975ccf5385564cf13d13795072d0411c4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 21 Sep 2023 02:02:01 -0700 Subject: [PATCH 077/567] compat: Update forward compatibility horizon to 2023-09-21 PiperOrigin-RevId: 567232445 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 0be4fb3a98e8b1..6e494480e54ba7 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -29,7 +29,7 @@ # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2023, 9, 20) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2023, 9, 21) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From 7f1050a6976d11bfb0bb37bdfc82350c0a238faa Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 21 Sep 2023 02:02:19 -0700 Subject: [PATCH 078/567] Update GraphDef version to 1626. PiperOrigin-RevId: 567232525 --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 05991cc0e70419..7c76122b9da32f 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 1625 // Updated: 2023/9/20 +#define TF_GRAPH_DEF_VERSION 1626 // Updated: 2023/9/21 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From 0c7098d3552a12037d3fede9784952de732675c2 Mon Sep 17 00:00:00 2001 From: George Necula Date: Thu, 21 Sep 2023 05:02:22 -0700 Subject: [PATCH 079/567] Improve shape refinement to not require inlining. PiperOrigin-RevId: 567273141 --- .../compiler/tests/xla_call_module_test.py | 82 ++ third_party/stablehlo/temporary.patch | 1180 ++++++++++++++++- .../xla/third_party/stablehlo/temporary.patch | 1180 ++++++++++++++++- .../xla/python/refine_polymorphic_shapes.cc | 4 - 4 files changed, 2410 insertions(+), 36 deletions(-) diff --git a/tensorflow/compiler/tests/xla_call_module_test.py b/tensorflow/compiler/tests/xla_call_module_test.py index a24930a7b8c846..ee651b107c5b8e 100644 --- a/tensorflow/compiler/tests/xla_call_module_test.py +++ b/tensorflow/compiler/tests/xla_call_module_test.py @@ -259,6 +259,88 @@ def f(x): # x: f32[2, b] self._assertOpOutputMatchesExpected(f, (x,), (np.sin(x),)) + def test_poly_with_inner_token(self): + # The inner functions pass tokens through + x = np.arange(12, dtype=np.float32).reshape((3, 4)) + + def f(x): # x : f32[b0, b1] + # 1 + sin(x) + module, version = serialize(""" +module @jit_f.0 attributes {jax.uses_shape_polymorphism = true} { + func.func public @main(%arg0: tensor) 
-> tensor { + %0 = stablehlo.get_dimension_size %arg0, dim = 0 : (tensor) -> tensor + %1 = stablehlo.get_dimension_size %arg0, dim = 1 : (tensor) -> tensor + %2 = stablehlo.constant dense<> : tensor<0xi1> + %3:2 = call @_wrapped_main(%0, %1, %2, %arg0) : (tensor, tensor, tensor<0xi1>, tensor) -> (tensor<0xi1>, tensor) + return %3#1 : tensor + } + + func.func private @_wrapped_main(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xi1> {jax.token = true}, %arg3: tensor) -> (tensor<0xi1> {jax.token = true}, tensor) { + %0 = stablehlo.create_token : !stablehlo.token + %1 = stablehlo.sine %arg3 : tensor + %2 = stablehlo.constant dense<1.000000e+00> : tensor + %3 = stablehlo.reshape %arg0 : (tensor) -> tensor<1xi32> + %4 = stablehlo.reshape %arg1 : (tensor) -> tensor<1xi32> + %5 = stablehlo.concatenate %3, %4, dim = 0 : (tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %6 = stablehlo.dynamic_broadcast_in_dim %2, %5, dims = [] : (tensor, tensor<2xi32>) -> tensor + %7 = stablehlo.add %1, %6 : tensor + %8 = stablehlo.constant dense<> : tensor<0xi1> + return %8, %7 : tensor<0xi1>, tensor + } +} +""") + return xla.call_module( + [x], + version=version, + module=module, + Tout=[x.dtype], + Sout=[x.shape], + has_token_input_output=False, + platforms=[self.testing_platform()], + ) + + self._assertOpOutputMatchesExpected(f, (x,), (1. 
+ np.sin(x),)) + + def test_poly_with_inner_prefix_token(self): + # Sometimes inner functions take a token as first argument + x = np.arange(12, dtype=np.float32).reshape((3, 4)) + + def f(x): # x : f32[b0, b1] + # 1 + sin(x) + module, version = serialize(""" +module @jit_f.0 attributes {jax.uses_shape_polymorphism = true} { + func.func public @main(%arg0: tensor) -> tensor { + %0 = stablehlo.get_dimension_size %arg0, dim = 0 : (tensor) -> tensor + %1 = stablehlo.get_dimension_size %arg0, dim = 1 : (tensor) -> tensor + %2 = stablehlo.create_token : !stablehlo.token + %3:2 = call @_wrapped_main(%2, %0, %1, %arg0) : (!stablehlo.token, tensor, tensor, tensor) -> (!stablehlo.token, tensor) + return %3#1 : tensor + } + + func.func private @_wrapped_main(%arg_token: !stablehlo.token, %arg0: tensor, %arg1: tensor, %arg3: tensor) -> (!stablehlo.token, tensor) { + %1 = stablehlo.sine %arg3 : tensor + %2 = stablehlo.constant dense<1.000000e+00> : tensor + %3 = stablehlo.reshape %arg0 : (tensor) -> tensor<1xi32> + %4 = stablehlo.reshape %arg1 : (tensor) -> tensor<1xi32> + %5 = stablehlo.concatenate %3, %4, dim = 0 : (tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %6 = stablehlo.dynamic_broadcast_in_dim %2, %5, dims = [] : (tensor, tensor<2xi32>) -> tensor + %7 = stablehlo.add %1, %6 : tensor + return %arg_token, %7 : !stablehlo.token, tensor + } +} +""") + return xla.call_module( + [x], + version=version, + module=module, + Tout=[x.dtype], + Sout=[x.shape], + has_token_input_output=False, + platforms=[self.testing_platform()], + ) + + self._assertOpOutputMatchesExpected(f, (x,), (1. 
+ np.sin(x),)) + def test_wrong_actual_args_errors(self): x = np.arange(6, dtype=np.float32).reshape((3, 2)) y = np.arange(6, dtype=np.int32).reshape((2, 3)) diff --git a/third_party/stablehlo/temporary.patch b/third_party/stablehlo/temporary.patch index 4c4228163a6f04..0cb078c3b89795 100644 --- a/third_party/stablehlo/temporary.patch +++ b/third_party/stablehlo/temporary.patch @@ -1426,7 +1426,353 @@ diff --ruN a/stablehlo/stablehlo/tests/stablehlo_canonicalize_dynamism.mlir b/st diff --ruN a/stablehlo/stablehlo/tests/stablehlo_refine_shapes.mlir b/stablehlo/stablehlo/tests/stablehlo_refine_shapes.mlir --- stablehlo/stablehlo/tests/stablehlo_refine_shapes.mlir +++ stablehlo/stablehlo/tests/stablehlo_refine_shapes.mlir -@@ -607,12 +607,55 @@ +@@ -31,6 +31,7 @@ + + // ----- + ++// CHECK-LABEL: module @has_main + module @has_main { + // CHECK: main + func.func @main(%arg0: tensor<4xf32>) -> tensor<*xi32> { +@@ -38,17 +39,11 @@ + %0 = stablehlo.bitcast_convert %arg0 : (tensor<4xf32>) -> tensor<*xi32> + func.return %0 : tensor<*xi32> + } +- +- // CHECK: helper +- func.func @helper(%arg0: tensor<4xf32>) -> tensor<*xi32> { +- // CHECK: stablehlo.bitcast_convert{{.*}} -> tensor<*xi32> +- %0 = stablehlo.bitcast_convert %arg0 : (tensor<4xf32>) -> tensor<*xi32> +- func.return %0 : tensor<*xi32> +- } +-} +- +-// ----- +- ++} ++ ++// ----- ++ ++// CHECK-LABEL: func @error_unsupported_operation + func.func @error_unsupported_operation(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> index { + // CHECK: stablehlo.add{{.*}} -> tensor + %0 = stablehlo.add %arg0, %arg1 : (tensor<4xf32>, tensor<4xf32>) -> tensor +@@ -472,11 +467,312 @@ + + // ----- + +-// CHECK-LABEL: func @refine_bitcast_convert_same_bitwidth +-func.func @refine_bitcast_convert_same_bitwidth(%arg0 : tensor<4xf32>) -> tensor<*xi32> { ++// CHECK-LABEL: func @refine_bitcast_convert_same_bitwidth_unranked_result ++func.func @refine_bitcast_convert_same_bitwidth_unranked_result(%arg0 : tensor<4xf32>) -> tensor<*xi32> { + 
// CHECK: stablehlo.bitcast_convert{{.*}} -> tensor<4xi32> + %0 = stablehlo.bitcast_convert %arg0 : (tensor<4xf32>) -> tensor<*xi32> + func.return %0 : tensor<*xi32> ++} ++ ++// ----- ++ ++// CHECK-LABEL: func @refine_bitcast_convert_same_bitwidth ++func.func @refine_bitcast_convert_same_bitwidth() -> tensor { ++ %0 = stablehlo.constant dense<[3, 5, 0]> : tensor<3xi32> ++ %21 = stablehlo.dynamic_iota %0, dim = 0 : (tensor<3xi32>) -> tensor ++ // CHECK: stablehlo.bitcast_convert{{.*}} -> tensor<3x5x0xf32> ++ %48 = stablehlo.bitcast_convert %21 : (tensor) -> tensor ++ return %48 : tensor ++} ++ ++// ----- ++ ++// CHECK-LABEL: module @refine_call ++module @refine_call { ++ func.func @main(%arg1: tensor<4xf32>) -> tensor { ++ %0 = stablehlo.bitcast_convert %arg1 : (tensor<4xf32>) -> tensor ++ %1 = stablehlo.constant dense<4> : tensor ++ // CHECK: refine_call_callee{{.*}}-> tensor<4xf32> ++ %2 = call @refine_call_callee(%1, %0) : (tensor, tensor) -> tensor ++ return %2 : tensor ++ } ++ // CHECK: refine_call_callee(%arg0: tensor<4xf32>) -> tensor<4xf32> ++ func.func @refine_call_callee(%arg0: tensor, %arg1: tensor) -> tensor { ++ // CHECK: stablehlo.constant dense<4> ++ %0 = stablehlo.reshape %arg0 : (tensor) -> tensor<1xi32> ++ %1 = stablehlo.dynamic_iota %0, dim = 0 : (tensor<1xi32>) -> tensor ++ return %1 : tensor ++ } ++} ++ ++// ----- ++ ++// CHECK-LABEL: module @refine_call_dimension_arguments ++module @refine_call_dimension_arguments { ++ func.func public @main(%arg0: tensor) -> tensor { ++ // CHECK: [[RESULT:%.*]] = call @callee ++ // CHECK: return [[RESULT]] ++ %0 = stablehlo.constant dense<3> : tensor ++ %1 = call @callee(%0, %0, %arg0) : (tensor, tensor, tensor) -> tensor ++ return %1 : tensor ++ } ++ // %arg0 and %arg1 are dimension arguments ++ // CHECK: @callee([[ARG0:%.*]]: tensor) -> tensor ++ func.func private @callee(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { ++ // CHECK: [[RESULT0:%.*]] = stablehlo.constant dense<6> ++ // CHECK: 
[[RESULT1:%.*]] = stablehlo.add [[RESULT0]], [[ARG0]] ++ // CHECK: return [[RESULT1]] ++ %0 = stablehlo.add %arg0, %arg1: tensor ++ %1 = stablehlo.add %0, %arg2: tensor ++ return %1 : tensor ++ } ++} ++ ++// ----- ++ ++// CHECK-LABEL: module @refine_call_prefix_token_and_dimension_arguments ++module @refine_call_prefix_token_and_dimension_arguments { ++ func.func public @main(%arg0: tensor) -> tensor { ++ // CHECK: [[RESULT:%.*]] = call @callee ++ // CHECK: return [[RESULT]] ++ %0 = stablehlo.constant dense<3> : tensor ++ %token = stablehlo.create_token : !stablehlo.token ++ %1 = call @callee(%token, %0, %0, %arg0) : (!stablehlo.token, tensor, tensor, tensor) -> tensor ++ return %1 : tensor ++ } ++ // %arg0 and %arg1 are dimension arguments ++ // CHECK: @callee([[ARG_TOKEN:%.*]]: !stablehlo.token, [[ARG0:%.*]]: tensor ++ func.func private @callee(%arg_token: !stablehlo.token, %arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { ++ // CHECK: [[RESULT0:%.*]] = stablehlo.constant dense<6> ++ // CHECK: [[RESULT1:%.*]] = stablehlo.add [[RESULT0]], [[ARG0]] ++ // CHECK: return [[RESULT1]] ++ %0 = stablehlo.add %arg0, %arg1: tensor ++ %1 = stablehlo.add %0, %arg2: tensor ++ return %1 : tensor ++ } ++} ++ ++// ----- ++ ++// CHECK-LABEL: module @refine_call_dimension_arguments_followed_by_token ++module @refine_call_dimension_arguments_followed_by_token { ++ func.func public @main(%arg0: tensor) -> tensor { ++ // CHECK: [[RESULT:%.*]] = call @callee ++ // CHECK: return [[RESULT]] ++ %0 = stablehlo.constant dense<3> : tensor ++ %token = stablehlo.create_token : !stablehlo.token ++ %1 = call @callee(%0, %0, %token, %arg0) : (tensor, tensor, !stablehlo.token, tensor) -> tensor ++ return %1 : tensor ++ } ++ // %arg0 and %arg1 are dimension arguments ++ // CHECK: @callee([[ARG_TOKEN:%.*]]: !stablehlo.token, [[ARG0:%.*]]: tensor ++ func.func private @callee(%arg0: tensor, %arg1: tensor, %arg_token: !stablehlo.token, %arg2: tensor) -> tensor { ++ // CHECK: [[RESULT0:%.*]] = 
stablehlo.constant dense<6> ++ // CHECK: [[RESULT1:%.*]] = stablehlo.add [[RESULT0]], [[ARG0]] ++ // CHECK: return [[RESULT1]] ++ %0 = stablehlo.add %arg0, %arg1: tensor ++ %1 = stablehlo.add %0, %arg2: tensor ++ return %1 : tensor ++ } ++} ++ ++// ----- ++ ++// CHECK-LABEL: module @refine_multiple_call_with_same_context ++module @refine_multiple_call_with_same_context { ++ func.func @main(%arg1: tensor<4xf32>) -> tensor { ++ %0 = stablehlo.bitcast_convert %arg1 : (tensor<4xf32>) -> tensor ++ %arg0_new = "stablehlo.get_dimension_size"(%0) {dimension = 0 : i64} : (tensor) -> tensor ++ // CHECK: refine_call_callee{{.*}}-> tensor<4xf32> ++ %1 = call @refine_call_callee(%arg0_new, %0) : (tensor, tensor) -> tensor ++ // CHECK: refine_call_callee{{.*}}-> tensor<4xf32> ++ %2 = call @refine_call_callee(%arg0_new, %1) : (tensor, tensor) -> tensor ++ return %2 : tensor ++ } ++ // CHECK: refine_call_callee{{.*}}-> tensor<4xf32> ++ func.func @refine_call_callee(%arg0: tensor, %arg1: tensor) -> tensor { ++ return %arg1 : tensor ++ } ++} ++ ++// ----- ++ ++// CHECK-LABEL: module @refine_multiple_call_constant_function ++module @refine_multiple_call_constant_function { ++ func.func @main(%arg0: tensor<5xf32>) -> tensor { ++ // CHECK: [[RESULT0:%.*]] = stablehlo.constant dense<16> ++ // CHECK: return [[RESULT0]] ++ %0 = stablehlo.constant dense<4> : tensor ++ %1 = call @refine_call_callee(%0, %arg0) : (tensor, tensor<5xf32>) -> tensor ++ %2 = call @refine_call_callee(%0, %arg0) : (tensor, tensor<5xf32>) -> tensor ++ %3 = stablehlo.add %1, %2: tensor ++ return %3 : tensor ++ } ++ func.func @refine_call_callee(%arg0: tensor, %arg1: tensor<5xf32>) -> tensor { ++ // CHECK: [[RESULT1:%.*]] = stablehlo.constant dense<8> ++ // CHECK: return [[RESULT1]] ++ %0 = stablehlo.add %arg0, %arg0: tensor ++ return %0 : tensor ++ } ++} ++ ++// ----- ++ ++module @refine_call_multiple_with_different_number_dimension_arguments { ++ func.func @main(%arg1: tensor<4xf32>) -> tensor { ++ %0 = 
stablehlo.bitcast_convert %arg1 : (tensor<4xf32>) -> tensor ++ %arg0_new = "stablehlo.get_dimension_size"(%0) {dimension = 0 : i64} : (tensor) -> tensor ++ %1 = call @refine_call_callee(%arg0_new, %0) : (tensor, tensor) -> tensor ++ // Ensure that the first argument is not a constant at the second call site ++ %arg0_different_f32 = stablehlo.bitcast_convert %arg0_new : (tensor) -> tensor ++ %arg0_different_i32 = stablehlo.bitcast_convert %arg0_different_f32 : (tensor) -> tensor ++ // expected-error@+1{{incorrect number of operands for callee}} ++ %2 = call @refine_call_callee(%arg0_different_i32, %1) : (tensor, tensor) -> tensor ++ return %2 : tensor ++ } ++ // expected-error@+1{{Function refine_call_callee has already been refined with a different refinement context. Previous context had 1 and now we have 2 non-dimension arguments}} ++ func.func @refine_call_callee(%arg0: tensor, %arg1: tensor) -> tensor { ++ return %arg1 : tensor ++ } ++} ++ ++// ----- ++ ++module @refine_call_multiple_different_dimension_arguments { ++ func.func @main(%arg1: tensor<4xf32>) -> tensor { ++ %0 = stablehlo.bitcast_convert %arg1 : (tensor<4xf32>) -> tensor ++ %arg0_new = "stablehlo.get_dimension_size"(%0) {dimension = 0 : i64} : (tensor) -> tensor ++ %1 = call @refine_call_callee(%arg0_new, %0) : (tensor, tensor) -> tensor ++ %arg0_different = stablehlo.add %arg0_new, %arg0_new : tensor ++ // expected-error@+1{{incorrect number of operands for callee}} ++ %2 = call @refine_call_callee(%arg0_different, %1) : (tensor, tensor) -> tensor ++ return %2 : tensor ++ } ++ // expected-error@+1{{Function refine_call_callee has already been refined with a different refinement context.}} ++ func.func @refine_call_callee(%arg0: tensor, %arg1: tensor) -> tensor { ++ return %arg1 : tensor ++ } ++} ++ ++// ----- ++ ++module @refine_call_multiple_different_non_dimension_arguments { ++ func.func @main(%arg1: tensor<4xf32>) -> tensor { ++ %0 = stablehlo.bitcast_convert %arg1 : (tensor<4xf32>) -> tensor 
++ %arg0_new = "stablehlo.get_dimension_size"(%0) {dimension = 0 : i64} : (tensor) -> tensor ++ %1 = call @refine_call_callee(%arg0_new, %0) : (tensor, tensor) -> tensor ++ %2 = stablehlo.constant dense<[1., 2.]> : tensor<2xf32> ++ %3 = stablehlo.concatenate %1, %2, dim = 0 : (tensor, tensor<2xf32>) -> tensor ++ // expected-error@+1{{incorrect number of operands for callee}} ++ %4 = call @refine_call_callee(%arg0_new, %3) : (tensor, tensor) -> tensor ++ return %4 : tensor ++ } ++ // expected-error@+1{{Function refine_call_callee has already been refined with a different refinement context.}} ++ func.func @refine_call_callee(%arg0: tensor, %arg1: tensor) -> tensor { ++ return %arg1 : tensor ++ } ++} ++ ++// ----- ++ ++module @refine_call_recursive { ++ func.func @main() -> tensor { ++ %0 = stablehlo.constant dense<3> : tensor ++ %1 = call @refine_call_callee(%0) : (tensor) -> tensor ++ return %1 : tensor ++ } ++ // expected-error@+1{{Function refine_call_callee is being refined recursively}} ++ func.func @refine_call_callee(%arg0: tensor) -> tensor { ++ // expected-error@+1{{incorrect number of operands}} ++ %0 = call @refine_call_callee(%arg0) : (tensor) -> tensor ++ return %0 : tensor ++ } ++} ++ ++// ----- ++ ++module @refine_call_main_argument_unranked { ++ // expected-error@+1{{main must be refined with static shape arguments}} ++ func.func public @main(%arg0: tensor<*xi32>) -> tensor<*xi32> { ++ %2 = call @callee(%arg0) : (tensor<*xi32>) -> tensor<*xi32> ++ return %2 : tensor<*xi32> ++ } ++ func.func private @callee(%arg0: tensor<*xi32>) -> tensor<*xi32> { ++ return %arg0 : tensor<*xi32> ++ } ++} ++ ++// ----- ++ ++module @refine_call_main_argument_dynamic_shape { ++ // expected-error@+1{{main must be refined with static shape arguments}} ++ func.func public @main(%arg0: tensor) -> tensor { ++ %2 = call @callee(%arg0) : (tensor) -> tensor ++ return %2 : tensor ++ } ++ func.func private @callee(%arg0: tensor) -> tensor { ++ return %arg0 : tensor ++ } ++} ++ 
++// ----- ++ ++module @refine_call_callee_argument_unranked { ++ func.func public @main(%arg0: tensor<1xi64>) -> tensor<*xi32> { ++ %1 = stablehlo.dynamic_iota %arg0, dim = 0 : (tensor<1xi64>) -> tensor<*xi32> ++ %2 = call @callee(%1) : (tensor<*xi32>) -> tensor<*xi32> ++ return %2 : tensor<*xi32> ++ } ++ // expected-error@+1{{callee must be refined with static shape arguments}} ++ func.func private @callee(%arg0: tensor<*xi32>) -> tensor<*xi32> { ++ return %arg0 : tensor<*xi32> ++ } ++} ++ ++// ----- ++ ++module @refine_call_callee_argument_dynamic_shape { ++ func.func public @main(%arg0: tensor<1xi64>) -> tensor { ++ %1 = stablehlo.dynamic_iota %arg0, dim = 0 : (tensor<1xi64>) -> tensor ++ %2 = call @callee(%1) : (tensor) -> tensor ++ return %2 : tensor ++ } ++ // expected-error@+1{{callee must be refined with static shape arguments}} ++ func.func private @callee(%arg0: tensor) -> tensor { ++ return %arg0 : tensor ++ } ++} ++ ++// ----- ++ ++// CHECK-LABEL: module @refine_call_dimension_argument_non_scalar ++// The non-scalar constant is not folded into the callee ++module @refine_call_dimension_argument_non_scalar { ++ func.func public @main() -> tensor<4xi32> { ++ // CHECK: dense<[1, 2, 3, 4]> : tensor<4xi32> ++ %0 = stablehlo.constant dense<[1, 2, 3, 4]> : tensor<4xi32> ++ %1 = call @callee(%0) : (tensor<4xi32>) -> tensor<4xi32> ++ return %1 : tensor<4xi32> ++ } ++ func.func private @callee(%arg0: tensor<4xi32>) -> tensor<4xi32> { ++ // CHECK: return %arg0 : tensor<4xi32> ++ return %arg0 : tensor<4xi32> ++ } ++} ++ ++// ----- ++ ++// CHECK-LABEL: module @refine_call_dimension_argument_not_integer ++module @refine_call_dimension_argument_not_integer { ++ func.func public @main() -> tensor { ++ %0 = stablehlo.constant dense<3.> : tensor ++ // CHECK: call @callee({{.*}}) : (tensor) -> tensor ++ %2 = call @callee(%0) : (tensor) -> tensor ++ return %2 : tensor ++ } ++ func.func private @callee(%arg0: tensor) -> tensor { ++ return %arg0 : tensor ++ } + } + + // 
----- +@@ -607,12 +903,55 @@ // ----- @@ -1631,7 +1977,74 @@ diff --ruN a/stablehlo/stablehlo/transforms/StablehloCanonicalizeDynamism.cpp b/ diff --ruN a/stablehlo/stablehlo/transforms/StablehloRefineShapes.cpp b/stablehlo/stablehlo/transforms/StablehloRefineShapes.cpp --- stablehlo/stablehlo/transforms/StablehloRefineShapes.cpp +++ stablehlo/stablehlo/transforms/StablehloRefineShapes.cpp -@@ -43,6 +43,7 @@ +@@ -11,9 +11,48 @@ + See the License for the specific language governing permissions and + limitations under the License. + ==============================================================================*/ +- ++/* ++This shape refinement pass was designed to resolve the dynamic shapes in ++a StableHLO module produced by JAX serialization with shape polymorphism. ++Such a module has the following properties: ++ ++ * it contains a "main" function with statically-shaped arguments; ++ the result types may be dynamically shaped. ++ * all the dynamic shapes depend only on the input shapes (no shape ++ dependency on the input array contents). We refer to the operations that ++ depend transitively only on the input shapes (e.g., as given by ++ `stablehlo.get_dimension_size`) as `dimension` operations. ++ All dimension values can be resolved to constants through inter-procedural ++ constant folding. ++ * intermediate functions may take a number of token arguments (of type ++ !stablehlo.token) at the start of the argument list, followed by some ++ dimension arguments (integer scalars). ++ * some intermediate functions may return dimension values. ++ E.g., the `floordiv` operation on dimension values may be implemented ++ using intermediate functions. These constant functions need to be ++ constant-folded. ++ * All the dynamic shapes can be resolved through shape inference from the ++ dimension values. The dimension values themselves do not depend on the ++ result of shape inference. 
++ ++ ++For each intermediate function we compute a refinement context, including ++the values of the dimension arguments and the static shapes of the other ++arguments. We compute the refinement context when we encounter a function call, ++and then we refine the callee recursively. We abort in the presence of ++recursive calls. ++We also abort if a function is called with multiple distinct refinement ++contexts. ++ ++After refinement, all operations should have static shapes, all calls to ++constant functions are replaced with constants, and all dimension arguments ++for intermediate functions are dropped and are replaced with constants. ++*/ ++#include + #include + #include ++#include ++#include + #include + #include + +@@ -24,8 +63,10 @@ + #include "llvm/ADT/SmallSet.h" + #include "llvm/ADT/SmallVector.h" + #include "llvm/ADT/StringRef.h" ++#include "llvm/Support/Debug.h" + #include "llvm/Support/ErrorHandling.h" + #include "llvm/Support/FormatVariadic.h" ++#include "llvm/Support/ScopedPrinter.h" + #include "mlir/Dialect/Func/IR/FuncOps.h" + #include "mlir/IR/BuiltinAttributes.h" + #include "mlir/IR/BuiltinOps.h" +@@ -39,10 +80,13 @@ + #include "mlir/IR/Types.h" + #include "mlir/IR/Value.h" + #include "mlir/Interfaces/InferTypeOpInterface.h" ++#include "mlir/Support/DebugStringHelper.h" + #include "mlir/Support/LogicalResult.h" ++#include "mlir/Support/LLVM.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "stablehlo/dialect/Base.h" #include "stablehlo/dialect/ChloOps.h" @@ -1639,7 +2052,407 @@ diff --ruN a/stablehlo/stablehlo/transforms/StablehloRefineShapes.cpp b/stablehl #include "stablehlo/dialect/StablehloOps.h" #include "stablehlo/dialect/TypeInference.h" #include "stablehlo/transforms/Passes.h" -@@ -844,12 +845,97 @@ +@@ -50,10 +94,144 @@ + namespace mlir { + namespace stablehlo { + ++#define DEBUG_TYPE "stablehlo-refine-shapes" ++ + #define GEN_PASS_DEF_STABLEHLOREFINESHAPESPASS + #include "stablehlo/transforms/Passes.h.inc" + + 
namespace { ++ ++// Per-module state for shape refinement. ++class RefineShapeState { ++ public: ++ // Validates that we are not attempting to refine a function with a different ++ // context than previously, and are not attempting recursive refinement. ++ // Returns failure() if validation fails. On success, returns a boolean ++ // that specifies whether the function has already been refined. ++ FailureOr validateFunctionRefinement( ++ func::FuncOp func, SmallVector dimensionArguments, ++ SmallVector nonDimensionArgumentTypes) { ++ StringRef funcName = func.getName(); ++ auto found = refinementContexts.find(func); ++ if (found == refinementContexts.end()) { ++ return false; // not already refined. ++ } ++ auto prevDimensionArguments = std::get<0>(found->second); ++ auto prevNonDimensionArgumentTypes = std::get<1>(found->second); ++ // Since we refine until fixed point, we will refine a call to a function ++ // both for the original function and for the refined one. In the latter ++ // case, we should have empty dimensionArguments but the same ++ // nonDimensionArgumentTypes. ++ if (prevNonDimensionArgumentTypes != nonDimensionArgumentTypes || ++ (!dimensionArguments.empty() && ++ prevDimensionArguments != dimensionArguments)) { ++ emitDifferentRefinementContextError( ++ func, /*dimensionArguments=*/dimensionArguments, ++ /*nonDimensionArgumentTypes=*/nonDimensionArgumentTypes, ++ /*prevDimensionArguments=*/prevDimensionArguments, ++ /*prevNonDimensionArgumentShapes=*/prevNonDimensionArgumentTypes); ++ return failure(); ++ } ++ for (auto funcOnStack : functionsBeingRefined) { ++ if (funcOnStack == funcName) { ++ func.emitOpError() << "Function " << funcName ++ << " is being refined recursively\n"; ++ return failure(); ++ } ++ } ++ return true; // already refined. ++ } ++ ++ // Updates the state to signal the starting of a function refinement. ++ // Callers must call `finishFunctionRefinement` when done. 
++ void startFunctionRefinement(func::FuncOp func, ++ SmallVector dimensionArguments, ++ SmallVector nonDimensionArgumentTypes) { ++ StringRef funcName = func.getName(); ++ functionsBeingRefined.push_back(funcName); ++ refinementContexts[func] = ++ std::make_tuple(dimensionArguments, nonDimensionArgumentTypes); ++ } ++ ++ // Updates the state to signal the finishing of a function refinement. ++ LogicalResult finishFunctionRefinement(func::FuncOp func) { ++ if (func.getName() != ++ functionsBeingRefined[functionsBeingRefined.size() - 1]) { ++ func.emitOpError() << "Expected to find " << func.getName() ++ << " at the top of the stack"; ++ return failure(); ++ } ++ functionsBeingRefined.pop_back(); ++ return success(); ++ } ++ ++ private: ++ // Maps refined functions to the refinement context: the values of dimension ++ // arguments and the types of non-dimension arguments. A function is added ++ // here when we start refining it. ++ DenseMap, SmallVector>> ++ refinementContexts; ++ ++ // A stack of functions that are in the process of being refined, the current ++ // one is last. ++ SmallVector functionsBeingRefined; ++ ++ void emitDifferentRefinementContextError( ++ func::FuncOp func, SmallVector dimensionArguments, ++ SmallVector nonDimensionArgumentTypes, ++ SmallVector prevDimensionArguments, ++ SmallVector prevNonDimensionArgumentShapes) { ++ InFlightDiagnostic msg = func.emitOpError(); ++ msg << "Function " << func.getName() ++ << " has already been refined with a different " ++ "refinement context. "; ++ int countShowNonDimensionArguments = ++ std::min(prevNonDimensionArgumentShapes.size(), ++ nonDimensionArgumentTypes.size()); ++ if (prevNonDimensionArgumentShapes.size() != ++ nonDimensionArgumentTypes.size()) { ++ msg << "Previous context had " << prevNonDimensionArgumentShapes.size() ++ << " and now we have " << nonDimensionArgumentTypes.size() ++ << " non-dimension arguments. 
"; ++ } ++ msg << "The differences among the first " << countShowNonDimensionArguments ++ << " non-dimension argument types are: "; ++ for (auto i = 0; i < countShowNonDimensionArguments; ++i) { ++ if (prevNonDimensionArgumentShapes[i] != nonDimensionArgumentTypes[i]) { ++ msg << "Non-dimension argument[" << i << "] previously had type " ++ << debugString(prevNonDimensionArgumentShapes[i]) ++ << " and now has type " << debugString(nonDimensionArgumentTypes[i]) ++ << ". "; ++ } ++ } ++ int countShowDimensionArguments = ++ std::min(prevDimensionArguments.size(), dimensionArguments.size()); ++ if (prevDimensionArguments.size() != dimensionArguments.size()) { ++ msg << "Previous context had " << prevDimensionArguments.size() ++ << " and now we have " << dimensionArguments.size() ++ << " dimension arguments. "; ++ } ++ msg << "The differences among the first " << countShowDimensionArguments ++ << " dimension arguments are: "; ++ for (auto i = 0; i < countShowDimensionArguments; ++i) { ++ if (prevDimensionArguments[i] != dimensionArguments[i]) { ++ msg << "Dimension argument[" << i << "] previously was " ++ << prevDimensionArguments[i].getSExtValue() << " and now is " ++ << dimensionArguments[i].getSExtValue() << ". "; ++ } ++ } ++ } ++}; ++ ++// Refines a function. ++// Returns `true` if the function had already been processed with the same ++// refinement context and `false` if this is the first time we refined the ++// function. Returns failure() if we encounter an error. ++LogicalResult refineFunction(func::FuncOp func, MLIRContext* context, ++ RefineShapeState* state, ++ size_t nrPrefixTokenArguments, ++ SmallVector dimensionArguments, ++ SmallVector nonDimensionArgumentTypes); + + // DenseElementsAttr can be constructed from ArrayRef but not from + // ArrayRef. This helper bridges the gap. 
+@@ -424,11 +602,10 @@ + diag << "refineValues failed for " << types << ": expected " + << values.size() << " types, got " << types.size(); + }); +- +- // Check whether `types` contain any new information with respect to existing +- // return types. Even if just a single dimension size out of an entire tensor +- // type got updated, using `inferMostSpecificType` ensures that we don't +- // miss that. ++ // Check whether `types` contain any new information with respect to ++ // existing return types. Even if just a single dimension size out of an ++ // entire tensor type got updated, using `inferMostSpecificType` ensures ++ // that we don't miss that. + bool needsRefinement = false; + SmallVector refinedTypes; + for (auto it : llvm::zip(values.getTypes(), types)) { +@@ -468,11 +645,13 @@ + + // Simply changing operand type of `func.return` won't work because + // that won't update the FunctionType of the enclosing `func.func`. +- // Nonetheless, we still want to support these ops because they are widely +- // used in StableHLO programs (although the plan of record is to replace +- // `func.return` ops in StableHLO programs with `stablehlo.return`: +- // https://github.com/openxla/stablehlo/issues/425). ++ // Nonetheless, we still want to support these ops because they are ++ // widely used in StableHLO programs (although the plan of record is to ++ // replace `func.return` ops in StableHLO programs with ++ // `stablehlo.return`: https://github.com/openxla/stablehlo/issues/425). + if (isa(user)) continue; ++ ++ if (isa(user)) continue; + + // Unlike in TensorFlow's type inference pass, here we work only with + // allowlisted ops to focus our support on well-defined semantics of +@@ -489,7 +668,8 @@ + value.setType(refinedType); + + // Special case: for `func.return`, guard the refinement with a cast +- // and leave propagation of the refined return type to a dedicated pattern. ++ // and leave propagation of the refined return type to a dedicated ++ // pattern. 
+ auto isFuncReturn = [](OpOperand& use) -> bool { + return isa(use.getOwner()); + }; +@@ -505,8 +685,8 @@ + + // Refines the return types of the given operation using the given types. + // This function also signals PatternRewriter that it needs to visit all the +-// users of this op if any updates to its results have happened during execution +-// of the function. ++// users of this op if any updates to its results have happened during ++// execution of the function. + LogicalResult refineReturnTypes(PatternRewriter& rewriter, Operation* op, + ArrayRef types) { + if (failed(refineValues(rewriter, op, op->getResults(), types))) +@@ -528,12 +708,12 @@ + // traversal, and only then we apply the refinements. If there are other + // types, then the corresponding refinements must be completely empty. + // 2) Encodings are not supported. In principle, TypeExtensions should be +-// supportable, but this needs careful thinking through. Given that no one +-// asked for support for bounded dynamism in this pass yet, this is left +-// for future work. ++// supportable, but this needs careful thinking through. Given that no ++// one asked for support for bounded dynamism in this pass yet, this is ++// left for future work. + // This function also signals PatternRewriter that it needs to visit all the +-// users of this op if any updates to its results have happened during execution +-// of the function. ++// users of this op if any updates to its results have happened during ++// execution of the function. + LogicalResult refineReturnTypes(PatternRewriter& rewriter, Operation* op, + ArrayRef refinements) { + SmallVector flattenedTypes; +@@ -623,8 +803,8 @@ + + // Refines the return type of the given operation using the given shape. + // This function also signals PatternRewriter that it needs to visit all the +-// users of this op if any updates to its results have happened during execution +-// of the function. 
++// users of this op if any updates to its results have happened during ++// execution of the function. + template + LogicalResult refineReturnShape(PatternRewriter& rewriter, OpType op, + ArrayRef shape) { +@@ -633,8 +813,8 @@ + + // Refines the return type of the given operation using the given shape. + // This function also signals PatternRewriter that it needs to visit all the +-// users of this op if any updates to its results have happened during execution +-// of the function. ++// users of this op if any updates to its results have happened during ++// execution of the function. + template + LogicalResult refineReturnShape(PatternRewriter& rewriter, OpType op, + Value shapeValue) { +@@ -647,6 +827,52 @@ + return refineReturnShape(rewriter, op, shape); + } + ++// Dimension arguments are leading scalar constant arguments, optionally ++// preceded by some stablehlo.token arguments. ++SmallVector getDimensionArguments(func::CallOp callOp, ++ size_t* nrPrefixTokenArguments) { ++ *nrPrefixTokenArguments = 0; ++ SmallVector operands = callOp.getOperands(); ++ SmallVector dimensionArguments; ++ for (size_t i = 0; i < operands.size(); ++i) { ++ if (i == *nrPrefixTokenArguments && isa(operands[i].getType())) { ++ (*nrPrefixTokenArguments)++; ++ continue; ++ } ++ RankedTensorType operandType = ++ dyn_cast(operands[i].getType()); ++ if (!operandType || operandType.getRank() != 0 || ++ !operandType.getElementType().template isa()) ++ break; ++ SmallVector operand_int; ++ if (failed(hlo::matchInts(operands[i], operand_int))) { ++ break; ++ } ++ dimensionArguments.push_back(operand_int[0]); ++ } ++ return dimensionArguments; ++} ++ ++std::optional> isConstantFunction( ++ func::FuncOp func) { ++ LLVM_DEBUG(llvm::dbgs() << "check if " << func.getName() ++ << " is a constant function\n"); ++ SmallVector returnedConstants; ++ func::ReturnOp ret = *func.getOps().begin(); ++ bool isConstant = llvm::all_of(ret->getOperands(), [&](auto returnVal) { ++ DenseIntElementsAttr attr; 
++ Operation* return_operand_def = returnVal.getDefiningOp(); ++ if (return_operand_def && ++ matchPattern(return_operand_def, m_Constant(&attr))) { ++ returnedConstants.push_back(attr); ++ return true; ++ } ++ return false; ++ }); ++ if (isConstant) return returnedConstants; ++ return std::nullopt; ++} ++ + struct RefineAllGatherOpPattern : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + LogicalResult matchAndRewrite(AllGatherOp op, +@@ -655,9 +881,9 @@ + if (!operandType.hasRank()) + return rewriter.notifyMatchFailure(op, "expected ranked operand type"); + +- // This represents the cross_replica_and_partition process grouping strategy +- // that requires num_partitions to compute shardCount. Since we don't know +- // num_partitions at this point, we error out. ++ // This represents the cross_replica_and_partition process grouping ++ // strategy that requires num_partitions to compute shardCount. Since we ++ // don't know num_partitions at this point, we error out. + if (op.getChannelHandle() && !op.getUseGlobalDeviceIds()) + return rewriter.notifyMatchFailure(op, "unsupported strategy"); + DenseIntElementsAttr replicaGroups = op.getReplicaGroups(); +@@ -678,12 +904,11 @@ + auto operandType = op.getOperand().getType(); + if (!operandType.hasRank()) + return rewriter.notifyMatchFailure(op, "expected ranked operand type"); +- ++ auto resultType = op.getType(); + // If bit widths of the operand and the result are different, then + // operand and result shapes have different ranks. + // This complicates the logic quite a bit and is not needed to pass the + // current tests, so we leave this for future work. 
+- auto resultType = op.getType(); + auto getBitWidthFn = [](ShapedType type) { + auto elementType = type.getElementType(); + if (auto complexType = elementType.dyn_cast()) +@@ -694,8 +919,77 @@ + if (getBitWidthFn(operandType) != getBitWidthFn(resultType)) + return rewriter.notifyMatchFailure(op, "unsupported bit width"); + +- return refineReturnShape(rewriter, op, operandType.getShape()); +- } ++ auto res = refineReturnShape(rewriter, op, operandType.getShape()); ++ if (failed(res)) return failure(); ++ if (op.getOperand().getType() == op.getResult().getType()) { ++ LLVM_DEBUG({ llvm::dbgs() << " ** remove no-op bitcast convert\n"; }); ++ rewriter.replaceOp(op, op.getOperand()); ++ } ++ return success(); ++ } ++}; ++ ++struct RefineCallOpPattern : public OpRewritePattern { ++ using OpRewritePattern::OpRewritePattern; ++ ++ RefineCallOpPattern(MLIRContext* context, RefineShapeState* state) ++ : OpRewritePattern(context), _state(state) {} ++ ++ LogicalResult matchAndRewrite(func::CallOp op, ++ PatternRewriter& rewriter) const override { ++ LLVM_DEBUG({ llvm::dbgs() << "refineCallOp " << debugString(op) << "\n"; }); ++ ++ // We have a number of prefix token arguments, then the dimension arguments ++ size_t nrPrefixTokenArguments = 0; ++ SmallVector dimensionArguments = ++ getDimensionArguments(op, &nrPrefixTokenArguments); ++ SmallVector nonDimensionArgumentTypes; ++ SmallVector nonDimensionArguments; ++ SmallVector operands = op.getOperands(); ++ for (size_t i = 0; i < operands.size(); ++i) { ++ // Skip the dimension arguments. 
++ if (i >= nrPrefixTokenArguments && ++ i < nrPrefixTokenArguments + dimensionArguments.size()) { ++ continue; ++ } ++ nonDimensionArgumentTypes.push_back(operands[i].getType()); ++ nonDimensionArguments.push_back(operands[i]); ++ } ++ FlatSymbolRefAttr calleeName = op.getCalleeAttr(); ++ const SymbolTable symbolTable(op->getParentOfType()); ++ func::FuncOp callee = dyn_cast( ++ symbolTable.lookupNearestSymbolFrom(op, calleeName.getAttr())); ++ if (!callee) ++ return rewriter.notifyMatchFailure( ++ op, "cannot find callee in the current scope"); ++ if (failed(refineFunction(callee, rewriter.getContext(), _state, ++ nrPrefixTokenArguments, dimensionArguments, ++ nonDimensionArgumentTypes))) ++ return failure(); ++ ++ // Is the callee a constant function in this refinement context? ++ std::optional> constantAttrs = ++ isConstantFunction(callee); ++ if (constantAttrs.has_value()) { ++ SmallVector constants; ++ for (auto constAttr : constantAttrs.value()) { ++ constants.push_back( ++ rewriter.create(op.getLoc(), constAttr)); ++ } ++ rewriter.replaceOp(op, constants); ++ return success(); ++ } ++ if (!dimensionArguments.empty()) { ++ // Drop the dimension arguments, but only if necessary, or else we ++ // will end up trying to refine the new CallOp forever. ++ op = rewriter.replaceOpWithNewOp( ++ op, op.getResultTypes(), callee.getSymName(), nonDimensionArguments); ++ } ++ return refineReturnTypes(rewriter, op, callee.getResultTypes()); ++ } ++ ++ private: ++ RefineShapeState* _state; + }; + + struct RefineConvertOpPattern : public OpRewritePattern { +@@ -844,12 +1138,98 @@ } }; @@ -1712,8 +2525,9 @@ diff --ruN a/stablehlo/stablehlo/transforms/StablehloRefineShapes.cpp b/stablehl + return rewriter.notifyMatchFailure(op, "expected constant output_shape"); + + // We only need to refine the shape of `output` (the second result). 
-+ // The shape of `output_state` (the first result) is determined by the shape -+ // of `initial_state`, so we ignore it and provide an empty refinement. ++ // The shape of `output_state` (the first result) is determined by the ++ // shape of `initial_state`, so we ignore it and provide an empty ++ // refinement. + return refineReturnTypes(rewriter, op, {{initialStateType}, {outputShape}}); + } +}; @@ -1737,15 +2551,349 @@ diff --ruN a/stablehlo/stablehlo/transforms/StablehloRefineShapes.cpp b/stablehl } }; -@@ -1181,7 +1267,10 @@ - patterns.add(&getContext()); - patterns.add(&getContext()); - patterns.add(&getContext()); -+ patterns.add(&getContext()); - patterns.add(&getContext()); -+ patterns.add(&getContext()); -+ patterns.add(&getContext()); - patterns.add(&getContext()); - patterns.add(&getContext()); - patterns.add(&getContext()); +@@ -865,11 +1245,11 @@ + if (!isa(op->getDialect())) + return rewriter.notifyMatchFailure(op, "unsupported dialect"); + +- // For the ops that implement InferTypeOpInterface, we reinfer their return +- // types and see what happens. +- // Operands of these ops might have been refined elsewhere (e.g. someone +- // might have updated argument types of a function) or earlier during this +- // pass, and this might enable refinement opportunities downstream. ++ // For the ops that implement InferTypeOpInterface, we reinfer their ++ // return types and see what happens. Operands of these ops might have ++ // been refined elsewhere (e.g. someone might have updated argument types ++ // of a function) or earlier during this pass, and this might enable ++ // refinement opportunities downstream. + SmallVector inferredReturnTypes; + if (failed(op.inferReturnTypes(getContext(), /*location=*/{}, + op->getOperands(), op->getAttrDictionary(), +@@ -925,8 +1305,8 @@ + sliceSizesAttr.size(), + RankedTensorType::get({}, startIndicesElementType)); + +- // RealDynamicSliceOp can take tensors of integer or index element types. 
+- // DynamicSliceOp::slice_sizes only supports i64 element type. ++ // RealDynamicSliceOp can take tensors of integer or index element ++ // types. DynamicSliceOp::slice_sizes only supports i64 element type. + // Adapt accordingly in order to be compatible with inferDynamicSliceOp. + SmallVector sliceSizes; + for (auto element : sliceSizesAttr.getValues()) { +@@ -956,9 +1336,9 @@ + if (!operandType.hasRank()) + return rewriter.notifyMatchFailure(op, "expected ranked operand type"); + +- // This represents the cross_replica_and_partition process grouping strategy +- // that requires num_partitions to compute shardCount. Since we don't know +- // num_partitions at this point, we error out. ++ // This represents the cross_replica_and_partition process grouping ++ // strategy that requires num_partitions to compute shardCount. Since we ++ // don't know num_partitions at this point, we error out. + if (op.getChannelHandle() && !op.getUseGlobalDeviceIds()) + return rewriter.notifyMatchFailure(op, "unsupported strategy"); + DenseIntElementsAttr replicaGroups = op.getReplicaGroups(); +@@ -998,9 +1378,9 @@ + PatternRewriter& rewriter) const override { + // Push the potentially refined operand types into the nested regions. + // This can lead to refinements of the return types of the body (but not +- // of the cond since it always returns tensor), but the key insight here +- // is that the enclosing while op doesn't care about these refinements +- // (because its return types are equal to its operand types). ++ // of the cond since it always returns tensor), but the key insight ++ // here is that the enclosing while op doesn't care about these ++ // refinements (because its return types are equal to its operand types). + // If we end up with incompatibilities between while's return types and + // body's return types, the verifier will tell us about that. This means + // that the original program wasn't well-formed. 
TODO(burmako): Implement +@@ -1050,8 +1430,8 @@ + if (failed(mostSpecificType) || destType == *mostSpecificType) continue; + + // If the source type of the cast is more specific than the target type, +- // then we conclude that the cast is redundant (i.e. needs to be removed) +- // and that the return type of the function needs an update. ++ // then we conclude that the cast is redundant (i.e. needs to be ++ // removed) and that the return type of the function needs an update. + needsUpdate = true; + updatedResultTypes[i] = sourceType; + +@@ -1066,9 +1446,6 @@ + for (auto cast : castsToReplace) + rewriter.replaceOp(cast, cast->getOperands()); + +- // If the type of the enclosing `func.func` needs an update, we simply +- // call setType. We can afford this simplicity because our algorithm +- // currently supports only one function per module. + auto func = cast(op->getParentOp()); + func.setType( + rewriter.getFunctionType(func.getArgumentTypes(), updatedResultTypes)); +@@ -1100,22 +1477,186 @@ + } + }; + ++LogicalResult applyRewritePatterns(func::FuncOp func, MLIRContext* context, ++ RefineShapeState* state) { ++ // TODO(#1048): Find out why .maxIterations = 1 no longer works. ++ // There have been recent refactors to applyPatternsAndFoldGreedily ++ // upstream, and that might be the reason. 
++ GreedyRewriteConfig config; ++ config.useTopDownTraversal = true; ++ config.enableRegionSimplification = true; ++ config.maxIterations = 2; ++ config.maxNumRewrites = GreedyRewriteConfig::kNoLimit; ++ config.strictMode = GreedyRewriteStrictness::AnyOp; ++ ++ RewritePatternSet patterns(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context, state); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ if (failed(applyPatternsAndFoldGreedily(func, std::move(patterns), config))) { ++ func.emitOpError() << "applyPatternsAndFoldGreedily failed"; ++ return failure(); ++ } ++ return success(); ++} ++ ++LogicalResult refineFunction(func::FuncOp func, MLIRContext* context, ++ RefineShapeState* state, ++ size_t nrPrefixTokenArguments, ++ SmallVector dimensionArguments, ++ SmallVector nonDimensionArgumentTypes) { ++ // The nonDimensionArgumentTypes include the prefix token arguments. 
++ LLVM_DEBUG({ ++ llvm::dbgs() << "refineFunction " << func.getName() << ": initial type " ++ << debugString(func.getFunctionType()) << "\n"; ++ llvm::dbgs() << " has " << nrPrefixTokenArguments << " prefix tokens\n"; ++ for (size_t i = 0; i < dimensionArguments.size(); ++i) { ++ llvm::dbgs() << " with dimension arg[" << i ++ << "] = " << dimensionArguments[i] << "\n"; ++ } ++ }); ++ // Check that the argument types have static shapes. ++ for (size_t i = 0; i < nonDimensionArgumentTypes.size(); ++i) { ++ if (i < nrPrefixTokenArguments) continue; ++ auto argType = nonDimensionArgumentTypes[i]; ++ if (isa(argType)) continue; ++ auto argRankedTensorType = dyn_cast(argType); ++ if (!argRankedTensorType || !argRankedTensorType.hasStaticShape()) { ++ func.emitOpError() << func.getName() ++ << " must be refined with static shape arguments. " ++ << "Found argument of type " << debugString(argType); ++ return failure(); ++ } ++ } ++ auto alreadyRefined = state->validateFunctionRefinement( ++ func, dimensionArguments, nonDimensionArgumentTypes); ++ if (failed(alreadyRefined)) { ++ return failure(); ++ } ++ if (*alreadyRefined) { ++ LLVM_DEBUG({ ++ llvm::dbgs() << "refineFunction " << func.getName() ++ << ": skipping, already refined\n"; ++ }); ++ return success(); ++ } ++ state->startFunctionRefinement(func, dimensionArguments, ++ nonDimensionArgumentTypes); ++ // Only one block per function is supported at the moment. ++ // At the StableHLO level, functions are expected to only have one block, ++ // so supporting more is out of scope for this pass. ++ if (!func.getRegion().hasOneBlock()) { ++ func.emitOpError() << "must have exactly one block"; ++ return failure(); ++ } ++ ++ // Replace all dimension arguments with constants and remove those arguments. ++ // Wrap non-dimension arguments with bitcast_convert. 
++ OpBuilder op_builder(func.getRegion()); ++ op_builder.setInsertionPointToStart(&func.getRegion().front()); ++ size_t firstNonDimensionArg = ++ nrPrefixTokenArguments + dimensionArguments.size(); ++ for (size_t i = 0; i < func.getNumArguments(); ++i) { ++ BlockArgument arg = func.getArgument(i); ++ Type argType = arg.getType(); ++ if (i < nrPrefixTokenArguments) { ++ continue; ++ } ++ if (i < firstNonDimensionArg) { ++ ShapedType argShapedType = dyn_cast(argType); ++ if (!argShapedType) { ++ func.emitOpError() << "dimension arguments must have shaped types"; ++ return failure(); ++ } ++ // We will drop the dimension arguments, replace them with constants. ++ auto replacement_op = op_builder.create( ++ arg.getLoc(), argType, ++ getTensorAttr(argShapedType, ++ dimensionArguments[i - nrPrefixTokenArguments])); ++ arg.replaceAllUsesWith(replacement_op); ++ } else { ++ int nonDimensionArgumentIndex = ++ nrPrefixTokenArguments + i - firstNonDimensionArg; ++ Type refinedType = nonDimensionArgumentTypes[nonDimensionArgumentIndex]; ++ if (refinedType != argType) { ++ // We add BitcastConvertOp as the only uses of the non-dimension ++ // arguments to ensure the module stays valid after we set the argument ++ // type. 
++ auto replacement_op = op_builder.create( ++ arg.getLoc(), argType, arg); ++ arg.replaceAllUsesExcept(replacement_op->getResult(0), replacement_op); ++ arg.setType(refinedType); ++ } ++ } ++ } ++ BitVector argIndices(func.getNumArguments()); ++ argIndices.set(nrPrefixTokenArguments, firstNonDimensionArg); ++ func.eraseArguments(argIndices); ++ func.setType(op_builder.getFunctionType(nonDimensionArgumentTypes, ++ func.getResultTypes())); ++ LLVM_DEBUG({ ++ llvm::dbgs() << "refineFunction " << func.getName() << ": set type to " ++ << func.getFunctionType() << "\n"; ++ }); ++ if (failed(applyRewritePatterns(func, context, state))) return failure(); ++ LLVM_DEBUG({ ++ llvm::dbgs() << "refineFunction " << func.getName() << ": end with type " ++ << debugString(func.getFunctionType()) << "\n"; ++ }); ++ if (failed(state->finishFunctionRefinement(func))) return failure(); ++ return success(); ++} ++ + struct StablehloRefineShapesPass + : public impl::StablehloRefineShapesPassBase { + using StablehloRefineShapesPassBase::StablehloRefineShapesPassBase; + + void runOnOperation() override { +- // Only one function per module is supported at the moment to avoid the need +- // to think about iterative type inference algorithms. +- // Current use cases are served well by inlining multiple functions into +- // a single function, so we leave native support for multiple functions to +- // future work. + // To enable modules that contain CustomCallOp::called_computations, + // we allow multiple functions, in which case we only refine the main + // function called "main", assuming that the called computations will have + // static shapes. Lifting this assumption and expanding refinement to + // multiple functions is left for future work. 
+ ModuleOp module = getOperation(); ++ RefineShapeState state; + auto funcs = llvm::to_vector(module.getOps()); + if (funcs.empty()) return; + func::FuncOp func; +@@ -1130,70 +1671,14 @@ + << " function to clearly identify which function will be refined"; + return signalPassFailure(); + } +- +- // Similarly, only one block per function is supported at the moment. +- // At the StableHLO level, functions are expected to only have one block, +- // so supporting more is out of scope for this pass. +- if (!func.getRegion().hasOneBlock()) { +- func.emitOpError() << "must have exactly one block"; ++ SmallVector emptyDimensionArguments; ++ SmallVector nonDimensionArgumentTypes; ++ for (auto arg : func.getArguments()) ++ nonDimensionArgumentTypes.push_back(arg.getType()); ++ if (failed(refineFunction(func, &getContext(), &state, 0, ++ emptyDimensionArguments, ++ nonDimensionArgumentTypes))) + return signalPassFailure(); +- } +- +- // The algorithm behind this pass consists of a single traversal of the +- // function. This is sufficient because we only support one function per +- // program at the moment. +- // TODO(#1048): Find out why .maxIterations = 1 no longer works. +- // There have been recent refactors to applyPatternsAndFoldGreedily +- // upstream, and that might be the reason. 
+- GreedyRewriteConfig config; +- config.useTopDownTraversal = true; +- config.enableRegionSimplification = true; +- config.maxIterations = 2; +- config.maxNumRewrites = GreedyRewriteConfig::kNoLimit; +- config.strictMode = GreedyRewriteStrictness::AnyOp; +- +- RewritePatternSet patterns(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- if (failed( +- applyPatternsAndFoldGreedily(func, std::move(patterns), config))) { +- return signalPassFailure(); +- } + } + }; + diff --git a/third_party/xla/third_party/stablehlo/temporary.patch b/third_party/xla/third_party/stablehlo/temporary.patch index 4c4228163a6f04..0cb078c3b89795 100644 --- a/third_party/xla/third_party/stablehlo/temporary.patch +++ b/third_party/xla/third_party/stablehlo/temporary.patch @@ -1426,7 +1426,353 @@ diff --ruN 
a/stablehlo/stablehlo/tests/stablehlo_canonicalize_dynamism.mlir b/st diff --ruN a/stablehlo/stablehlo/tests/stablehlo_refine_shapes.mlir b/stablehlo/stablehlo/tests/stablehlo_refine_shapes.mlir --- stablehlo/stablehlo/tests/stablehlo_refine_shapes.mlir +++ stablehlo/stablehlo/tests/stablehlo_refine_shapes.mlir -@@ -607,12 +607,55 @@ +@@ -31,6 +31,7 @@ + + // ----- + ++// CHECK-LABEL: module @has_main + module @has_main { + // CHECK: main + func.func @main(%arg0: tensor<4xf32>) -> tensor<*xi32> { +@@ -38,17 +39,11 @@ + %0 = stablehlo.bitcast_convert %arg0 : (tensor<4xf32>) -> tensor<*xi32> + func.return %0 : tensor<*xi32> + } +- +- // CHECK: helper +- func.func @helper(%arg0: tensor<4xf32>) -> tensor<*xi32> { +- // CHECK: stablehlo.bitcast_convert{{.*}} -> tensor<*xi32> +- %0 = stablehlo.bitcast_convert %arg0 : (tensor<4xf32>) -> tensor<*xi32> +- func.return %0 : tensor<*xi32> +- } +-} +- +-// ----- +- ++} ++ ++// ----- ++ ++// CHECK-LABEL: func @error_unsupported_operation + func.func @error_unsupported_operation(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> index { + // CHECK: stablehlo.add{{.*}} -> tensor + %0 = stablehlo.add %arg0, %arg1 : (tensor<4xf32>, tensor<4xf32>) -> tensor +@@ -472,11 +467,312 @@ + + // ----- + +-// CHECK-LABEL: func @refine_bitcast_convert_same_bitwidth +-func.func @refine_bitcast_convert_same_bitwidth(%arg0 : tensor<4xf32>) -> tensor<*xi32> { ++// CHECK-LABEL: func @refine_bitcast_convert_same_bitwidth_unranked_result ++func.func @refine_bitcast_convert_same_bitwidth_unranked_result(%arg0 : tensor<4xf32>) -> tensor<*xi32> { + // CHECK: stablehlo.bitcast_convert{{.*}} -> tensor<4xi32> + %0 = stablehlo.bitcast_convert %arg0 : (tensor<4xf32>) -> tensor<*xi32> + func.return %0 : tensor<*xi32> ++} ++ ++// ----- ++ ++// CHECK-LABEL: func @refine_bitcast_convert_same_bitwidth ++func.func @refine_bitcast_convert_same_bitwidth() -> tensor { ++ %0 = stablehlo.constant dense<[3, 5, 0]> : tensor<3xi32> ++ %21 = stablehlo.dynamic_iota %0, dim = 0 
: (tensor<3xi32>) -> tensor ++ // CHECK: stablehlo.bitcast_convert{{.*}} -> tensor<3x5x0xf32> ++ %48 = stablehlo.bitcast_convert %21 : (tensor) -> tensor ++ return %48 : tensor ++} ++ ++// ----- ++ ++// CHECK-LABEL: module @refine_call ++module @refine_call { ++ func.func @main(%arg1: tensor<4xf32>) -> tensor { ++ %0 = stablehlo.bitcast_convert %arg1 : (tensor<4xf32>) -> tensor ++ %1 = stablehlo.constant dense<4> : tensor ++ // CHECK: refine_call_callee{{.*}}-> tensor<4xf32> ++ %2 = call @refine_call_callee(%1, %0) : (tensor, tensor) -> tensor ++ return %2 : tensor ++ } ++ // CHECK: refine_call_callee(%arg0: tensor<4xf32>) -> tensor<4xf32> ++ func.func @refine_call_callee(%arg0: tensor, %arg1: tensor) -> tensor { ++ // CHECK: stablehlo.constant dense<4> ++ %0 = stablehlo.reshape %arg0 : (tensor) -> tensor<1xi32> ++ %1 = stablehlo.dynamic_iota %0, dim = 0 : (tensor<1xi32>) -> tensor ++ return %1 : tensor ++ } ++} ++ ++// ----- ++ ++// CHECK-LABEL: module @refine_call_dimension_arguments ++module @refine_call_dimension_arguments { ++ func.func public @main(%arg0: tensor) -> tensor { ++ // CHECK: [[RESULT:%.*]] = call @callee ++ // CHECK: return [[RESULT]] ++ %0 = stablehlo.constant dense<3> : tensor ++ %1 = call @callee(%0, %0, %arg0) : (tensor, tensor, tensor) -> tensor ++ return %1 : tensor ++ } ++ // %arg0 and %arg1 are dimension arguments ++ // CHECK: @callee([[ARG0:%.*]]: tensor) -> tensor ++ func.func private @callee(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { ++ // CHECK: [[RESULT0:%.*]] = stablehlo.constant dense<6> ++ // CHECK: [[RESULT1:%.*]] = stablehlo.add [[RESULT0]], [[ARG0]] ++ // CHECK: return [[RESULT1]] ++ %0 = stablehlo.add %arg0, %arg1: tensor ++ %1 = stablehlo.add %0, %arg2: tensor ++ return %1 : tensor ++ } ++} ++ ++// ----- ++ ++// CHECK-LABEL: module @refine_call_prefix_token_and_dimension_arguments ++module @refine_call_prefix_token_and_dimension_arguments { ++ func.func public @main(%arg0: tensor) -> tensor { ++ // CHECK: 
[[RESULT:%.*]] = call @callee ++ // CHECK: return [[RESULT]] ++ %0 = stablehlo.constant dense<3> : tensor ++ %token = stablehlo.create_token : !stablehlo.token ++ %1 = call @callee(%token, %0, %0, %arg0) : (!stablehlo.token, tensor, tensor, tensor) -> tensor ++ return %1 : tensor ++ } ++ // %arg0 and %arg1 are dimension arguments ++ // CHECK: @callee([[ARG_TOKEN:%.*]]: !stablehlo.token, [[ARG0:%.*]]: tensor ++ func.func private @callee(%arg_token: !stablehlo.token, %arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { ++ // CHECK: [[RESULT0:%.*]] = stablehlo.constant dense<6> ++ // CHECK: [[RESULT1:%.*]] = stablehlo.add [[RESULT0]], [[ARG0]] ++ // CHECK: return [[RESULT1]] ++ %0 = stablehlo.add %arg0, %arg1: tensor ++ %1 = stablehlo.add %0, %arg2: tensor ++ return %1 : tensor ++ } ++} ++ ++// ----- ++ ++// CHECK-LABEL: module @refine_call_dimension_arguments_followed_by_token ++module @refine_call_dimension_arguments_followed_by_token { ++ func.func public @main(%arg0: tensor) -> tensor { ++ // CHECK: [[RESULT:%.*]] = call @callee ++ // CHECK: return [[RESULT]] ++ %0 = stablehlo.constant dense<3> : tensor ++ %token = stablehlo.create_token : !stablehlo.token ++ %1 = call @callee(%0, %0, %token, %arg0) : (tensor, tensor, !stablehlo.token, tensor) -> tensor ++ return %1 : tensor ++ } ++ // %arg0 and %arg1 are dimension arguments ++ // CHECK: @callee([[ARG_TOKEN:%.*]]: !stablehlo.token, [[ARG0:%.*]]: tensor ++ func.func private @callee(%arg0: tensor, %arg1: tensor, %arg_token: !stablehlo.token, %arg2: tensor) -> tensor { ++ // CHECK: [[RESULT0:%.*]] = stablehlo.constant dense<6> ++ // CHECK: [[RESULT1:%.*]] = stablehlo.add [[RESULT0]], [[ARG0]] ++ // CHECK: return [[RESULT1]] ++ %0 = stablehlo.add %arg0, %arg1: tensor ++ %1 = stablehlo.add %0, %arg2: tensor ++ return %1 : tensor ++ } ++} ++ ++// ----- ++ ++// CHECK-LABEL: module @refine_multiple_call_with_same_context ++module @refine_multiple_call_with_same_context { ++ func.func @main(%arg1: tensor<4xf32>) -> 
tensor { ++ %0 = stablehlo.bitcast_convert %arg1 : (tensor<4xf32>) -> tensor ++ %arg0_new = "stablehlo.get_dimension_size"(%0) {dimension = 0 : i64} : (tensor) -> tensor ++ // CHECK: refine_call_callee{{.*}}-> tensor<4xf32> ++ %1 = call @refine_call_callee(%arg0_new, %0) : (tensor, tensor) -> tensor ++ // CHECK: refine_call_callee{{.*}}-> tensor<4xf32> ++ %2 = call @refine_call_callee(%arg0_new, %1) : (tensor, tensor) -> tensor ++ return %2 : tensor ++ } ++ // CHECK: refine_call_callee{{.*}}-> tensor<4xf32> ++ func.func @refine_call_callee(%arg0: tensor, %arg1: tensor) -> tensor { ++ return %arg1 : tensor ++ } ++} ++ ++// ----- ++ ++// CHECK-LABEL: module @refine_multiple_call_constant_function ++module @refine_multiple_call_constant_function { ++ func.func @main(%arg0: tensor<5xf32>) -> tensor { ++ // CHECK: [[RESULT0:%.*]] = stablehlo.constant dense<16> ++ // CHECK: return [[RESULT0]] ++ %0 = stablehlo.constant dense<4> : tensor ++ %1 = call @refine_call_callee(%0, %arg0) : (tensor, tensor<5xf32>) -> tensor ++ %2 = call @refine_call_callee(%0, %arg0) : (tensor, tensor<5xf32>) -> tensor ++ %3 = stablehlo.add %1, %2: tensor ++ return %3 : tensor ++ } ++ func.func @refine_call_callee(%arg0: tensor, %arg1: tensor<5xf32>) -> tensor { ++ // CHECK: [[RESULT1:%.*]] = stablehlo.constant dense<8> ++ // CHECK: return [[RESULT1]] ++ %0 = stablehlo.add %arg0, %arg0: tensor ++ return %0 : tensor ++ } ++} ++ ++// ----- ++ ++module @refine_call_multiple_with_different_number_dimension_arguments { ++ func.func @main(%arg1: tensor<4xf32>) -> tensor { ++ %0 = stablehlo.bitcast_convert %arg1 : (tensor<4xf32>) -> tensor ++ %arg0_new = "stablehlo.get_dimension_size"(%0) {dimension = 0 : i64} : (tensor) -> tensor ++ %1 = call @refine_call_callee(%arg0_new, %0) : (tensor, tensor) -> tensor ++ // Ensure that the first argument is not a constant at the second call site ++ %arg0_different_f32 = stablehlo.bitcast_convert %arg0_new : (tensor) -> tensor ++ %arg0_different_i32 = 
stablehlo.bitcast_convert %arg0_different_f32 : (tensor) -> tensor ++ // expected-error@+1{{incorrect number of operands for callee}} ++ %2 = call @refine_call_callee(%arg0_different_i32, %1) : (tensor, tensor) -> tensor ++ return %2 : tensor ++ } ++ // expected-error@+1{{Function refine_call_callee has already been refined with a different refinement context. Previous context had 1 and now we have 2 non-dimension arguments}} ++ func.func @refine_call_callee(%arg0: tensor, %arg1: tensor) -> tensor { ++ return %arg1 : tensor ++ } ++} ++ ++// ----- ++ ++module @refine_call_multiple_different_dimension_arguments { ++ func.func @main(%arg1: tensor<4xf32>) -> tensor { ++ %0 = stablehlo.bitcast_convert %arg1 : (tensor<4xf32>) -> tensor ++ %arg0_new = "stablehlo.get_dimension_size"(%0) {dimension = 0 : i64} : (tensor) -> tensor ++ %1 = call @refine_call_callee(%arg0_new, %0) : (tensor, tensor) -> tensor ++ %arg0_different = stablehlo.add %arg0_new, %arg0_new : tensor ++ // expected-error@+1{{incorrect number of operands for callee}} ++ %2 = call @refine_call_callee(%arg0_different, %1) : (tensor, tensor) -> tensor ++ return %2 : tensor ++ } ++ // expected-error@+1{{Function refine_call_callee has already been refined with a different refinement context.}} ++ func.func @refine_call_callee(%arg0: tensor, %arg1: tensor) -> tensor { ++ return %arg1 : tensor ++ } ++} ++ ++// ----- ++ ++module @refine_call_multiple_different_non_dimension_arguments { ++ func.func @main(%arg1: tensor<4xf32>) -> tensor { ++ %0 = stablehlo.bitcast_convert %arg1 : (tensor<4xf32>) -> tensor ++ %arg0_new = "stablehlo.get_dimension_size"(%0) {dimension = 0 : i64} : (tensor) -> tensor ++ %1 = call @refine_call_callee(%arg0_new, %0) : (tensor, tensor) -> tensor ++ %2 = stablehlo.constant dense<[1., 2.]> : tensor<2xf32> ++ %3 = stablehlo.concatenate %1, %2, dim = 0 : (tensor, tensor<2xf32>) -> tensor ++ // expected-error@+1{{incorrect number of operands for callee}} ++ %4 = call 
@refine_call_callee(%arg0_new, %3) : (tensor, tensor) -> tensor ++ return %4 : tensor ++ } ++ // expected-error@+1{{Function refine_call_callee has already been refined with a different refinement context.}} ++ func.func @refine_call_callee(%arg0: tensor, %arg1: tensor) -> tensor { ++ return %arg1 : tensor ++ } ++} ++ ++// ----- ++ ++module @refine_call_recursive { ++ func.func @main() -> tensor { ++ %0 = stablehlo.constant dense<3> : tensor ++ %1 = call @refine_call_callee(%0) : (tensor) -> tensor ++ return %1 : tensor ++ } ++ // expected-error@+1{{Function refine_call_callee is being refined recursively}} ++ func.func @refine_call_callee(%arg0: tensor) -> tensor { ++ // expected-error@+1{{incorrect number of operands}} ++ %0 = call @refine_call_callee(%arg0) : (tensor) -> tensor ++ return %0 : tensor ++ } ++} ++ ++// ----- ++ ++module @refine_call_main_argument_unranked { ++ // expected-error@+1{{main must be refined with static shape arguments}} ++ func.func public @main(%arg0: tensor<*xi32>) -> tensor<*xi32> { ++ %2 = call @callee(%arg0) : (tensor<*xi32>) -> tensor<*xi32> ++ return %2 : tensor<*xi32> ++ } ++ func.func private @callee(%arg0: tensor<*xi32>) -> tensor<*xi32> { ++ return %arg0 : tensor<*xi32> ++ } ++} ++ ++// ----- ++ ++module @refine_call_main_argument_dynamic_shape { ++ // expected-error@+1{{main must be refined with static shape arguments}} ++ func.func public @main(%arg0: tensor) -> tensor { ++ %2 = call @callee(%arg0) : (tensor) -> tensor ++ return %2 : tensor ++ } ++ func.func private @callee(%arg0: tensor) -> tensor { ++ return %arg0 : tensor ++ } ++} ++ ++// ----- ++ ++module @refine_call_callee_argument_unranked { ++ func.func public @main(%arg0: tensor<1xi64>) -> tensor<*xi32> { ++ %1 = stablehlo.dynamic_iota %arg0, dim = 0 : (tensor<1xi64>) -> tensor<*xi32> ++ %2 = call @callee(%1) : (tensor<*xi32>) -> tensor<*xi32> ++ return %2 : tensor<*xi32> ++ } ++ // expected-error@+1{{callee must be refined with static shape arguments}} ++ 
func.func private @callee(%arg0: tensor<*xi32>) -> tensor<*xi32> { ++ return %arg0 : tensor<*xi32> ++ } ++} ++ ++// ----- ++ ++module @refine_call_callee_argument_dynamic_shape { ++ func.func public @main(%arg0: tensor<1xi64>) -> tensor { ++ %1 = stablehlo.dynamic_iota %arg0, dim = 0 : (tensor<1xi64>) -> tensor ++ %2 = call @callee(%1) : (tensor) -> tensor ++ return %2 : tensor ++ } ++ // expected-error@+1{{callee must be refined with static shape arguments}} ++ func.func private @callee(%arg0: tensor) -> tensor { ++ return %arg0 : tensor ++ } ++} ++ ++// ----- ++ ++// CHECK-LABEL: module @refine_call_dimension_argument_non_scalar ++// The non-scalar constant is not folded into the callee ++module @refine_call_dimension_argument_non_scalar { ++ func.func public @main() -> tensor<4xi32> { ++ // CHECK: dense<[1, 2, 3, 4]> : tensor<4xi32> ++ %0 = stablehlo.constant dense<[1, 2, 3, 4]> : tensor<4xi32> ++ %1 = call @callee(%0) : (tensor<4xi32>) -> tensor<4xi32> ++ return %1 : tensor<4xi32> ++ } ++ func.func private @callee(%arg0: tensor<4xi32>) -> tensor<4xi32> { ++ // CHECK: return %arg0 : tensor<4xi32> ++ return %arg0 : tensor<4xi32> ++ } ++} ++ ++// ----- ++ ++// CHECK-LABEL: module @refine_call_dimension_argument_not_integer ++module @refine_call_dimension_argument_not_integer { ++ func.func public @main() -> tensor { ++ %0 = stablehlo.constant dense<3.> : tensor ++ // CHECK: call @callee({{.*}}) : (tensor) -> tensor ++ %2 = call @callee(%0) : (tensor) -> tensor ++ return %2 : tensor ++ } ++ func.func private @callee(%arg0: tensor) -> tensor { ++ return %arg0 : tensor ++ } + } + + // ----- +@@ -607,12 +903,55 @@ // ----- @@ -1631,7 +1977,74 @@ diff --ruN a/stablehlo/stablehlo/transforms/StablehloCanonicalizeDynamism.cpp b/ diff --ruN a/stablehlo/stablehlo/transforms/StablehloRefineShapes.cpp b/stablehlo/stablehlo/transforms/StablehloRefineShapes.cpp --- stablehlo/stablehlo/transforms/StablehloRefineShapes.cpp +++ 
stablehlo/stablehlo/transforms/StablehloRefineShapes.cpp -@@ -43,6 +43,7 @@ +@@ -11,9 +11,48 @@ + See the License for the specific language governing permissions and + limitations under the License. + ==============================================================================*/ +- ++/* ++This shape refinement pass was designed to resolve the dynamic shapes in ++a StableHLO module produced by JAX serialization with shape polymorphism. ++Such a module has the following properties: ++ ++ * it contains a "main" function with statically-shaped arguments; ++ the result types may be dynamically shaped. ++ * all the dynamic shapes depend only on the input shapes (no shape ++ dependency on the input array contents). We refer to the operations that ++ depend transitively only on the input shapes (e.g., as given by ++ `stablehlo.get_dimension_size`) as `dimension` operations. ++ All dimension values can be resolved to constants through inter-procedural ++ constant folding. ++ * intermediate functions may take a number of token arguments (of type ++ !stablehlo.token) at the start of the argument list, followed by some ++ dimension arguments (integer scalars). ++ * some intermediate functions may return dimension values. ++ E.g., the `floordiv` operation on dimension values may be implemented ++ using intermediate functions. These constant functions need to be ++ constant-folded. ++ * All the dynamic shapes can be resolved through shape inference from the ++ dimension values. The dimension values themselves do not depend on the ++ result of shape inference. ++ ++ ++For each intermediate function we compute a refinement context, including ++the values of the dimension arguments and the static shapes of the other ++arguments. We compute the refinement context when we encounter a function call, ++and then we refine the callee recursively. We abort in the presence of ++recursive calls. ++We also abort if a function is called with multiple distinct refinement ++contexts. 
++ ++After refinement, all operations should have static shapes, all calls to ++constant functions are replaced with constants, and all dimension arguments ++for intermediate functions are dropped and are replaced with constants. ++*/ ++#include + #include + #include ++#include ++#include + #include + #include + +@@ -24,8 +63,10 @@ + #include "llvm/ADT/SmallSet.h" + #include "llvm/ADT/SmallVector.h" + #include "llvm/ADT/StringRef.h" ++#include "llvm/Support/Debug.h" + #include "llvm/Support/ErrorHandling.h" + #include "llvm/Support/FormatVariadic.h" ++#include "llvm/Support/ScopedPrinter.h" + #include "mlir/Dialect/Func/IR/FuncOps.h" + #include "mlir/IR/BuiltinAttributes.h" + #include "mlir/IR/BuiltinOps.h" +@@ -39,10 +80,13 @@ + #include "mlir/IR/Types.h" + #include "mlir/IR/Value.h" + #include "mlir/Interfaces/InferTypeOpInterface.h" ++#include "mlir/Support/DebugStringHelper.h" + #include "mlir/Support/LogicalResult.h" ++#include "mlir/Support/LLVM.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "stablehlo/dialect/Base.h" #include "stablehlo/dialect/ChloOps.h" @@ -1639,7 +2052,407 @@ diff --ruN a/stablehlo/stablehlo/transforms/StablehloRefineShapes.cpp b/stablehl #include "stablehlo/dialect/StablehloOps.h" #include "stablehlo/dialect/TypeInference.h" #include "stablehlo/transforms/Passes.h" -@@ -844,12 +845,97 @@ +@@ -50,10 +94,144 @@ + namespace mlir { + namespace stablehlo { + ++#define DEBUG_TYPE "stablehlo-refine-shapes" ++ + #define GEN_PASS_DEF_STABLEHLOREFINESHAPESPASS + #include "stablehlo/transforms/Passes.h.inc" + + namespace { ++ ++// Per-module state for shape refinement. ++class RefineShapeState { ++ public: ++ // Validates that we are not attempting to refine a function with a different ++ // context than previously, and are not attempting recursive refinement. ++ // Returns failure() if validation fails. On success, returns a boolean ++ // that specifies whether the function has already been refined. 
++ FailureOr validateFunctionRefinement( ++ func::FuncOp func, SmallVector dimensionArguments, ++ SmallVector nonDimensionArgumentTypes) { ++ StringRef funcName = func.getName(); ++ auto found = refinementContexts.find(func); ++ if (found == refinementContexts.end()) { ++ return false; // not already refined. ++ } ++ auto prevDimensionArguments = std::get<0>(found->second); ++ auto prevNonDimensionArgumentTypes = std::get<1>(found->second); ++ // Since we refine until fixed point, we will refine a call to a function ++ // both for the original function and for the refined one. In the latter ++ // case, we should have empty dimensionArguments but the same ++ // nonDimensionArgumentTypes. ++ if (prevNonDimensionArgumentTypes != nonDimensionArgumentTypes || ++ (!dimensionArguments.empty() && ++ prevDimensionArguments != dimensionArguments)) { ++ emitDifferentRefinementContextError( ++ func, /*dimensionArguments=*/dimensionArguments, ++ /*nonDimensionArgumentTypes=*/nonDimensionArgumentTypes, ++ /*prevDimensionArguments=*/prevDimensionArguments, ++ /*prevNonDimensionArgumentShapes=*/prevNonDimensionArgumentTypes); ++ return failure(); ++ } ++ for (auto funcOnStack : functionsBeingRefined) { ++ if (funcOnStack == funcName) { ++ func.emitOpError() << "Function " << funcName ++ << " is being refined recursively\n"; ++ return failure(); ++ } ++ } ++ return true; // already refined. ++ } ++ ++ // Updates the state to signal the starting of a function refinement. ++ // Callers must call `finishFunctionRefinement` when done. ++ void startFunctionRefinement(func::FuncOp func, ++ SmallVector dimensionArguments, ++ SmallVector nonDimensionArgumentTypes) { ++ StringRef funcName = func.getName(); ++ functionsBeingRefined.push_back(funcName); ++ refinementContexts[func] = ++ std::make_tuple(dimensionArguments, nonDimensionArgumentTypes); ++ } ++ ++ // Updates the state to signal the end of a function refinement. 
++ LogicalResult finishFunctionRefinement(func::FuncOp func) { ++ if (func.getName() != ++ functionsBeingRefined[functionsBeingRefined.size() - 1]) { ++ func.emitOpError() << "Expected to find " << func.getName() ++ << " at the top of the stack"; ++ return failure(); ++ } ++ functionsBeingRefined.pop_back(); ++ return success(); ++ } ++ ++ private: ++ // Maps refined functions to the refinement context: the values of dimension ++ // arguments and the types of non-dimension arguments. A function is added ++ // here when we start refining it. ++ DenseMap, SmallVector>> ++ refinementContexts; ++ ++ // A stack of functions that are in the process of being refined, the current ++ // one is last. ++ SmallVector functionsBeingRefined; ++ ++ void emitDifferentRefinementContextError( ++ func::FuncOp func, SmallVector dimensionArguments, ++ SmallVector nonDimensionArgumentTypes, ++ SmallVector prevDimensionArguments, ++ SmallVector prevNonDimensionArgumentShapes) { ++ InFlightDiagnostic msg = func.emitOpError(); ++ msg << "Function " << func.getName() ++ << " has already been refined with a different " ++ "refinement context. "; ++ int countShowNonDimensionArguments = ++ std::min(prevNonDimensionArgumentShapes.size(), ++ nonDimensionArgumentTypes.size()); ++ if (prevNonDimensionArgumentShapes.size() != ++ nonDimensionArgumentTypes.size()) { ++ msg << "Previous context had " << prevNonDimensionArgumentShapes.size() ++ << " and now we have " << nonDimensionArgumentTypes.size() ++ << " non-dimension arguments. "; ++ } ++ msg << "The differences among the first " << countShowNonDimensionArguments ++ << " non-dimension argument types are: "; ++ for (auto i = 0; i < countShowNonDimensionArguments; ++i) { ++ if (prevNonDimensionArgumentShapes[i] != nonDimensionArgumentTypes[i]) { ++ msg << "Non-dimension argument[" << i << "] previously had type " ++ << debugString(prevNonDimensionArgumentShapes[i]) ++ << " and now has type " << debugString(nonDimensionArgumentTypes[i]) ++ << ". 
"; ++ } ++ } ++ int countShowDimensionArguments = ++ std::min(prevDimensionArguments.size(), dimensionArguments.size()); ++ if (prevDimensionArguments.size() != dimensionArguments.size()) { ++ msg << "Previous context had " << prevDimensionArguments.size() ++ << " and now we have " << dimensionArguments.size() ++ << " dimension arguments. "; ++ } ++ msg << "The differences among the first " << countShowDimensionArguments ++ << " dimension arguments are: "; ++ for (auto i = 0; i < countShowDimensionArguments; ++i) { ++ if (prevDimensionArguments[i] != dimensionArguments[i]) { ++ msg << "Dimension argument[" << i << "] previously was " ++ << prevDimensionArguments[i].getSExtValue() << " and now is " ++ << dimensionArguments[i].getSExtValue() << ". "; ++ } ++ } ++ } ++}; ++ ++// Refines a function. ++// Returns success() both when the function is refined here for the first ++// time and when it had already been processed with the same refinement ++// context. Returns failure() if we encounter an error. ++LogicalResult refineFunction(func::FuncOp func, MLIRContext* context, ++ RefineShapeState* state, ++ size_t nrPrefixTokenArguments, ++ SmallVector dimensionArguments, ++ SmallVector nonDimensionArgumentTypes); + + // DenseElementsAttr can be constructed from ArrayRef but not from + // ArrayRef. This helper bridges the gap. +@@ -424,11 +602,10 @@ + diag << "refineValues failed for " << types << ": expected " + << values.size() << " types, got " << types.size(); + }); +- +- // Check whether `types` contain any new information with respect to existing +- // return types. Even if just a single dimension size out of an entire tensor +- // type got updated, using `inferMostSpecificType` ensures that we don't +- // miss that. ++ // Check whether `types` contain any new information with respect to ++ // existing return types. 
Even if just a single dimension size out of an ++ // entire tensor type got updated, using `inferMostSpecificType` ensures ++ // that we don't miss that. + bool needsRefinement = false; + SmallVector refinedTypes; + for (auto it : llvm::zip(values.getTypes(), types)) { +@@ -468,11 +645,13 @@ + + // Simply changing operand type of `func.return` won't work because + // that won't update the FunctionType of the enclosing `func.func`. +- // Nonetheless, we still want to support these ops because they are widely +- // used in StableHLO programs (although the plan of record is to replace +- // `func.return` ops in StableHLO programs with `stablehlo.return`: +- // https://github.com/openxla/stablehlo/issues/425). ++ // Nonetheless, we still want to support these ops because they are ++ // widely used in StableHLO programs (although the plan of record is to ++ // replace `func.return` ops in StableHLO programs with ++ // `stablehlo.return`: https://github.com/openxla/stablehlo/issues/425). + if (isa(user)) continue; ++ ++ if (isa(user)) continue; + + // Unlike in TensorFlow's type inference pass, here we work only with + // allowlisted ops to focus our support on well-defined semantics of +@@ -489,7 +668,8 @@ + value.setType(refinedType); + + // Special case: for `func.return`, guard the refinement with a cast +- // and leave propagation of the refined return type to a dedicated pattern. ++ // and leave propagation of the refined return type to a dedicated ++ // pattern. + auto isFuncReturn = [](OpOperand& use) -> bool { + return isa(use.getOwner()); + }; +@@ -505,8 +685,8 @@ + + // Refines the return types of the given operation using the given types. + // This function also signals PatternRewriter that it needs to visit all the +-// users of this op if any updates to its results have happened during execution +-// of the function. ++// users of this op if any updates to its results have happened during ++// execution of the function. 
+ LogicalResult refineReturnTypes(PatternRewriter& rewriter, Operation* op, + ArrayRef types) { + if (failed(refineValues(rewriter, op, op->getResults(), types))) +@@ -528,12 +708,12 @@ + // traversal, and only then we apply the refinements. If there are other + // types, then the corresponding refinements must be completely empty. + // 2) Encodings are not supported. In principle, TypeExtensions should be +-// supportable, but this needs careful thinking through. Given that no one +-// asked for support for bounded dynamism in this pass yet, this is left +-// for future work. ++// supportable, but this needs careful thinking through. Given that no ++// one asked for support for bounded dynamism in this pass yet, this is ++// left for future work. + // This function also signals PatternRewriter that it needs to visit all the +-// users of this op if any updates to its results have happened during execution +-// of the function. ++// users of this op if any updates to its results have happened during ++// execution of the function. + LogicalResult refineReturnTypes(PatternRewriter& rewriter, Operation* op, + ArrayRef refinements) { + SmallVector flattenedTypes; +@@ -623,8 +803,8 @@ + + // Refines the return type of the given operation using the given shape. + // This function also signals PatternRewriter that it needs to visit all the +-// users of this op if any updates to its results have happened during execution +-// of the function. ++// users of this op if any updates to its results have happened during ++// execution of the function. + template + LogicalResult refineReturnShape(PatternRewriter& rewriter, OpType op, + ArrayRef shape) { +@@ -633,8 +813,8 @@ + + // Refines the return type of the given operation using the given shape. + // This function also signals PatternRewriter that it needs to visit all the +-// users of this op if any updates to its results have happened during execution +-// of the function. 
++// users of this op if any updates to its results have happened during ++// execution of the function. + template + LogicalResult refineReturnShape(PatternRewriter& rewriter, OpType op, + Value shapeValue) { +@@ -647,6 +827,52 @@ + return refineReturnShape(rewriter, op, shape); + } + ++// Dimension arguments are leading scalar constant arguments, optionally ++// preceded by some stablehlo.token arguments. ++SmallVector getDimensionArguments(func::CallOp callOp, ++ size_t* nrPrefixTokenArguments) { ++ *nrPrefixTokenArguments = 0; ++ SmallVector operands = callOp.getOperands(); ++ SmallVector dimensionArguments; ++ for (size_t i = 0; i < operands.size(); ++i) { ++ if (i == *nrPrefixTokenArguments && isa(operands[i].getType())) { ++ (*nrPrefixTokenArguments)++; ++ continue; ++ } ++ RankedTensorType operandType = ++ dyn_cast(operands[i].getType()); ++ if (!operandType || operandType.getRank() != 0 || ++ !operandType.getElementType().template isa()) ++ break; ++ SmallVector operand_int; ++ if (failed(hlo::matchInts(operands[i], operand_int))) { ++ break; ++ } ++ dimensionArguments.push_back(operand_int[0]); ++ } ++ return dimensionArguments; ++} ++ ++std::optional> isConstantFunction( ++ func::FuncOp func) { ++ LLVM_DEBUG(llvm::dbgs() << "check if " << func.getName() ++ << " is a constant function\n"); ++ SmallVector returnedConstants; ++ func::ReturnOp ret = *func.getOps().begin(); ++ bool isConstant = llvm::all_of(ret->getOperands(), [&](auto returnVal) { ++ DenseIntElementsAttr attr; ++ Operation* return_operand_def = returnVal.getDefiningOp(); ++ if (return_operand_def && ++ matchPattern(return_operand_def, m_Constant(&attr))) { ++ returnedConstants.push_back(attr); ++ return true; ++ } ++ return false; ++ }); ++ if (isConstant) return returnedConstants; ++ return std::nullopt; ++} ++ + struct RefineAllGatherOpPattern : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + LogicalResult matchAndRewrite(AllGatherOp op, +@@ -655,9 +881,9 @@ + if 
(!operandType.hasRank()) + return rewriter.notifyMatchFailure(op, "expected ranked operand type"); + +- // This represents the cross_replica_and_partition process grouping strategy +- // that requires num_partitions to compute shardCount. Since we don't know +- // num_partitions at this point, we error out. ++ // This represents the cross_replica_and_partition process grouping ++ // strategy that requires num_partitions to compute shardCount. Since we ++ // don't know num_partitions at this point, we error out. + if (op.getChannelHandle() && !op.getUseGlobalDeviceIds()) + return rewriter.notifyMatchFailure(op, "unsupported strategy"); + DenseIntElementsAttr replicaGroups = op.getReplicaGroups(); +@@ -678,12 +904,11 @@ + auto operandType = op.getOperand().getType(); + if (!operandType.hasRank()) + return rewriter.notifyMatchFailure(op, "expected ranked operand type"); +- ++ auto resultType = op.getType(); + // If bit widths of the operand and the result are different, then + // operand and result shapes have different ranks. + // This complicates the logic quite a bit and is not needed to pass the + // current tests, so we leave this for future work. 
+- auto resultType = op.getType(); + auto getBitWidthFn = [](ShapedType type) { + auto elementType = type.getElementType(); + if (auto complexType = elementType.dyn_cast()) +@@ -694,8 +919,77 @@ + if (getBitWidthFn(operandType) != getBitWidthFn(resultType)) + return rewriter.notifyMatchFailure(op, "unsupported bit width"); + +- return refineReturnShape(rewriter, op, operandType.getShape()); +- } ++ auto res = refineReturnShape(rewriter, op, operandType.getShape()); ++ if (failed(res)) return failure(); ++ if (op.getOperand().getType() == op.getResult().getType()) { ++ LLVM_DEBUG({ llvm::dbgs() << " ** remove no-op bitcast convert\n"; }); ++ rewriter.replaceOp(op, op.getOperand()); ++ } ++ return success(); ++ } ++}; ++ ++struct RefineCallOpPattern : public OpRewritePattern { ++ using OpRewritePattern::OpRewritePattern; ++ ++ RefineCallOpPattern(MLIRContext* context, RefineShapeState* state) ++ : OpRewritePattern(context), _state(state) {} ++ ++ LogicalResult matchAndRewrite(func::CallOp op, ++ PatternRewriter& rewriter) const override { ++ LLVM_DEBUG({ llvm::dbgs() << "refineCallOp " << debugString(op) << "\n"; }); ++ ++ // We have a number of prefix token arguments, then the dimension arguments ++ size_t nrPrefixTokenArguments = 0; ++ SmallVector dimensionArguments = ++ getDimensionArguments(op, &nrPrefixTokenArguments); ++ SmallVector nonDimensionArgumentTypes; ++ SmallVector nonDimensionArguments; ++ SmallVector operands = op.getOperands(); ++ for (size_t i = 0; i < operands.size(); ++i) { ++ // Skip the dimension arguments. 
++ if (i >= nrPrefixTokenArguments && ++ i < nrPrefixTokenArguments + dimensionArguments.size()) { ++ continue; ++ } ++ nonDimensionArgumentTypes.push_back(operands[i].getType()); ++ nonDimensionArguments.push_back(operands[i]); ++ } ++ FlatSymbolRefAttr calleeName = op.getCalleeAttr(); ++ const SymbolTable symbolTable(op->getParentOfType()); ++ func::FuncOp callee = dyn_cast( ++ symbolTable.lookupNearestSymbolFrom(op, calleeName.getAttr())); ++ if (!callee) ++ return rewriter.notifyMatchFailure( ++ op, "cannot find callee in the current scope"); ++ if (failed(refineFunction(callee, rewriter.getContext(), _state, ++ nrPrefixTokenArguments, dimensionArguments, ++ nonDimensionArgumentTypes))) ++ return failure(); ++ ++ // Is the callee a constant function in this refinement context? ++ std::optional> constantAttrs = ++ isConstantFunction(callee); ++ if (constantAttrs.has_value()) { ++ SmallVector constants; ++ for (auto constAttr : constantAttrs.value()) { ++ constants.push_back( ++ rewriter.create(op.getLoc(), constAttr)); ++ } ++ rewriter.replaceOp(op, constants); ++ return success(); ++ } ++ if (!dimensionArguments.empty()) { ++ // Drop the dimension arguments, but only if necessary, or else we ++ // will end up trying to refine the new CallOp forever. ++ op = rewriter.replaceOpWithNewOp( ++ op, op.getResultTypes(), callee.getSymName(), nonDimensionArguments); ++ } ++ return refineReturnTypes(rewriter, op, callee.getResultTypes()); ++ } ++ ++ private: ++ RefineShapeState* _state; + }; + + struct RefineConvertOpPattern : public OpRewritePattern { +@@ -844,12 +1138,98 @@ } }; @@ -1712,8 +2525,9 @@ diff --ruN a/stablehlo/stablehlo/transforms/StablehloRefineShapes.cpp b/stablehl + return rewriter.notifyMatchFailure(op, "expected constant output_shape"); + + // We only need to refine the shape of `output` (the second result). 
-+ // The shape of `output_state` (the first result) is determined by the shape -+ // of `initial_state`, so we ignore it and provide an empty refinement. ++ // The shape of `output_state` (the first result) is determined by the ++ // shape of `initial_state`, so we ignore it and provide an empty ++ // refinement. + return refineReturnTypes(rewriter, op, {{initialStateType}, {outputShape}}); + } +}; @@ -1737,15 +2551,349 @@ diff --ruN a/stablehlo/stablehlo/transforms/StablehloRefineShapes.cpp b/stablehl } }; -@@ -1181,7 +1267,10 @@ - patterns.add(&getContext()); - patterns.add(&getContext()); - patterns.add(&getContext()); -+ patterns.add(&getContext()); - patterns.add(&getContext()); -+ patterns.add(&getContext()); -+ patterns.add(&getContext()); - patterns.add(&getContext()); - patterns.add(&getContext()); - patterns.add(&getContext()); +@@ -865,11 +1245,11 @@ + if (!isa(op->getDialect())) + return rewriter.notifyMatchFailure(op, "unsupported dialect"); + +- // For the ops that implement InferTypeOpInterface, we reinfer their return +- // types and see what happens. +- // Operands of these ops might have been refined elsewhere (e.g. someone +- // might have updated argument types of a function) or earlier during this +- // pass, and this might enable refinement opportunities downstream. ++ // For the ops that implement InferTypeOpInterface, we reinfer their ++ // return types and see what happens. Operands of these ops might have ++ // been refined elsewhere (e.g. someone might have updated argument types ++ // of a function) or earlier during this pass, and this might enable ++ // refinement opportunities downstream. + SmallVector inferredReturnTypes; + if (failed(op.inferReturnTypes(getContext(), /*location=*/{}, + op->getOperands(), op->getAttrDictionary(), +@@ -925,8 +1305,8 @@ + sliceSizesAttr.size(), + RankedTensorType::get({}, startIndicesElementType)); + +- // RealDynamicSliceOp can take tensors of integer or index element types. 
+- // DynamicSliceOp::slice_sizes only supports i64 element type. ++ // RealDynamicSliceOp can take tensors of integer or index element ++ // types. DynamicSliceOp::slice_sizes only supports i64 element type. + // Adapt accordingly in order to be compatible with inferDynamicSliceOp. + SmallVector sliceSizes; + for (auto element : sliceSizesAttr.getValues()) { +@@ -956,9 +1336,9 @@ + if (!operandType.hasRank()) + return rewriter.notifyMatchFailure(op, "expected ranked operand type"); + +- // This represents the cross_replica_and_partition process grouping strategy +- // that requires num_partitions to compute shardCount. Since we don't know +- // num_partitions at this point, we error out. ++ // This represents the cross_replica_and_partition process grouping ++ // strategy that requires num_partitions to compute shardCount. Since we ++ // don't know num_partitions at this point, we error out. + if (op.getChannelHandle() && !op.getUseGlobalDeviceIds()) + return rewriter.notifyMatchFailure(op, "unsupported strategy"); + DenseIntElementsAttr replicaGroups = op.getReplicaGroups(); +@@ -998,9 +1378,9 @@ + PatternRewriter& rewriter) const override { + // Push the potentially refined operand types into the nested regions. + // This can lead to refinements of the return types of the body (but not +- // of the cond since it always returns tensor), but the key insight here +- // is that the enclosing while op doesn't care about these refinements +- // (because its return types are equal to its operand types). ++ // of the cond since it always returns tensor), but the key insight ++ // here is that the enclosing while op doesn't care about these ++ // refinements (because its return types are equal to its operand types). + // If we end up with incompatibilities between while's return types and + // body's return types, the verifier will tell us about that. This means + // that the original program wasn't well-formed. 
TODO(burmako): Implement +@@ -1050,8 +1430,8 @@ + if (failed(mostSpecificType) || destType == *mostSpecificType) continue; + + // If the source type of the cast is more specific than the target type, +- // then we conclude that the cast is redundant (i.e. needs to be removed) +- // and that the return type of the function needs an update. ++ // then we conclude that the cast is redundant (i.e. needs to be ++ // removed) and that the return type of the function needs an update. + needsUpdate = true; + updatedResultTypes[i] = sourceType; + +@@ -1066,9 +1446,6 @@ + for (auto cast : castsToReplace) + rewriter.replaceOp(cast, cast->getOperands()); + +- // If the type of the enclosing `func.func` needs an update, we simply +- // call setType. We can afford this simplicity because our algorithm +- // currently supports only one function per module. + auto func = cast(op->getParentOp()); + func.setType( + rewriter.getFunctionType(func.getArgumentTypes(), updatedResultTypes)); +@@ -1100,22 +1477,186 @@ + } + }; + ++LogicalResult applyRewritePatterns(func::FuncOp func, MLIRContext* context, ++ RefineShapeState* state) { ++ // TODO(#1048): Find out why .maxIterations = 1 no longer works. ++ // There have been recent refactors to applyPatternsAndFoldGreedily ++ // upstream, and that might be the reason. 
++ GreedyRewriteConfig config; ++ config.useTopDownTraversal = true; ++ config.enableRegionSimplification = true; ++ config.maxIterations = 2; ++ config.maxNumRewrites = GreedyRewriteConfig::kNoLimit; ++ config.strictMode = GreedyRewriteStrictness::AnyOp; ++ ++ RewritePatternSet patterns(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context, state); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ patterns.add(context); ++ if (failed(applyPatternsAndFoldGreedily(func, std::move(patterns), config))) { ++ func.emitOpError() << "applyPatternsAndFoldGreedily failed"; ++ return failure(); ++ } ++ return success(); ++} ++ ++LogicalResult refineFunction(func::FuncOp func, MLIRContext* context, ++ RefineShapeState* state, ++ size_t nrPrefixTokenArguments, ++ SmallVector dimensionArguments, ++ SmallVector nonDimensionArgumentTypes) { ++ // The nonDimensionArgumentTypes include the prefix token arguments. 
++ LLVM_DEBUG({ ++ llvm::dbgs() << "refineFunction " << func.getName() << ": initial type " ++ << debugString(func.getFunctionType()) << "\n"; ++ llvm::dbgs() << " has " << nrPrefixTokenArguments << " prefix tokens\n"; ++ for (size_t i = 0; i < dimensionArguments.size(); ++i) { ++ llvm::dbgs() << " with dimension arg[" << i ++ << "] = " << dimensionArguments[i] << "\n"; ++ } ++ }); ++ // Check that the argument types have static shapes. ++ for (size_t i = 0; i < nonDimensionArgumentTypes.size(); ++i) { ++ if (i < nrPrefixTokenArguments) continue; ++ auto argType = nonDimensionArgumentTypes[i]; ++ if (isa(argType)) continue; ++ auto argRankedTensorType = dyn_cast(argType); ++ if (!argRankedTensorType || !argRankedTensorType.hasStaticShape()) { ++ func.emitOpError() << func.getName() ++ << " must be refined with static shape arguments. " ++ << "Found argument of type " << debugString(argType); ++ return failure(); ++ } ++ } ++ auto alreadyRefined = state->validateFunctionRefinement( ++ func, dimensionArguments, nonDimensionArgumentTypes); ++ if (failed(alreadyRefined)) { ++ return failure(); ++ } ++ if (*alreadyRefined) { ++ LLVM_DEBUG({ ++ llvm::dbgs() << "refineFunction " << func.getName() ++ << ": skipping, already refined\n"; ++ }); ++ return success(); ++ } ++ state->startFunctionRefinement(func, dimensionArguments, ++ nonDimensionArgumentTypes); ++ // Only one block per function is supported at the moment. ++ // At the StableHLO level, functions are expected to only have one block, ++ // so supporting more is out of scope for this pass. ++ if (!func.getRegion().hasOneBlock()) { ++ func.emitOpError() << "must have exactly one block"; ++ return failure(); ++ } ++ ++ // Replace all dimension arguments with constants and remove those arguments. ++ // Wrap non-dimension arguments with bitcast_convert. 
++ OpBuilder op_builder(func.getRegion()); ++ op_builder.setInsertionPointToStart(&func.getRegion().front()); ++ size_t firstNonDimensionArg = ++ nrPrefixTokenArguments + dimensionArguments.size(); ++ for (size_t i = 0; i < func.getNumArguments(); ++i) { ++ BlockArgument arg = func.getArgument(i); ++ Type argType = arg.getType(); ++ if (i < nrPrefixTokenArguments) { ++ continue; ++ } ++ if (i < firstNonDimensionArg) { ++ ShapedType argShapedType = dyn_cast(argType); ++ if (!argShapedType) { ++ func.emitOpError() << "dimension arguments must have shaped types"; ++ return failure(); ++ } ++ // We will drop the dimension arguments, replace them with constants. ++ auto replacement_op = op_builder.create( ++ arg.getLoc(), argType, ++ getTensorAttr(argShapedType, ++ dimensionArguments[i - nrPrefixTokenArguments])); ++ arg.replaceAllUsesWith(replacement_op); ++ } else { ++ int nonDimensionArgumentIndex = ++ nrPrefixTokenArguments + i - firstNonDimensionArg; ++ Type refinedType = nonDimensionArgumentTypes[nonDimensionArgumentIndex]; ++ if (refinedType != argType) { ++ // We add BitcastConvertOp as the only uses of the non-dimension ++ // arguments to ensure the module stays valid after we set the argument ++ // type. 
++ auto replacement_op = op_builder.create( ++ arg.getLoc(), argType, arg); ++ arg.replaceAllUsesExcept(replacement_op->getResult(0), replacement_op); ++ arg.setType(refinedType); ++ } ++ } ++ } ++ BitVector argIndices(func.getNumArguments()); ++ argIndices.set(nrPrefixTokenArguments, firstNonDimensionArg); ++ func.eraseArguments(argIndices); ++ func.setType(op_builder.getFunctionType(nonDimensionArgumentTypes, ++ func.getResultTypes())); ++ LLVM_DEBUG({ ++ llvm::dbgs() << "refineFunction " << func.getName() << ": set type to " ++ << func.getFunctionType() << "\n"; ++ }); ++ if (failed(applyRewritePatterns(func, context, state))) return failure(); ++ LLVM_DEBUG({ ++ llvm::dbgs() << "refineFunction " << func.getName() << ": end with type " ++ << debugString(func.getFunctionType()) << "\n"; ++ }); ++ if (failed(state->finishFunctionRefinement(func))) return failure(); ++ return success(); ++} ++ + struct StablehloRefineShapesPass + : public impl::StablehloRefineShapesPassBase { + using StablehloRefineShapesPassBase::StablehloRefineShapesPassBase; + + void runOnOperation() override { +- // Only one function per module is supported at the moment to avoid the need +- // to think about iterative type inference algorithms. +- // Current use cases are served well by inlining multiple functions into +- // a single function, so we leave native support for multiple functions to +- // future work. + // To enable modules that contain CustomCallOp::called_computations, + // we allow multiple functions, in which case we only refine the main + // function called "main", assuming that the called computations will have + // static shapes. Lifting this assumption and expanding refinement to + // multiple functions is left for future work. 
+ ModuleOp module = getOperation(); ++ RefineShapeState state; + auto funcs = llvm::to_vector(module.getOps()); + if (funcs.empty()) return; + func::FuncOp func; +@@ -1130,70 +1671,14 @@ + << " function to clearly identify which function will be refined"; + return signalPassFailure(); + } +- +- // Similarly, only one block per function is supported at the moment. +- // At the StableHLO level, functions are expected to only have one block, +- // so supporting more is out of scope for this pass. +- if (!func.getRegion().hasOneBlock()) { +- func.emitOpError() << "must have exactly one block"; ++ SmallVector emptyDimensionArguments; ++ SmallVector nonDimensionArgumentTypes; ++ for (auto arg : func.getArguments()) ++ nonDimensionArgumentTypes.push_back(arg.getType()); ++ if (failed(refineFunction(func, &getContext(), &state, 0, ++ emptyDimensionArguments, ++ nonDimensionArgumentTypes))) + return signalPassFailure(); +- } +- +- // The algorithm behind this pass consists of a single traversal of the +- // function. This is sufficient because we only support one function per +- // program at the moment. +- // TODO(#1048): Find out why .maxIterations = 1 no longer works. +- // There have been recent refactors to applyPatternsAndFoldGreedily +- // upstream, and that might be the reason. 
+- GreedyRewriteConfig config; +- config.useTopDownTraversal = true; +- config.enableRegionSimplification = true; +- config.maxIterations = 2; +- config.maxNumRewrites = GreedyRewriteConfig::kNoLimit; +- config.strictMode = GreedyRewriteStrictness::AnyOp; +- +- RewritePatternSet patterns(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- patterns.add(&getContext()); +- if (failed( +- applyPatternsAndFoldGreedily(func, std::move(patterns), config))) { +- return signalPassFailure(); +- } + } + }; + diff --git a/third_party/xla/xla/python/refine_polymorphic_shapes.cc b/third_party/xla/xla/python/refine_polymorphic_shapes.cc index 063b15aba58062..44ce5b20e39d9e 100644 --- a/third_party/xla/xla/python/refine_polymorphic_shapes.cc +++ b/third_party/xla/xla/python/refine_polymorphic_shapes.cc @@ -251,10 +251,6 @@ absl::Status RefinePolymorphicShapes(mlir::ModuleOp module, 
pm.enableIRPrinting(print_before, print_after, /*printModuleScope=*/true, /*printAfterOnlyOnChange=*/true); } - - // TODO(necula): we should not need the inliner. - pm.addPass(mlir::createInlinerPass()); - pm.addPass(mlir::createCSEPass()); pm.addPass(mlir::stablehlo::createStablehloRefineShapesPass()); pm.addNestedPass( mlir::stablehlo::createStablehloCanonicalizeDynamismPass()); From de0cbb9bb70515f75bde5ff87b7d470096e7aa49 Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Thu, 21 Sep 2023 05:11:23 -0700 Subject: [PATCH 080/567] [XLA:GPU] Clean up Target util. We have some differences between Triton codegen and other fusion codegen, namely for Remainder/Fmod and Cbrt. Unify that. - Remove two unused math functions. - Add mapping from kRemainder to kFmod. - Use kCbrt device function in elemental_ir_emitter. PiperOrigin-RevId: 567274915 --- third_party/xla/xla/service/gpu/elemental_ir_emitter.cc | 6 ++++++ third_party/xla/xla/service/gpu/elemental_ir_emitter.h | 3 +++ third_party/xla/xla/service/gpu/target_util.cc | 8 ++------ third_party/xla/xla/service/gpu/target_util.h | 4 +--- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/third_party/xla/xla/service/gpu/elemental_ir_emitter.cc b/third_party/xla/xla/service/gpu/elemental_ir_emitter.cc index 352d1746459ce8..6955d3050e869a 100644 --- a/third_party/xla/xla/service/gpu/elemental_ir_emitter.cc +++ b/third_party/xla/xla/service/gpu/elemental_ir_emitter.cc @@ -326,6 +326,12 @@ StatusOr GpuElementalIrEmitter::EmitComplexAbs( {prim_type, prim_type}, prim_type); } +StatusOr GpuElementalIrEmitter::EmitCbrt(PrimitiveType prim_type, + llvm::Value* value) { + return EmitDeviceMathCall(TargetDeviceFunctionID::kCbrt, {value}, {prim_type}, + prim_type); +} + llvm::Value* GpuElementalIrEmitter::EmitThreadId() { llvm::Value* block_id = IntCast( EmitCallToTargetIntrinsic(TargetIntrinsicID::kBlockIdx, {}, {}, b()), diff --git a/third_party/xla/xla/service/gpu/elemental_ir_emitter.h 
b/third_party/xla/xla/service/gpu/elemental_ir_emitter.h index 2e3e3da57575dd..f97861ba2e7afc 100644 --- a/third_party/xla/xla/service/gpu/elemental_ir_emitter.h +++ b/third_party/xla/xla/service/gpu/elemental_ir_emitter.h @@ -87,6 +87,9 @@ class GpuElementalIrEmitter : public ElementalIrEmitter { StatusOr EmitComplexAbs(PrimitiveType prim_type, llvm::Value* value) override; + StatusOr EmitCbrt(PrimitiveType prim_type, + llvm::Value* value) override; + StatusOr> EmitThreadLocalCall( const HloComputation& callee, absl::Span parameters, absl::string_view, bool /*is_reducer*/) override; diff --git a/third_party/xla/xla/service/gpu/target_util.cc b/third_party/xla/xla/service/gpu/target_util.cc index 97ca6388804521..12b12b3f19faec 100644 --- a/third_party/xla/xla/service/gpu/target_util.cc +++ b/third_party/xla/xla/service/gpu/target_util.cc @@ -127,9 +127,6 @@ struct TargetDeviceFunction GetDeviceFunctionRoot( case TargetDeviceFunctionID::kCos: { return {"__nv_cos", "__ocml_cos"}; } - case TargetDeviceFunctionID::kErfcinv: { - return {"__nv_erfcinv", "__ocml_erfcinv"}; - } case TargetDeviceFunctionID::kExp: { return {"__nv_exp", "__ocml_exp"}; } @@ -151,9 +148,6 @@ struct TargetDeviceFunction GetDeviceFunctionRoot( case TargetDeviceFunctionID::kPow: { return {"__nv_pow", "__ocml_pow"}; } - case TargetDeviceFunctionID::kRound: { - return {"__nv_round", "__ocml_round"}; - } case TargetDeviceFunctionID::kRsqrt: { return {"__nv_rsqrt", "__ocml_rsqrt"}; } @@ -192,6 +186,8 @@ StatusOr GetTargetDeviceFunctionID(HloOpcode op) { return TargetDeviceFunctionID::kLog1p; case HloOpcode::kPower: return TargetDeviceFunctionID::kPow; + case HloOpcode::kRemainder: + return TargetDeviceFunctionID::kFmod; case HloOpcode::kRsqrt: return TargetDeviceFunctionID::kRsqrt; case HloOpcode::kSin: diff --git a/third_party/xla/xla/service/gpu/target_util.h b/third_party/xla/xla/service/gpu/target_util.h index 981e316ae1b68a..19b00307b3fea1 100644 --- 
a/third_party/xla/xla/service/gpu/target_util.h +++ b/third_party/xla/xla/service/gpu/target_util.h @@ -48,8 +48,8 @@ enum class TargetIntrinsicID { // Enumeration to get target specific device math function. enum class TargetDeviceFunctionID { kAtan2 = 0, + kCbrt, kCos, - kErfcinv, kExp, kExpm1, kFmod, @@ -57,13 +57,11 @@ enum class TargetDeviceFunctionID { kLog, kLog1p, kPow, - kRound, kRsqrt, kSin, kSqrt, kTan, kTanh, - kCbrt, }; // HLO opcode -> TargetDeviceFunctionID mapping. From e20ace297d609e5f182f51a5daf8fe1d116bfbe5 Mon Sep 17 00:00:00 2001 From: Andrew Goodbody Date: Thu, 21 Sep 2023 13:21:29 +0100 Subject: [PATCH 081/567] Fix permission denied on cp of headers Remove the duplication of copies that can result in permission denied --- tensorflow/tools/pip_package/build_pip_package.sh | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tensorflow/tools/pip_package/build_pip_package.sh b/tensorflow/tools/pip_package/build_pip_package.sh index deff63b8d15557..a77aa82a21c5aa 100755 --- a/tensorflow/tools/pip_package/build_pip_package.sh +++ b/tensorflow/tools/pip_package/build_pip_package.sh @@ -254,13 +254,8 @@ function prepare_src() { fi fi - # Move headers from TSL/XLA into tensorflow so that InstallHeaders can move - # them back into tensorflow/include - cp -rL bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/local_tsl/tsl/ ${TMPDIR}/tensorflow - cp -rL bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/local_xla/xla/ ${TMPDIR}/tensorflow/compiler # Move vendored files into proper locations # This is required because TSL/XLA don't publish their own wheels - # TODO(jakeharmon): These two copy statements may no longer be necessary cp -rL bazel-bin/external/local_tsl/tsl/ ${TMPDIR}/tensorflow cp -rL bazel-bin/external/local_xla/xla/ ${TMPDIR}/tensorflow/compiler # Fix the proto stubs From 4511d6f62cc70359482145d0feea2ae583b18b2f Mon Sep 17 00:00:00 2001 From: Ilia Sergachev Date: Thu, 21 Sep 2023 06:51:56 -0700 Subject: 
[PATCH 082/567] [XLA:GPU] Avoid unnecessary autotuning of tiny Triton GEMM fusions. PiperOrigin-RevId: 567293724 --- third_party/xla/xla/service/gpu/BUILD | 8 +++ .../xla/xla/service/gpu/triton_autotuner.cc | 17 +++-- .../xla/xla/service/gpu/triton_autotuner.h | 6 +- .../xla/service/gpu/triton_autotuner_test.cc | 71 ++++++++++++++++--- 4 files changed, 85 insertions(+), 17 deletions(-) diff --git a/third_party/xla/xla/service/gpu/BUILD b/third_party/xla/xla/service/gpu/BUILD index 374ba47563aa10..8646507b0eef2f 100644 --- a/third_party/xla/xla/service/gpu/BUILD +++ b/third_party/xla/xla/service/gpu/BUILD @@ -673,6 +673,7 @@ cc_library( "//xla:util", "//xla:xla_proto_cc", "//xla/hlo/ir:hlo", + "//xla/hlo/utils:hlo_query", "//xla/service:dump", "//xla/service:executable", "//xla/service:float_normalization", @@ -710,7 +711,9 @@ xla_test( ":gemm_rewriter_triton", ":triton_autotuner", "//xla:autotuning_proto_cc", + "//xla:error_spec", "//xla:shape_util", + "//xla:xla_data_proto_cc", "//xla:xla_proto_cc", "//xla/hlo/ir:hlo", "//xla/service:hlo_pass_pipeline", @@ -721,10 +724,15 @@ xla_test( "//xla/tests:test_utils", "//xla/tests:verified_hlo_module", "//xla/tests:xla_internal_test_main", # fixdeps: keep + "@com_google_absl//absl/log", + "@com_google_absl//absl/log:check", "@com_google_absl//absl/strings", "@com_google_googletest//:gtest", "@local_tsl//tsl/lib/core:status_test_util", + "@local_tsl//tsl/platform:env", + "@local_tsl//tsl/platform:errors", "@local_tsl//tsl/platform:platform_port", + "@local_tsl//tsl/platform:statusor", ], ) diff --git a/third_party/xla/xla/service/gpu/triton_autotuner.cc b/third_party/xla/xla/service/gpu/triton_autotuner.cc index 7e56199473a547..96ed7527326acb 100644 --- a/third_party/xla/xla/service/gpu/triton_autotuner.cc +++ b/third_party/xla/xla/service/gpu/triton_autotuner.cc @@ -45,6 +45,7 @@ limitations under the License. 
#include "xla/hlo/ir/hlo_instructions.h" #include "xla/hlo/ir/hlo_module.h" #include "xla/hlo/ir/hlo_opcode.h" +#include "xla/hlo/utils/hlo_query.h" #include "xla/service/dump.h" #include "xla/service/executable.h" #include "xla/service/float_normalization.h" @@ -212,14 +213,12 @@ class GemmConfigSetCollector : public ConstDfsHloVisitorWithDefault { private: GemmConfigSet GetGemmConfigSet(const HloFusionInstruction* fusion) { - const HloComputation& fusion_computation = - *fusion->called_computations().at(0); - const HloInstruction& fusion_root = *fusion_computation.root_instruction(); const DebugOptions& debug_options = fusion->GetModule()->config().debug_options(); se::StreamExecutor* stream_exec = config_.GetExecutor(); return {GetPossibleMatmulAutotuneConfigs( - fusion_root, + *Cast(hlo_query::GetFirstInstructionWithOpcode( + *fusion->called_computations().at(0), HloOpcode::kDot)), stream_exec->GetDeviceDescription().cuda_compute_capability(), debug_options, config_.ExhaustiveTilingSearch())}; } @@ -739,9 +738,15 @@ Status Autotune(const AutotuneConfig& config, AutotunerCompileUtil& util, } // anonymous namespace std::vector GetPossibleMatmulAutotuneConfigs( - const HloInstruction& instr, + const HloDotInstruction& dot, const se::CudaComputeCapability compute_capability, const DebugOptions& debug_options, bool exhaustive_tiling_search) { + // Avoid autotuning tiny fusions. + constexpr int kMinGemmElements = 32 * 32; + if (ShapeUtil::ElementsIn(dot.operand(0)->shape()) <= kMinGemmElements && + ShapeUtil::ElementsIn(dot.operand(1)->shape()) <= kMinGemmElements) { + return {GemmKey(32, 32, 32, 1, 1, 4)}; + } // Split-K optimization enables more even utilization of a GPU in cases // where tiling just the non-contracting dimensions of a GEMM does not create // a sufficient number of thread block programs to occupy all available cores. @@ -756,7 +761,7 @@ std::vector GetPossibleMatmulAutotuneConfigs( debug_options.xla_gpu_enable_split_k_autotuning() ? 
std::max(1L, kSufficientNumberOfTiles * kMaxTileSize * kMaxTileSize / - ShapeUtil::ElementsIn(instr.shape())) + ShapeUtil::ElementsIn(dot.shape())) : 1; return exhaustive_tiling_search ? GetExhaustiveMatmulAutotuneConfigs(compute_capability, diff --git a/third_party/xla/xla/service/gpu/triton_autotuner.h b/third_party/xla/xla/service/gpu/triton_autotuner.h index 912c8149aea5fd..bff2f170e6e7b2 100644 --- a/third_party/xla/xla/service/gpu/triton_autotuner.h +++ b/third_party/xla/xla/service/gpu/triton_autotuner.h @@ -21,9 +21,13 @@ limitations under the License. #include "absl/strings/string_view.h" #include "xla/autotuning.pb.h" #include "xla/hlo/ir/hlo_computation.h" +#include "xla/hlo/ir/hlo_instructions.h" #include "xla/hlo/ir/hlo_module.h" #include "xla/service/gpu/autotuner_util.h" #include "xla/service/hlo_pass_interface.h" +#include "xla/statusor.h" +#include "xla/stream_executor/device_description.h" +#include "xla/xla.pb.h" #include "tsl/platform/threadpool.h" namespace xla { @@ -51,7 +55,7 @@ class TritonAutotuner : public HloModulePass { // TODO(b/266210099): have a way to generate/load these dynamically. // Returns a list of possible tilings for a GEMM performed in Triton. std::vector GetPossibleMatmulAutotuneConfigs( - const HloInstruction& instr, se::CudaComputeCapability compute_capability, + const HloDotInstruction& dot, se::CudaComputeCapability compute_capability, const DebugOptions& debug_options, bool exhaustive_tiling_search = false); } // namespace gpu diff --git a/third_party/xla/xla/service/gpu/triton_autotuner_test.cc b/third_party/xla/xla/service/gpu/triton_autotuner_test.cc index 7a22da6e421b26..217f030519719d 100644 --- a/third_party/xla/xla/service/gpu/triton_autotuner_test.cc +++ b/third_party/xla/xla/service/gpu/triton_autotuner_test.cc @@ -21,10 +21,16 @@ limitations under the License. 
#include #include +#include #include +#include "absl/log/check.h" +#include "absl/log/log.h" #include "absl/strings/string_view.h" #include "xla/autotuning.pb.h" +#include "xla/error_spec.h" +#include "xla/hlo/ir/hlo_casting_utils.h" #include "xla/hlo/ir/hlo_instruction.h" +#include "xla/hlo/ir/hlo_instructions.h" #include "xla/hlo/ir/hlo_module.h" #include "xla/hlo/ir/hlo_opcode.h" #include "xla/service/gpu/autotuner_util.h" @@ -39,8 +45,13 @@ limitations under the License. #include "xla/tests/test_utils.h" #include "xla/tests/verified_hlo_module.h" #include "xla/xla.pb.h" +#include "xla/xla_data.pb.h" #include "tsl/lib/core/status_test_util.h" #include "tsl/platform/cpu_info.h" +#include "tsl/platform/env.h" +#include "tsl/platform/errors.h" +#include "tsl/platform/statusor.h" +#include "tsl/platform/threadpool.h" namespace xla { namespace gpu { @@ -222,12 +233,20 @@ class TritonAutotunerTestWithMorePreciseReduction : public TritonAutotunerTest { }; TEST_F(TritonAutotunerTest, VoltaUsesNoMoreThanTwoStages) { + std::unique_ptr module = ParseAndReturnVerifiedModule(R"( +ENTRY e { + p0 = f32[1024,1024] parameter(0) + p1 = f32[1024,1024] parameter(1) + ROOT r = f32[1024,1024] dot(p0, p1), + lhs_contracting_dims={1}, rhs_contracting_dims={0} +})") + .value(); const se::CudaComputeCapability compute_capability{ se::CudaComputeCapability::VOLTA, /*minor=*/0}; const std::vector configs = GetPossibleMatmulAutotuneConfigs( - *HloInstruction::CreateParameter( - 0, ShapeUtil::MakeShape(F32, {1024, 1024}), ""), + *Cast( + module->entry_computation()->root_instruction()), compute_capability, GetDebugOptionsForTest()); EXPECT_FALSE(std::any_of(configs.begin(), configs.end(), [](const AutotuneResult::TritonGemmKey& key) { @@ -236,12 +255,20 @@ TEST_F(TritonAutotunerTest, VoltaUsesNoMoreThanTwoStages) { } TEST_F(TritonAutotunerTest, AmpereUsesMoreThanTwoStages) { + std::unique_ptr module = ParseAndReturnVerifiedModule(R"( +ENTRY e { + p0 = f32[1024,1024] parameter(0) + p1 = 
f32[1024,1024] parameter(1) + ROOT r = f32[1024,1024] dot(p0, p1), + lhs_contracting_dims={1}, rhs_contracting_dims={0} +})") + .value(); const se::CudaComputeCapability compute_capability{ se::CudaComputeCapability::AMPERE, /*minor=*/0}; const std::vector configs = GetPossibleMatmulAutotuneConfigs( - *HloInstruction::CreateParameter( - 0, ShapeUtil::MakeShape(F32, {1024, 1024}), ""), + *Cast( + module->entry_computation()->root_instruction()), compute_capability, GetDebugOptionsForTest()); EXPECT_TRUE(std::any_of(configs.begin(), configs.end(), [](const AutotuneResult::TritonGemmKey& key) { @@ -250,12 +277,20 @@ TEST_F(TritonAutotunerTest, AmpereUsesMoreThanTwoStages) { } TEST_F(TritonAutotunerTest, SmallOutputCanUseLargeSplitK) { + std::unique_ptr module = ParseAndReturnVerifiedModule(R"( +ENTRY e { + p0 = f32[1024,1024] parameter(0) + p1 = f32[1024,1024] parameter(1) + ROOT r = f32[1024,1024] dot(p0, p1), + lhs_contracting_dims={1}, rhs_contracting_dims={0} +})") + .value(); const se::CudaComputeCapability compute_capability{ se::CudaComputeCapability::AMPERE, /*minor=*/0}; const std::vector configs = GetPossibleMatmulAutotuneConfigs( - *HloInstruction::CreateParameter( - 0, ShapeUtil::MakeShape(F32, {1024, 1024}), ""), + *Cast( + module->entry_computation()->root_instruction()), compute_capability, GetDebugOptionsForTest()); EXPECT_TRUE(std::any_of(configs.begin(), configs.end(), [](const AutotuneResult::TritonGemmKey& key) { @@ -264,12 +299,20 @@ TEST_F(TritonAutotunerTest, SmallOutputCanUseLargeSplitK) { } TEST_F(TritonAutotunerTest, LargeOutputDoesNotUseLargeSplitK) { + std::unique_ptr module = ParseAndReturnVerifiedModule(R"( +ENTRY e { + p0 = f32[20480,20480] parameter(0) + p1 = f32[20480,20480] parameter(1) + ROOT r = f32[20480,20480] dot(p0, p1), + lhs_contracting_dims={1}, rhs_contracting_dims={0} +})") + .value(); const se::CudaComputeCapability compute_capability{ se::CudaComputeCapability::AMPERE, /*minor=*/0}; const std::vector configs = 
GetPossibleMatmulAutotuneConfigs( - *HloInstruction::CreateParameter( - 0, ShapeUtil::MakeShape(F32, {20480, 20480}), ""), + *Cast( + module->entry_computation()->root_instruction()), compute_capability, GetDebugOptionsForTest()); EXPECT_FALSE(std::any_of(configs.begin(), configs.end(), [](const AutotuneResult::TritonGemmKey& key) { @@ -507,12 +550,20 @@ class TritonAutotunerDisableSplitK : public TritonAutotunerTest { }; TEST_F(TritonAutotunerDisableSplitK, SplitKIsDisabled) { + std::unique_ptr module = ParseAndReturnVerifiedModule(R"( +ENTRY e { + p0 = f32[1024,1024] parameter(0) + p1 = f32[1024,1024] parameter(1) + ROOT r = f32[1024,1024] dot(p0, p1), + lhs_contracting_dims={1}, rhs_contracting_dims={0} +})") + .value(); const se::CudaComputeCapability compute_capability{ se::CudaComputeCapability::AMPERE, /*minor=*/0}; const std::vector configs = GetPossibleMatmulAutotuneConfigs( - *HloInstruction::CreateParameter( - 0, ShapeUtil::MakeShape(F32, {1024, 1024}), ""), + *Cast( + module->entry_computation()->root_instruction()), compute_capability, GetDebugOptionsForTest()); EXPECT_TRUE(std::all_of(configs.begin(), configs.end(), [](const AutotuneResult::TritonGemmKey& key) { From d95348224dc97baddfad977ef834cc2331f17787 Mon Sep 17 00:00:00 2001 From: Yash Katariya Date: Thu, 21 Sep 2023 07:32:12 -0700 Subject: [PATCH 083/567] Add python 3.12 to JAX docker containers PiperOrigin-RevId: 567302141 --- ...11.8-cudnn8.6-ubuntu20.04-manylinux2014-multipython | 2 ++ ....0.1-cudnn8.8-ubuntu20.04-manylinux2014-multipython | 2 ++ .../tools/ci_build/install/build_and_install_python.sh | 4 +++- .../install/install_pip_packages_by_version.sh | 10 +++++----- tensorflow/tools/toolchains/remote_config/configs.bzl | 8 ++++---- .../tools/toolchains/remote_config/containers.bzl | 4 ++-- .../tsl/tools/toolchains/remote_config/configs.bzl | 8 ++++---- .../tsl/tools/toolchains/remote_config/containers.bzl | 4 ++-- .../xla/tools/toolchains/remote_config/configs.bzl | 8 ++++---- 
.../xla/tools/toolchains/remote_config/containers.bzl | 4 ++-- 10 files changed, 30 insertions(+), 24 deletions(-) diff --git a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda11.8-cudnn8.6-ubuntu20.04-manylinux2014-multipython b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda11.8-cudnn8.6-ubuntu20.04-manylinux2014-multipython index 6f1888f23741ee..83223fde6e478c 100644 --- a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda11.8-cudnn8.6-ubuntu20.04-manylinux2014-multipython +++ b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda11.8-cudnn8.6-ubuntu20.04-manylinux2014-multipython @@ -38,9 +38,11 @@ COPY install/build_and_install_python.sh /install/ RUN /install/build_and_install_python.sh "3.9.4" RUN /install/build_and_install_python.sh "3.10.0" RUN /install/build_and_install_python.sh "3.11.0" +RUN /install/build_and_install_python.sh "3.12.0rc3" COPY install/install_pip_packages_by_version.sh /install/ # https://github.com/numpy/numpy/issues/22623 for `SETUPTOOLS_USE_DISTUTILS`. RUN SETUPTOOLS_USE_DISTUTILS=stdlib /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.9" "jax" RUN SETUPTOOLS_USE_DISTUTILS=stdlib /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.10" "jax" RUN SETUPTOOLS_USE_DISTUTILS=stdlib /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.11" "jax" +RUN SETUPTOOLS_USE_DISTUTILS=stdlib /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.12" "jax" diff --git a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda12.0.1-cudnn8.8-ubuntu20.04-manylinux2014-multipython b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda12.0.1-cudnn8.8-ubuntu20.04-manylinux2014-multipython index 2dc4b0611c9c05..ee1a272ffedf42 100644 --- a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda12.0.1-cudnn8.8-ubuntu20.04-manylinux2014-multipython +++ b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda12.0.1-cudnn8.8-ubuntu20.04-manylinux2014-multipython @@ -75,9 +75,11 @@ COPY install/build_and_install_python.sh /install/ RUN 
/install/build_and_install_python.sh "3.9.4" RUN /install/build_and_install_python.sh "3.10.0" RUN /install/build_and_install_python.sh "3.11.0" +RUN /install/build_and_install_python.sh "3.12.0rc3" COPY install/install_pip_packages_by_version.sh /install/ # https://github.com/numpy/numpy/issues/22623 for `SETUPTOOLS_USE_DISTUTILS`. RUN SETUPTOOLS_USE_DISTUTILS=stdlib /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.9" "jax" RUN SETUPTOOLS_USE_DISTUTILS=stdlib /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.10" "jax" RUN SETUPTOOLS_USE_DISTUTILS=stdlib /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.11" "jax" +RUN SETUPTOOLS_USE_DISTUTILS=stdlib /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.12" "jax" diff --git a/tensorflow/tools/ci_build/install/build_and_install_python.sh b/tensorflow/tools/ci_build/install/build_and_install_python.sh index fb8b6298542733..6055ea82536ada 100755 --- a/tensorflow/tools/ci_build/install/build_and_install_python.sh +++ b/tensorflow/tools/ci_build/install/build_and_install_python.sh @@ -15,11 +15,13 @@ # ============================================================================== VERSION="$1" +NO_RC_VERSION="${VERSION%rc*}" + shift mkdir /build cd /build -wget "https://www.python.org/ftp/python/${VERSION}/Python-${VERSION}.tgz" +wget "https://www.python.org/ftp/python/${NO_RC_VERSION}/Python-${VERSION}.tgz" tar xvzf "Python-${VERSION}.tgz" cd "Python-${VERSION}" ./configure --enable-optimizations "$@" diff --git a/tensorflow/tools/ci_build/install/install_pip_packages_by_version.sh b/tensorflow/tools/ci_build/install/install_pip_packages_by_version.sh index dfc2a7272c0234..e743ad8f63fadc 100755 --- a/tensorflow/tools/ci_build/install/install_pip_packages_by_version.sh +++ b/tensorflow/tools/ci_build/install/install_pip_packages_by_version.sh @@ -27,8 +27,7 @@ rm "get-pip.py" PYTHON_VERSION=$(echo ${PIP##*.}) # only the last number, eg. 
10 JAX_PACKAGES=( - # https://github.com/numpy/numpy/issues/22623 - "setuptools<=65.5.1" + "setuptools" "wheel" "cloudpickle" "colorama>=0.4.4" @@ -40,9 +39,8 @@ JAX_PACKAGES=( "six" "opt-einsum" "auditwheel" - "msgpack" "typing_extensions" - "ml_dtypes>=0.2.0" + "ml_dtypes>=0.3.0" "importlib_metadata>=4.6" ) @@ -94,7 +92,9 @@ fi if [[ "$2" == "jax" ]]; then # Special casing by version of Python # E.g., numpy supports py3.11 only from 1.23.4 - if [[ ${PYTHON_VERSION} -eq 11 ]]; then + if [[ ${PYTHON_VERSION} -eq 12 ]]; then + "${PIP_INSTALL[@]}" "numpy==1.26.0" "scipy==1.11.2" + elif [[ ${PYTHON_VERSION} -eq 11 ]]; then "${PIP_INSTALL[@]}" "numpy==1.23.4" "scipy==1.9.2" else "${PIP_INSTALL[@]}" "numpy==1.22.4" "scipy==1.7.3" diff --git a/tensorflow/tools/toolchains/remote_config/configs.bzl b/tensorflow/tools/toolchains/remote_config/configs.bzl index 0ef444f89f17d5..a1fd875f4bd5e5 100644 --- a/tensorflow/tools/toolchains/remote_config/configs.bzl +++ b/tensorflow/tools/toolchains/remote_config/configs.bzl @@ -210,7 +210,7 @@ def initialize_rbe_configs(): cuda_version = "11.8", cudnn_version = "8.6", os = "ubuntu20.04-manylinux2014-multipython", - python_versions = ["3.7", "3.8", "3.9", "3.10", "3.11"], + python_versions = ["3.9", "3.10", "3.11", "3.12"], tensorrt_install_path = "/usr", tensorrt_version = "8.4", sysroot = "/dt9", @@ -224,7 +224,7 @@ def initialize_rbe_configs(): cuda_version = "11.8", cudnn_version = "8.6", os = "ubuntu20.04-manylinux2014-multipython", - python_versions = ["3.7", "3.8", "3.9", "3.10", "3.11"], + python_versions = ["3.9", "3.10", "3.11", "3.12"], tensorrt_install_path = "/usr", tensorrt_version = "8.4", python_install_path = "/usr/local", @@ -236,7 +236,7 @@ def initialize_rbe_configs(): cuda_version = "12.0.1", cudnn_version = "8.8", os = "ubuntu20.04-manylinux2014-multipython", - python_versions = ["3.8", "3.9", "3.10", "3.11"], + python_versions = ["3.9", "3.10", "3.11", "3.12"], sysroot = "/dt9", python_install_path = 
"/usr/local", ) @@ -248,7 +248,7 @@ def initialize_rbe_configs(): cuda_version = "12.0.1", cudnn_version = "8.8", os = "ubuntu20.04-manylinux2014-multipython", - python_versions = ["3.8", "3.9", "3.10", "3.11"], + python_versions = ["3.9", "3.10", "3.11", "3.12"], python_install_path = "/usr/local", ) diff --git a/tensorflow/tools/toolchains/remote_config/containers.bzl b/tensorflow/tools/toolchains/remote_config/containers.bzl index b219c01f77d177..830b05b0c444b6 100644 --- a/tensorflow/tools/toolchains/remote_config/containers.bzl +++ b/tensorflow/tools/toolchains/remote_config/containers.bzl @@ -7,8 +7,8 @@ container_digests = { # JAX manylinux2014 configs. "cuda11.1-cudnn8-ubuntu20.04-manylinux2014-multipython": "sha256:011034978c5f1e5dcecc816b3b964faafc42b243001d9cd09ff7cfe4a6a0f4b9", "cuda11.4-cudnn8.2-ubuntu20.04-manylinux2014-multipython": "sha256:d17894a1349a12baea1732cb133f65f08754ed97d0a6647efe23c916a9ab8f1c", - "cuda11.8-cudnn8.6-ubuntu20.04-manylinux2014-multipython": "sha256:74a055cf4d996cf0ad280a9d929f0740eeb10de7696e2c42991ec719544ac656", - "cuda12.0.1-cudnn8.8-ubuntu20.04-manylinux2014-multipython": "sha256:5ed0933b22fce5073091deaeae98183461737b87a2e44c579e67ea4ee04b61d5", + "cuda11.8-cudnn8.6-ubuntu20.04-manylinux2014-multipython": "sha256:c973a5dd1b335b83f5cc65ab2d1f12e12c0cc5d310a2d9bf676fcdb52cf08285", + "cuda12.0.1-cudnn8.8-ubuntu20.04-manylinux2014-multipython": "sha256:2551b1587bdd0b63a4dd329eba6416cd07acb25496dde411c376609ce4f076f0", # ROCM, probably not all of them still in use "rocm-ubuntu18.04-manylinux2010-multipython": "sha256:6e953a09b145df338bcb03e9e36f99b291140c29b72d0a048fb6c5905ccad5eb", "rocm-ubuntu20.04-manylinux2014-multipython": "sha256:906faec7765fe5dd067f2b092b5d5f220c1fedde725fb42c83d031b4d6f32204", diff --git a/third_party/xla/third_party/tsl/tools/toolchains/remote_config/configs.bzl b/third_party/xla/third_party/tsl/tools/toolchains/remote_config/configs.bzl index d1e467c45c91b6..2453dc746feefb 100644 --- 
a/third_party/xla/third_party/tsl/tools/toolchains/remote_config/configs.bzl +++ b/third_party/xla/third_party/tsl/tools/toolchains/remote_config/configs.bzl @@ -210,7 +210,7 @@ def initialize_rbe_configs(): cuda_version = "11.8", cudnn_version = "8.6", os = "ubuntu20.04-manylinux2014-multipython", - python_versions = ["3.7", "3.8", "3.9", "3.10", "3.11"], + python_versions = ["3.9", "3.10", "3.11", "3.12"], tensorrt_install_path = "/usr", tensorrt_version = "8.4", sysroot = "/dt9", @@ -224,7 +224,7 @@ def initialize_rbe_configs(): cuda_version = "11.8", cudnn_version = "8.6", os = "ubuntu20.04-manylinux2014-multipython", - python_versions = ["3.7", "3.8", "3.9", "3.10", "3.11"], + python_versions = ["3.9", "3.10", "3.11", "3.12"], tensorrt_install_path = "/usr", tensorrt_version = "8.4", python_install_path = "/usr/local", @@ -236,7 +236,7 @@ def initialize_rbe_configs(): cuda_version = "12.0.1", cudnn_version = "8.8", os = "ubuntu20.04-manylinux2014-multipython", - python_versions = ["3.8", "3.9", "3.10", "3.11"], + python_versions = ["3.9", "3.10", "3.11", "3.12"], sysroot = "/dt9", python_install_path = "/usr/local", ) @@ -248,7 +248,7 @@ def initialize_rbe_configs(): cuda_version = "12.0.1", cudnn_version = "8.8", os = "ubuntu20.04-manylinux2014-multipython", - python_versions = ["3.8", "3.9", "3.10", "3.11"], + python_versions = ["3.9", "3.10", "3.11", "3.12"], python_install_path = "/usr/local", ) diff --git a/third_party/xla/third_party/tsl/tools/toolchains/remote_config/containers.bzl b/third_party/xla/third_party/tsl/tools/toolchains/remote_config/containers.bzl index b219c01f77d177..830b05b0c444b6 100644 --- a/third_party/xla/third_party/tsl/tools/toolchains/remote_config/containers.bzl +++ b/third_party/xla/third_party/tsl/tools/toolchains/remote_config/containers.bzl @@ -7,8 +7,8 @@ container_digests = { # JAX manylinux2014 configs. 
"cuda11.1-cudnn8-ubuntu20.04-manylinux2014-multipython": "sha256:011034978c5f1e5dcecc816b3b964faafc42b243001d9cd09ff7cfe4a6a0f4b9", "cuda11.4-cudnn8.2-ubuntu20.04-manylinux2014-multipython": "sha256:d17894a1349a12baea1732cb133f65f08754ed97d0a6647efe23c916a9ab8f1c", - "cuda11.8-cudnn8.6-ubuntu20.04-manylinux2014-multipython": "sha256:74a055cf4d996cf0ad280a9d929f0740eeb10de7696e2c42991ec719544ac656", - "cuda12.0.1-cudnn8.8-ubuntu20.04-manylinux2014-multipython": "sha256:5ed0933b22fce5073091deaeae98183461737b87a2e44c579e67ea4ee04b61d5", + "cuda11.8-cudnn8.6-ubuntu20.04-manylinux2014-multipython": "sha256:c973a5dd1b335b83f5cc65ab2d1f12e12c0cc5d310a2d9bf676fcdb52cf08285", + "cuda12.0.1-cudnn8.8-ubuntu20.04-manylinux2014-multipython": "sha256:2551b1587bdd0b63a4dd329eba6416cd07acb25496dde411c376609ce4f076f0", # ROCM, probably not all of them still in use "rocm-ubuntu18.04-manylinux2010-multipython": "sha256:6e953a09b145df338bcb03e9e36f99b291140c29b72d0a048fb6c5905ccad5eb", "rocm-ubuntu20.04-manylinux2014-multipython": "sha256:906faec7765fe5dd067f2b092b5d5f220c1fedde725fb42c83d031b4d6f32204", diff --git a/third_party/xla/tools/toolchains/remote_config/configs.bzl b/third_party/xla/tools/toolchains/remote_config/configs.bzl index d1e467c45c91b6..2453dc746feefb 100644 --- a/third_party/xla/tools/toolchains/remote_config/configs.bzl +++ b/third_party/xla/tools/toolchains/remote_config/configs.bzl @@ -210,7 +210,7 @@ def initialize_rbe_configs(): cuda_version = "11.8", cudnn_version = "8.6", os = "ubuntu20.04-manylinux2014-multipython", - python_versions = ["3.7", "3.8", "3.9", "3.10", "3.11"], + python_versions = ["3.9", "3.10", "3.11", "3.12"], tensorrt_install_path = "/usr", tensorrt_version = "8.4", sysroot = "/dt9", @@ -224,7 +224,7 @@ def initialize_rbe_configs(): cuda_version = "11.8", cudnn_version = "8.6", os = "ubuntu20.04-manylinux2014-multipython", - python_versions = ["3.7", "3.8", "3.9", "3.10", "3.11"], + python_versions = ["3.9", "3.10", "3.11", "3.12"], 
tensorrt_install_path = "/usr", tensorrt_version = "8.4", python_install_path = "/usr/local", @@ -236,7 +236,7 @@ def initialize_rbe_configs(): cuda_version = "12.0.1", cudnn_version = "8.8", os = "ubuntu20.04-manylinux2014-multipython", - python_versions = ["3.8", "3.9", "3.10", "3.11"], + python_versions = ["3.9", "3.10", "3.11", "3.12"], sysroot = "/dt9", python_install_path = "/usr/local", ) @@ -248,7 +248,7 @@ def initialize_rbe_configs(): cuda_version = "12.0.1", cudnn_version = "8.8", os = "ubuntu20.04-manylinux2014-multipython", - python_versions = ["3.8", "3.9", "3.10", "3.11"], + python_versions = ["3.9", "3.10", "3.11", "3.12"], python_install_path = "/usr/local", ) diff --git a/third_party/xla/tools/toolchains/remote_config/containers.bzl b/third_party/xla/tools/toolchains/remote_config/containers.bzl index b219c01f77d177..830b05b0c444b6 100644 --- a/third_party/xla/tools/toolchains/remote_config/containers.bzl +++ b/third_party/xla/tools/toolchains/remote_config/containers.bzl @@ -7,8 +7,8 @@ container_digests = { # JAX manylinux2014 configs. 
"cuda11.1-cudnn8-ubuntu20.04-manylinux2014-multipython": "sha256:011034978c5f1e5dcecc816b3b964faafc42b243001d9cd09ff7cfe4a6a0f4b9", "cuda11.4-cudnn8.2-ubuntu20.04-manylinux2014-multipython": "sha256:d17894a1349a12baea1732cb133f65f08754ed97d0a6647efe23c916a9ab8f1c", - "cuda11.8-cudnn8.6-ubuntu20.04-manylinux2014-multipython": "sha256:74a055cf4d996cf0ad280a9d929f0740eeb10de7696e2c42991ec719544ac656", - "cuda12.0.1-cudnn8.8-ubuntu20.04-manylinux2014-multipython": "sha256:5ed0933b22fce5073091deaeae98183461737b87a2e44c579e67ea4ee04b61d5", + "cuda11.8-cudnn8.6-ubuntu20.04-manylinux2014-multipython": "sha256:c973a5dd1b335b83f5cc65ab2d1f12e12c0cc5d310a2d9bf676fcdb52cf08285", + "cuda12.0.1-cudnn8.8-ubuntu20.04-manylinux2014-multipython": "sha256:2551b1587bdd0b63a4dd329eba6416cd07acb25496dde411c376609ce4f076f0", # ROCM, probably not all of them still in use "rocm-ubuntu18.04-manylinux2010-multipython": "sha256:6e953a09b145df338bcb03e9e36f99b291140c29b72d0a048fb6c5905ccad5eb", "rocm-ubuntu20.04-manylinux2014-multipython": "sha256:906faec7765fe5dd067f2b092b5d5f220c1fedde725fb42c83d031b4d6f32204", From c2655123109b494679367639a4ba890cc9017c78 Mon Sep 17 00:00:00 2001 From: Alan Kelly Date: Thu, 21 Sep 2023 07:43:51 -0700 Subject: [PATCH 084/567] Merge same size types for PACK to reduce binary size PiperOrigin-RevId: 567304689 --- tensorflow/lite/kernels/pack.cc | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/tensorflow/lite/kernels/pack.cc b/tensorflow/lite/kernels/pack.cc index e4bc806a577805..5bb1a618d446c5 100644 --- a/tensorflow/lite/kernels/pack.cc +++ b/tensorflow/lite/kernels/pack.cc @@ -116,34 +116,21 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, kOutputTensor, &output)); switch (output->type) { - case kTfLiteFloat32: { - return PackImpl(context, node, output, data->values_count, - data->axis); - } - case kTfLiteUInt8: { - return PackImpl(context, 
node, output, data->values_count, - data->axis); - } - case kTfLiteUInt32: { - return PackImpl(context, node, output, data->values_count, - data->axis); - } - case kTfLiteInt8: { + case kTfLiteInt8: + case kTfLiteUInt8: return PackImpl(context, node, output, data->values_count, data->axis); - } - case kTfLiteInt16: { + case kTfLiteInt16: return PackImpl(context, node, output, data->values_count, data->axis); - } - case kTfLiteInt32: { + case kTfLiteFloat32: + case kTfLiteInt32: + case kTfLiteUInt32: return PackImpl(context, node, output, data->values_count, data->axis); - } - case kTfLiteInt64: { + case kTfLiteInt64: return PackImpl(context, node, output, data->values_count, data->axis); - } default: { TF_LITE_KERNEL_LOG(context, "Type '%s' is not supported by pack.", TfLiteTypeGetName(output->type)); From 4f4958cc106a8f0db94e0d2750984a641c0197a7 Mon Sep 17 00:00:00 2001 From: Neel Kovelamudi Date: Thu, 21 Sep 2023 08:36:09 -0700 Subject: [PATCH 085/567] Pass along the shape param from tf.compat.v1.get_variable to its underlying variable creator. At this point, we have already checked that either shape is compatible w/ initial_value, or that it is not specified (None). Therefore it should always be safe to pass it. Having it can help subsequent nested variable creators save some cycles tracing the initializer function when all they need to know is the shape. 
PiperOrigin-RevId: 567317207 --- .../feature_column/feature_column_test.py | 28 ++++++------------- .../feature_column/feature_column_v2_test.py | 12 +++----- tensorflow/python/ops/variable_scope.py | 17 ++++------- 3 files changed, 17 insertions(+), 40 deletions(-) diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py index f8fbd8db7e3b15..3cde6c5657edc1 100644 --- a/tensorflow/python/feature_column/feature_column_test.py +++ b/tensorflow/python/feature_column/feature_column_test.py @@ -4924,19 +4924,16 @@ def test_get_dense_tensor(self, use_safe_embedding_lookup, ) def _initializer(shape, dtype, partition_info=None): - self.assertEqual(dtypes.float32, dtype) if partition_variables: - assert partition_info is not None self.assertEqual([vocabulary_size, embedding_dimension], partition_info.full_shape) self.assertAllEqual((2, embedding_dimension), shape) - return array_ops.slice( - embedding_values, partition_info.var_offset, shape - ) else: self.assertAllEqual((vocabulary_size, embedding_dimension), shape) self.assertIsNone(partition_info) - return embedding_values + + self.assertEqual(dtypes.float32, dtype) + return embedding_values # Expected lookup result, using combiner='mean'. 
expected_lookups = ( @@ -4979,12 +4976,7 @@ def _initializer(shape, dtype, partition_info=None): for v in global_vars: self.assertIsInstance(v, variables_lib.Variable) with _initialized_session(): - if partition_variables: - self.assertAllEqual( - embedding_values, array_ops.concat(global_vars, axis=0) - ) - else: - self.assertAllEqual(embedding_values, global_vars[0]) + self.assertAllEqual(embedding_values, global_vars[0]) self.assertAllEqual(expected_lookups, self.evaluate(embedding_lookup)) if use_safe_embedding_lookup: @@ -5791,19 +5783,16 @@ def test_get_dense_tensor(self, use_safe_embedding_lookup, ) def _initializer(shape, dtype, partition_info=None): - self.assertEqual(dtypes.float32, dtype) if partition_variables: - assert partition_info is not None self.assertEqual([vocabulary_size, embedding_dimension], partition_info.full_shape) self.assertAllEqual((2, embedding_dimension), shape) - return array_ops.slice( - embedding_values, partition_info.var_offset, shape - ) else: self.assertAllEqual((vocabulary_size, embedding_dimension), shape) self.assertIsNone(partition_info) - return embedding_values + + self.assertEqual(dtypes.float32, dtype) + return embedding_values # Expected lookup result, using combiner='mean'. 
expected_lookups_a = ( @@ -5853,11 +5842,10 @@ def _initializer(shape, dtype, partition_info=None): self.assertCountEqual(('vars/embedding_weights/part_0:0', 'vars/embedding_weights/part_1:0'), tuple([v.name for v in global_vars])) - embedding_var = array_ops.concat(global_vars, axis=0) else: self.assertCountEqual(('vars/embedding_weights:0',), tuple([v.name for v in global_vars])) - embedding_var = global_vars[0] + embedding_var = global_vars[0] self.evaluate(variables_lib.global_variables_initializer()) self.evaluate(lookup_ops.tables_initializer()) diff --git a/tensorflow/python/feature_column/feature_column_v2_test.py b/tensorflow/python/feature_column/feature_column_v2_test.py index cb98bf60c03184..21dcbb4452d6c7 100644 --- a/tensorflow/python/feature_column/feature_column_v2_test.py +++ b/tensorflow/python/feature_column/feature_column_v2_test.py @@ -5762,19 +5762,16 @@ def test_get_dense_tensor(self, use_safe_embedding_lookup, ) def _initializer(shape, dtype, partition_info=None): - self.assertEqual(dtypes.float32, dtype) if partition_variables: - assert partition_info is not None self.assertEqual([vocabulary_size, embedding_dimension], partition_info.full_shape) self.assertAllEqual((2, embedding_dimension), shape) - return array_ops.slice( - embedding_values, partition_info.var_offset, shape - ) else: self.assertAllEqual((vocabulary_size, embedding_dimension), shape) self.assertIsNone(partition_info) - return embedding_values + + self.assertEqual(dtypes.float32, dtype) + return embedding_values # Expected lookup result, using combiner='mean'. 
expected_lookups_a = ( @@ -5824,11 +5821,10 @@ def _initializer(shape, dtype, partition_info=None): self.assertCountEqual(('vars/aaa_bbb_shared_embedding/part_0:0', 'vars/aaa_bbb_shared_embedding/part_1:0'), tuple([v.name for v in global_vars])) - embedding_var = array_ops.concat(global_vars, axis=0) else: self.assertCountEqual(('vars/aaa_bbb_shared_embedding:0',), tuple([v.name for v in global_vars])) - embedding_var = global_vars[0] + embedding_var = global_vars[0] self.evaluate(variables_lib.global_variables_initializer()) self.evaluate(lookup_ops.tables_initializer()) diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py index 622f70732adaba..33dd0438fa2f2f 100644 --- a/tensorflow/python/ops/variable_scope.py +++ b/tensorflow/python/ops/variable_scope.py @@ -807,8 +807,7 @@ def _get_partitioned_variable(self, use_resource=use_resource, constraint=constraint, synchronization=synchronization, - aggregation=aggregation, - ) + aggregation=aggregation) # pylint: disable=protected-access var._set_save_slice_info( @@ -881,8 +880,7 @@ def _get_single_variable(self, raise ValueError("If initializer is a constant, do not specify shape.") dtype = dtypes.as_dtype(dtype) - if shape is not None: - shape = tensor_shape.as_shape(shape) + shape = tensor_shape.as_shape(shape) if name in self._vars: # Here we handle the case when returning an existing variable. @@ -903,9 +901,7 @@ def _get_single_variable(self, raise ValueError("%s Originally defined at:\n\n%s" % (err_msg, "".join(traceback.format_list(tb)))) found_var = self._vars[name] - if shape is not None and not shape.is_compatible_with( - found_var.get_shape() - ): + if not shape.is_compatible_with(found_var.get_shape()): raise ValueError("Trying to share variable %s, but specified shape %s" " and found shape %s." % (name, shape, found_var.get_shape())) @@ -925,7 +921,6 @@ def _get_single_variable(self, # Create the tensor to initialize the variable with default value. 
if initializer is None: - assert shape is not None initializer, initializing_from_value = self._get_default_initializer( name=name, shape=shape, dtype=dtype) # Enter an init scope when creating the initializer. @@ -937,7 +932,7 @@ def _get_single_variable(self, # Instantiate initializer if provided initializer is a type object. if tf_inspect.isclass(initializer): initializer = initializer() - if shape is not None and shape.is_fully_defined(): + if shape.is_fully_defined(): if "partition_info" in tf_inspect.getargspec(initializer).args: init_val = functools.partial(initializer, shape.as_list(), @@ -972,9 +967,7 @@ def _get_single_variable(self, constraint=constraint, use_resource=use_resource, synchronization=synchronization, - aggregation=aggregation, - shape=shape, - ) + aggregation=aggregation) if context.executing_eagerly() and self._store_eager_variables: if collections: ops.add_to_collections(collections, v) From 060f5118478e4c0cc3fb5482470ff9c0101c752d Mon Sep 17 00:00:00 2001 From: George Necula Date: Thu, 21 Sep 2023 09:13:10 -0700 Subject: [PATCH 086/567] Improve shape refinement to not require inlining. 
PiperOrigin-RevId: 567326536 --- .../compiler/tests/xla_call_module_test.py | 82 -- third_party/stablehlo/temporary.patch | 1180 +---------------- .../xla/third_party/stablehlo/temporary.patch | 1180 +---------------- .../xla/python/refine_polymorphic_shapes.cc | 4 + 4 files changed, 36 insertions(+), 2410 deletions(-) diff --git a/tensorflow/compiler/tests/xla_call_module_test.py b/tensorflow/compiler/tests/xla_call_module_test.py index ee651b107c5b8e..a24930a7b8c846 100644 --- a/tensorflow/compiler/tests/xla_call_module_test.py +++ b/tensorflow/compiler/tests/xla_call_module_test.py @@ -259,88 +259,6 @@ def f(x): # x: f32[2, b] self._assertOpOutputMatchesExpected(f, (x,), (np.sin(x),)) - def test_poly_with_inner_token(self): - # The inner functions pass tokens through - x = np.arange(12, dtype=np.float32).reshape((3, 4)) - - def f(x): # x : f32[b0, b1] - # 1 + sin(x) - module, version = serialize(""" -module @jit_f.0 attributes {jax.uses_shape_polymorphism = true} { - func.func public @main(%arg0: tensor) -> tensor { - %0 = stablehlo.get_dimension_size %arg0, dim = 0 : (tensor) -> tensor - %1 = stablehlo.get_dimension_size %arg0, dim = 1 : (tensor) -> tensor - %2 = stablehlo.constant dense<> : tensor<0xi1> - %3:2 = call @_wrapped_main(%0, %1, %2, %arg0) : (tensor, tensor, tensor<0xi1>, tensor) -> (tensor<0xi1>, tensor) - return %3#1 : tensor - } - - func.func private @_wrapped_main(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xi1> {jax.token = true}, %arg3: tensor) -> (tensor<0xi1> {jax.token = true}, tensor) { - %0 = stablehlo.create_token : !stablehlo.token - %1 = stablehlo.sine %arg3 : tensor - %2 = stablehlo.constant dense<1.000000e+00> : tensor - %3 = stablehlo.reshape %arg0 : (tensor) -> tensor<1xi32> - %4 = stablehlo.reshape %arg1 : (tensor) -> tensor<1xi32> - %5 = stablehlo.concatenate %3, %4, dim = 0 : (tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> - %6 = stablehlo.dynamic_broadcast_in_dim %2, %5, dims = [] : (tensor, tensor<2xi32>) -> tensor - %7 = 
stablehlo.add %1, %6 : tensor - %8 = stablehlo.constant dense<> : tensor<0xi1> - return %8, %7 : tensor<0xi1>, tensor - } -} -""") - return xla.call_module( - [x], - version=version, - module=module, - Tout=[x.dtype], - Sout=[x.shape], - has_token_input_output=False, - platforms=[self.testing_platform()], - ) - - self._assertOpOutputMatchesExpected(f, (x,), (1. + np.sin(x),)) - - def test_poly_with_inner_prefix_token(self): - # Sometimes inner functions take a token as first argument - x = np.arange(12, dtype=np.float32).reshape((3, 4)) - - def f(x): # x : f32[b0, b1] - # 1 + sin(x) - module, version = serialize(""" -module @jit_f.0 attributes {jax.uses_shape_polymorphism = true} { - func.func public @main(%arg0: tensor) -> tensor { - %0 = stablehlo.get_dimension_size %arg0, dim = 0 : (tensor) -> tensor - %1 = stablehlo.get_dimension_size %arg0, dim = 1 : (tensor) -> tensor - %2 = stablehlo.create_token : !stablehlo.token - %3:2 = call @_wrapped_main(%2, %0, %1, %arg0) : (!stablehlo.token, tensor, tensor, tensor) -> (!stablehlo.token, tensor) - return %3#1 : tensor - } - - func.func private @_wrapped_main(%arg_token: !stablehlo.token, %arg0: tensor, %arg1: tensor, %arg3: tensor) -> (!stablehlo.token, tensor) { - %1 = stablehlo.sine %arg3 : tensor - %2 = stablehlo.constant dense<1.000000e+00> : tensor - %3 = stablehlo.reshape %arg0 : (tensor) -> tensor<1xi32> - %4 = stablehlo.reshape %arg1 : (tensor) -> tensor<1xi32> - %5 = stablehlo.concatenate %3, %4, dim = 0 : (tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> - %6 = stablehlo.dynamic_broadcast_in_dim %2, %5, dims = [] : (tensor, tensor<2xi32>) -> tensor - %7 = stablehlo.add %1, %6 : tensor - return %arg_token, %7 : !stablehlo.token, tensor - } -} -""") - return xla.call_module( - [x], - version=version, - module=module, - Tout=[x.dtype], - Sout=[x.shape], - has_token_input_output=False, - platforms=[self.testing_platform()], - ) - - self._assertOpOutputMatchesExpected(f, (x,), (1. 
+ np.sin(x),)) - def test_wrong_actual_args_errors(self): x = np.arange(6, dtype=np.float32).reshape((3, 2)) y = np.arange(6, dtype=np.int32).reshape((2, 3)) diff --git a/third_party/stablehlo/temporary.patch b/third_party/stablehlo/temporary.patch index 0cb078c3b89795..4c4228163a6f04 100644 --- a/third_party/stablehlo/temporary.patch +++ b/third_party/stablehlo/temporary.patch @@ -1426,353 +1426,7 @@ diff --ruN a/stablehlo/stablehlo/tests/stablehlo_canonicalize_dynamism.mlir b/st diff --ruN a/stablehlo/stablehlo/tests/stablehlo_refine_shapes.mlir b/stablehlo/stablehlo/tests/stablehlo_refine_shapes.mlir --- stablehlo/stablehlo/tests/stablehlo_refine_shapes.mlir +++ stablehlo/stablehlo/tests/stablehlo_refine_shapes.mlir -@@ -31,6 +31,7 @@ - - // ----- - -+// CHECK-LABEL: module @has_main - module @has_main { - // CHECK: main - func.func @main(%arg0: tensor<4xf32>) -> tensor<*xi32> { -@@ -38,17 +39,11 @@ - %0 = stablehlo.bitcast_convert %arg0 : (tensor<4xf32>) -> tensor<*xi32> - func.return %0 : tensor<*xi32> - } -- -- // CHECK: helper -- func.func @helper(%arg0: tensor<4xf32>) -> tensor<*xi32> { -- // CHECK: stablehlo.bitcast_convert{{.*}} -> tensor<*xi32> -- %0 = stablehlo.bitcast_convert %arg0 : (tensor<4xf32>) -> tensor<*xi32> -- func.return %0 : tensor<*xi32> -- } --} -- --// ----- -- -+} -+ -+// ----- -+ -+// CHECK-LABEL: func @error_unsupported_operation - func.func @error_unsupported_operation(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> index { - // CHECK: stablehlo.add{{.*}} -> tensor - %0 = stablehlo.add %arg0, %arg1 : (tensor<4xf32>, tensor<4xf32>) -> tensor -@@ -472,11 +467,312 @@ - - // ----- - --// CHECK-LABEL: func @refine_bitcast_convert_same_bitwidth --func.func @refine_bitcast_convert_same_bitwidth(%arg0 : tensor<4xf32>) -> tensor<*xi32> { -+// CHECK-LABEL: func @refine_bitcast_convert_same_bitwidth_unranked_result -+func.func @refine_bitcast_convert_same_bitwidth_unranked_result(%arg0 : tensor<4xf32>) -> tensor<*xi32> { - // CHECK: 
stablehlo.bitcast_convert{{.*}} -> tensor<4xi32> - %0 = stablehlo.bitcast_convert %arg0 : (tensor<4xf32>) -> tensor<*xi32> - func.return %0 : tensor<*xi32> -+} -+ -+// ----- -+ -+// CHECK-LABEL: func @refine_bitcast_convert_same_bitwidth -+func.func @refine_bitcast_convert_same_bitwidth() -> tensor { -+ %0 = stablehlo.constant dense<[3, 5, 0]> : tensor<3xi32> -+ %21 = stablehlo.dynamic_iota %0, dim = 0 : (tensor<3xi32>) -> tensor -+ // CHECK: stablehlo.bitcast_convert{{.*}} -> tensor<3x5x0xf32> -+ %48 = stablehlo.bitcast_convert %21 : (tensor) -> tensor -+ return %48 : tensor -+} -+ -+// ----- -+ -+// CHECK-LABEL: module @refine_call -+module @refine_call { -+ func.func @main(%arg1: tensor<4xf32>) -> tensor { -+ %0 = stablehlo.bitcast_convert %arg1 : (tensor<4xf32>) -> tensor -+ %1 = stablehlo.constant dense<4> : tensor -+ // CHECK: refine_call_callee{{.*}}-> tensor<4xf32> -+ %2 = call @refine_call_callee(%1, %0) : (tensor, tensor) -> tensor -+ return %2 : tensor -+ } -+ // CHECK: refine_call_callee(%arg0: tensor<4xf32>) -> tensor<4xf32> -+ func.func @refine_call_callee(%arg0: tensor, %arg1: tensor) -> tensor { -+ // CHECK: stablehlo.constant dense<4> -+ %0 = stablehlo.reshape %arg0 : (tensor) -> tensor<1xi32> -+ %1 = stablehlo.dynamic_iota %0, dim = 0 : (tensor<1xi32>) -> tensor -+ return %1 : tensor -+ } -+} -+ -+// ----- -+ -+// CHECK-LABEL: module @refine_call_dimension_arguments -+module @refine_call_dimension_arguments { -+ func.func public @main(%arg0: tensor) -> tensor { -+ // CHECK: [[RESULT:%.*]] = call @callee -+ // CHECK: return [[RESULT]] -+ %0 = stablehlo.constant dense<3> : tensor -+ %1 = call @callee(%0, %0, %arg0) : (tensor, tensor, tensor) -> tensor -+ return %1 : tensor -+ } -+ // %arg0 and %arg1 are dimension arguments -+ // CHECK: @callee([[ARG0:%.*]]: tensor) -> tensor -+ func.func private @callee(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -+ // CHECK: [[RESULT0:%.*]] = stablehlo.constant dense<6> -+ // CHECK: [[RESULT1:%.*]] = 
stablehlo.add [[RESULT0]], [[ARG0]] -+ // CHECK: return [[RESULT1]] -+ %0 = stablehlo.add %arg0, %arg1: tensor -+ %1 = stablehlo.add %0, %arg2: tensor -+ return %1 : tensor -+ } -+} -+ -+// ----- -+ -+// CHECK-LABEL: module @refine_call_prefix_token_and_dimension_arguments -+module @refine_call_prefix_token_and_dimension_arguments { -+ func.func public @main(%arg0: tensor) -> tensor { -+ // CHECK: [[RESULT:%.*]] = call @callee -+ // CHECK: return [[RESULT]] -+ %0 = stablehlo.constant dense<3> : tensor -+ %token = stablehlo.create_token : !stablehlo.token -+ %1 = call @callee(%token, %0, %0, %arg0) : (!stablehlo.token, tensor, tensor, tensor) -> tensor -+ return %1 : tensor -+ } -+ // %arg0 and %arg1 are dimension arguments -+ // CHECK: @callee([[ARG_TOKEN:%.*]]: !stablehlo.token, [[ARG0:%.*]]: tensor -+ func.func private @callee(%arg_token: !stablehlo.token, %arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -+ // CHECK: [[RESULT0:%.*]] = stablehlo.constant dense<6> -+ // CHECK: [[RESULT1:%.*]] = stablehlo.add [[RESULT0]], [[ARG0]] -+ // CHECK: return [[RESULT1]] -+ %0 = stablehlo.add %arg0, %arg1: tensor -+ %1 = stablehlo.add %0, %arg2: tensor -+ return %1 : tensor -+ } -+} -+ -+// ----- -+ -+// CHECK-LABEL: module @refine_call_dimension_arguments_followed_by_token -+module @refine_call_dimension_arguments_followed_by_token { -+ func.func public @main(%arg0: tensor) -> tensor { -+ // CHECK: [[RESULT:%.*]] = call @callee -+ // CHECK: return [[RESULT]] -+ %0 = stablehlo.constant dense<3> : tensor -+ %token = stablehlo.create_token : !stablehlo.token -+ %1 = call @callee(%0, %0, %token, %arg0) : (tensor, tensor, !stablehlo.token, tensor) -> tensor -+ return %1 : tensor -+ } -+ // %arg0 and %arg1 are dimension arguments -+ // CHECK: @callee([[ARG_TOKEN:%.*]]: !stablehlo.token, [[ARG0:%.*]]: tensor -+ func.func private @callee(%arg0: tensor, %arg1: tensor, %arg_token: !stablehlo.token, %arg2: tensor) -> tensor { -+ // CHECK: [[RESULT0:%.*]] = stablehlo.constant 
dense<6> -+ // CHECK: [[RESULT1:%.*]] = stablehlo.add [[RESULT0]], [[ARG0]] -+ // CHECK: return [[RESULT1]] -+ %0 = stablehlo.add %arg0, %arg1: tensor -+ %1 = stablehlo.add %0, %arg2: tensor -+ return %1 : tensor -+ } -+} -+ -+// ----- -+ -+// CHECK-LABEL: module @refine_multiple_call_with_same_context -+module @refine_multiple_call_with_same_context { -+ func.func @main(%arg1: tensor<4xf32>) -> tensor { -+ %0 = stablehlo.bitcast_convert %arg1 : (tensor<4xf32>) -> tensor -+ %arg0_new = "stablehlo.get_dimension_size"(%0) {dimension = 0 : i64} : (tensor) -> tensor -+ // CHECK: refine_call_callee{{.*}}-> tensor<4xf32> -+ %1 = call @refine_call_callee(%arg0_new, %0) : (tensor, tensor) -> tensor -+ // CHECK: refine_call_callee{{.*}}-> tensor<4xf32> -+ %2 = call @refine_call_callee(%arg0_new, %1) : (tensor, tensor) -> tensor -+ return %2 : tensor -+ } -+ // CHECK: refine_call_callee{{.*}}-> tensor<4xf32> -+ func.func @refine_call_callee(%arg0: tensor, %arg1: tensor) -> tensor { -+ return %arg1 : tensor -+ } -+} -+ -+// ----- -+ -+// CHECK-LABEL: module @refine_multiple_call_constant_function -+module @refine_multiple_call_constant_function { -+ func.func @main(%arg0: tensor<5xf32>) -> tensor { -+ // CHECK: [[RESULT0:%.*]] = stablehlo.constant dense<16> -+ // CHECK: return [[RESULT0]] -+ %0 = stablehlo.constant dense<4> : tensor -+ %1 = call @refine_call_callee(%0, %arg0) : (tensor, tensor<5xf32>) -> tensor -+ %2 = call @refine_call_callee(%0, %arg0) : (tensor, tensor<5xf32>) -> tensor -+ %3 = stablehlo.add %1, %2: tensor -+ return %3 : tensor -+ } -+ func.func @refine_call_callee(%arg0: tensor, %arg1: tensor<5xf32>) -> tensor { -+ // CHECK: [[RESULT1:%.*]] = stablehlo.constant dense<8> -+ // CHECK: return [[RESULT1]] -+ %0 = stablehlo.add %arg0, %arg0: tensor -+ return %0 : tensor -+ } -+} -+ -+// ----- -+ -+module @refine_call_multiple_with_different_number_dimension_arguments { -+ func.func @main(%arg1: tensor<4xf32>) -> tensor { -+ %0 = stablehlo.bitcast_convert %arg1 
: (tensor<4xf32>) -> tensor -+ %arg0_new = "stablehlo.get_dimension_size"(%0) {dimension = 0 : i64} : (tensor) -> tensor -+ %1 = call @refine_call_callee(%arg0_new, %0) : (tensor, tensor) -> tensor -+ // Ensure that the first argument is not a constant at the second call site -+ %arg0_different_f32 = stablehlo.bitcast_convert %arg0_new : (tensor) -> tensor -+ %arg0_different_i32 = stablehlo.bitcast_convert %arg0_different_f32 : (tensor) -> tensor -+ // expected-error@+1{{incorrect number of operands for callee}} -+ %2 = call @refine_call_callee(%arg0_different_i32, %1) : (tensor, tensor) -> tensor -+ return %2 : tensor -+ } -+ // expected-error@+1{{Function refine_call_callee has already been refined with a different refinement context. Previous context had 1 and now we have 2 non-dimension arguments}} -+ func.func @refine_call_callee(%arg0: tensor, %arg1: tensor) -> tensor { -+ return %arg1 : tensor -+ } -+} -+ -+// ----- -+ -+module @refine_call_multiple_different_dimension_arguments { -+ func.func @main(%arg1: tensor<4xf32>) -> tensor { -+ %0 = stablehlo.bitcast_convert %arg1 : (tensor<4xf32>) -> tensor -+ %arg0_new = "stablehlo.get_dimension_size"(%0) {dimension = 0 : i64} : (tensor) -> tensor -+ %1 = call @refine_call_callee(%arg0_new, %0) : (tensor, tensor) -> tensor -+ %arg0_different = stablehlo.add %arg0_new, %arg0_new : tensor -+ // expected-error@+1{{incorrect number of operands for callee}} -+ %2 = call @refine_call_callee(%arg0_different, %1) : (tensor, tensor) -> tensor -+ return %2 : tensor -+ } -+ // expected-error@+1{{Function refine_call_callee has already been refined with a different refinement context.}} -+ func.func @refine_call_callee(%arg0: tensor, %arg1: tensor) -> tensor { -+ return %arg1 : tensor -+ } -+} -+ -+// ----- -+ -+module @refine_call_multiple_different_non_dimension_arguments { -+ func.func @main(%arg1: tensor<4xf32>) -> tensor { -+ %0 = stablehlo.bitcast_convert %arg1 : (tensor<4xf32>) -> tensor -+ %arg0_new = 
"stablehlo.get_dimension_size"(%0) {dimension = 0 : i64} : (tensor) -> tensor -+ %1 = call @refine_call_callee(%arg0_new, %0) : (tensor, tensor) -> tensor -+ %2 = stablehlo.constant dense<[1., 2.]> : tensor<2xf32> -+ %3 = stablehlo.concatenate %1, %2, dim = 0 : (tensor, tensor<2xf32>) -> tensor -+ // expected-error@+1{{incorrect number of operands for callee}} -+ %4 = call @refine_call_callee(%arg0_new, %3) : (tensor, tensor) -> tensor -+ return %4 : tensor -+ } -+ // expected-error@+1{{Function refine_call_callee has already been refined with a different refinement context.}} -+ func.func @refine_call_callee(%arg0: tensor, %arg1: tensor) -> tensor { -+ return %arg1 : tensor -+ } -+} -+ -+// ----- -+ -+module @refine_call_recursive { -+ func.func @main() -> tensor { -+ %0 = stablehlo.constant dense<3> : tensor -+ %1 = call @refine_call_callee(%0) : (tensor) -> tensor -+ return %1 : tensor -+ } -+ // expected-error@+1{{Function refine_call_callee is being refined recursively}} -+ func.func @refine_call_callee(%arg0: tensor) -> tensor { -+ // expected-error@+1{{incorrect number of operands}} -+ %0 = call @refine_call_callee(%arg0) : (tensor) -> tensor -+ return %0 : tensor -+ } -+} -+ -+// ----- -+ -+module @refine_call_main_argument_unranked { -+ // expected-error@+1{{main must be refined with static shape arguments}} -+ func.func public @main(%arg0: tensor<*xi32>) -> tensor<*xi32> { -+ %2 = call @callee(%arg0) : (tensor<*xi32>) -> tensor<*xi32> -+ return %2 : tensor<*xi32> -+ } -+ func.func private @callee(%arg0: tensor<*xi32>) -> tensor<*xi32> { -+ return %arg0 : tensor<*xi32> -+ } -+} -+ -+// ----- -+ -+module @refine_call_main_argument_dynamic_shape { -+ // expected-error@+1{{main must be refined with static shape arguments}} -+ func.func public @main(%arg0: tensor) -> tensor { -+ %2 = call @callee(%arg0) : (tensor) -> tensor -+ return %2 : tensor -+ } -+ func.func private @callee(%arg0: tensor) -> tensor { -+ return %arg0 : tensor -+ } -+} -+ -+// ----- -+ 
-+module @refine_call_callee_argument_unranked { -+ func.func public @main(%arg0: tensor<1xi64>) -> tensor<*xi32> { -+ %1 = stablehlo.dynamic_iota %arg0, dim = 0 : (tensor<1xi64>) -> tensor<*xi32> -+ %2 = call @callee(%1) : (tensor<*xi32>) -> tensor<*xi32> -+ return %2 : tensor<*xi32> -+ } -+ // expected-error@+1{{callee must be refined with static shape arguments}} -+ func.func private @callee(%arg0: tensor<*xi32>) -> tensor<*xi32> { -+ return %arg0 : tensor<*xi32> -+ } -+} -+ -+// ----- -+ -+module @refine_call_callee_argument_dynamic_shape { -+ func.func public @main(%arg0: tensor<1xi64>) -> tensor { -+ %1 = stablehlo.dynamic_iota %arg0, dim = 0 : (tensor<1xi64>) -> tensor -+ %2 = call @callee(%1) : (tensor) -> tensor -+ return %2 : tensor -+ } -+ // expected-error@+1{{callee must be refined with static shape arguments}} -+ func.func private @callee(%arg0: tensor) -> tensor { -+ return %arg0 : tensor -+ } -+} -+ -+// ----- -+ -+// CHECK-LABEL: module @refine_call_dimension_argument_non_scalar -+// The non-scalar constant is not folded into the callee -+module @refine_call_dimension_argument_non_scalar { -+ func.func public @main() -> tensor<4xi32> { -+ // CHECK: dense<[1, 2, 3, 4]> : tensor<4xi32> -+ %0 = stablehlo.constant dense<[1, 2, 3, 4]> : tensor<4xi32> -+ %1 = call @callee(%0) : (tensor<4xi32>) -> tensor<4xi32> -+ return %1 : tensor<4xi32> -+ } -+ func.func private @callee(%arg0: tensor<4xi32>) -> tensor<4xi32> { -+ // CHECK: return %arg0 : tensor<4xi32> -+ return %arg0 : tensor<4xi32> -+ } -+} -+ -+// ----- -+ -+// CHECK-LABEL: module @refine_call_dimension_argument_not_integer -+module @refine_call_dimension_argument_not_integer { -+ func.func public @main() -> tensor { -+ %0 = stablehlo.constant dense<3.> : tensor -+ // CHECK: call @callee({{.*}}) : (tensor) -> tensor -+ %2 = call @callee(%0) : (tensor) -> tensor -+ return %2 : tensor -+ } -+ func.func private @callee(%arg0: tensor) -> tensor { -+ return %arg0 : tensor -+ } - } - - // ----- -@@ -607,12 
+903,55 @@ +@@ -607,12 +607,55 @@ // ----- @@ -1977,74 +1631,7 @@ diff --ruN a/stablehlo/stablehlo/transforms/StablehloCanonicalizeDynamism.cpp b/ diff --ruN a/stablehlo/stablehlo/transforms/StablehloRefineShapes.cpp b/stablehlo/stablehlo/transforms/StablehloRefineShapes.cpp --- stablehlo/stablehlo/transforms/StablehloRefineShapes.cpp +++ stablehlo/stablehlo/transforms/StablehloRefineShapes.cpp -@@ -11,9 +11,48 @@ - See the License for the specific language governing permissions and - limitations under the License. - ==============================================================================*/ -- -+/* -+This shape refinement pass was designed to resolve the dynamic shapes in -+a StableHLO module produced by JAX serialization with shape polymorphism. -+Such a module has the following properties: -+ -+ * it contains a "main" function with statically-shaped arguments; -+ the result types may be dynamically shaped. -+ * all the dynamic shapes depend only on the input shapes (no shape -+ dependency on the input array contents). We refer to the operations that -+ depend transitively only on the input shapes (e.g., as given by -+ `stablehlo.get_dimension_size`) as `dimension` operations. -+ All dimension values can be resolved to constants through inter-procedural -+ constant folding. -+ * intermediate functions may take a number of token arguments (of type -+ !stablehlo.token) at the start of the argument list, followed by some -+ dimension arguments (integer scalars). -+ * some intermediate functions may return dimension values. -+ E.g., the `floordiv` operation on dimension values may be implemented -+ using intermediate functions. These constant functions need to be -+ constant-folded. -+ * All the dynamic shapes can be resolved through shape inference from the -+ dimension values. The dimension values themselves do not depend on the -+ result of shape inference. 
-+ -+ -+For each intermediate function we compute a refinement context, including -+the values of the dimension arguments and the static shapes of the other -+arguments. We compute the refinement context when we encounter a function call, -+and then we refine the callee recursively. We abort in the presence of -+recursive calls. -+We also abort if a function is called with multiple distinct refinement -+contexts. -+ -+After refinement, all operations should have static shapes, all calls to -+constant functions are replaced with constants, and all dimension arguments -+for intermediate functions are dropped and are replaced with constants. -+*/ -+#include - #include - #include -+#include -+#include - #include - #include - -@@ -24,8 +63,10 @@ - #include "llvm/ADT/SmallSet.h" - #include "llvm/ADT/SmallVector.h" - #include "llvm/ADT/StringRef.h" -+#include "llvm/Support/Debug.h" - #include "llvm/Support/ErrorHandling.h" - #include "llvm/Support/FormatVariadic.h" -+#include "llvm/Support/ScopedPrinter.h" - #include "mlir/Dialect/Func/IR/FuncOps.h" - #include "mlir/IR/BuiltinAttributes.h" - #include "mlir/IR/BuiltinOps.h" -@@ -39,10 +80,13 @@ - #include "mlir/IR/Types.h" - #include "mlir/IR/Value.h" - #include "mlir/Interfaces/InferTypeOpInterface.h" -+#include "mlir/Support/DebugStringHelper.h" - #include "mlir/Support/LogicalResult.h" -+#include "mlir/Support/LLVM.h" +@@ -43,6 +43,7 @@ #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "stablehlo/dialect/Base.h" #include "stablehlo/dialect/ChloOps.h" @@ -2052,407 +1639,7 @@ diff --ruN a/stablehlo/stablehlo/transforms/StablehloRefineShapes.cpp b/stablehl #include "stablehlo/dialect/StablehloOps.h" #include "stablehlo/dialect/TypeInference.h" #include "stablehlo/transforms/Passes.h" -@@ -50,10 +94,144 @@ - namespace mlir { - namespace stablehlo { - -+#define DEBUG_TYPE "stablehlo-refine-shapes" -+ - #define GEN_PASS_DEF_STABLEHLOREFINESHAPESPASS - #include "stablehlo/transforms/Passes.h.inc" - - namespace { 
-+ -+// Per-module state for shape refinement. -+class RefineShapeState { -+ public: -+ // Validates that we are not attempting to refine a function with a different -+ // context than previously, and are not attempting recursive refinement. -+ // Returns failure() if validation fails. On success, returns a boolean -+ // that specifies whether the function has already been refined. -+ FailureOr validateFunctionRefinement( -+ func::FuncOp func, SmallVector dimensionArguments, -+ SmallVector nonDimensionArgumentTypes) { -+ StringRef funcName = func.getName(); -+ auto found = refinementContexts.find(func); -+ if (found == refinementContexts.end()) { -+ return false; // not already refined. -+ } -+ auto prevDimensionArguments = std::get<0>(found->second); -+ auto prevNonDimensionArgumentTypes = std::get<1>(found->second); -+ // Since we refine until fixed point, we will refine a call to a function -+ // both for the original function and for the refined one. In the latter -+ // case, we should have empty dimensionArguments but the same -+ // nonDimensionArgumentTypes. -+ if (prevNonDimensionArgumentTypes != nonDimensionArgumentTypes || -+ (!dimensionArguments.empty() && -+ prevDimensionArguments != dimensionArguments)) { -+ emitDifferentRefinementContextError( -+ func, /*dimensionArguments=*/dimensionArguments, -+ /*nonDimensionArgumentTypes=*/nonDimensionArgumentTypes, -+ /*prevDimensionArguments=*/prevDimensionArguments, -+ /*prevNonDimensionArgumentShapes=*/prevNonDimensionArgumentTypes); -+ return failure(); -+ } -+ for (auto funcOnStack : functionsBeingRefined) { -+ if (funcOnStack == funcName) { -+ func.emitOpError() << "Function " << funcName -+ << " is being refined recursively\n"; -+ return failure(); -+ } -+ } -+ return true; // already refined. -+ } -+ -+ // Updates the state to signal the starting of a function refinement. -+ // Callers must call `finishFunctionRefinement` when done. 
-+ void startFunctionRefinement(func::FuncOp func, -+ SmallVector dimensionArguments, -+ SmallVector nonDimensionArgumentTypes) { -+ StringRef funcName = func.getName(); -+ functionsBeingRefined.push_back(funcName); -+ refinementContexts[func] = -+ std::make_tuple(dimensionArguments, nonDimensionArgumentTypes); -+ } -+ -+ // Updates the state to signal the starting of a function refinement. -+ LogicalResult finishFunctionRefinement(func::FuncOp func) { -+ if (func.getName() != -+ functionsBeingRefined[functionsBeingRefined.size() - 1]) { -+ func.emitOpError() << "Expected to find " << func.getName() -+ << " at the top of the stack"; -+ return failure(); -+ } -+ functionsBeingRefined.pop_back(); -+ return success(); -+ } -+ -+ private: -+ // Maps refined functions to the refinement context: the values of dimension -+ // arguments and the types of non-dimension arguments. A function is added -+ // here when we start refining it. -+ DenseMap, SmallVector>> -+ refinementContexts; -+ -+ // A stack of functions that are in the process of being refined, the current -+ // one is last. -+ SmallVector functionsBeingRefined; -+ -+ void emitDifferentRefinementContextError( -+ func::FuncOp func, SmallVector dimensionArguments, -+ SmallVector nonDimensionArgumentTypes, -+ SmallVector prevDimensionArguments, -+ SmallVector prevNonDimensionArgumentShapes) { -+ InFlightDiagnostic msg = func.emitOpError(); -+ msg << "Function " << func.getName() -+ << " has already been refined with a different " -+ "refinement context. "; -+ int countShowNonDimensionArguments = -+ std::min(prevNonDimensionArgumentShapes.size(), -+ nonDimensionArgumentTypes.size()); -+ if (prevNonDimensionArgumentShapes.size() != -+ nonDimensionArgumentTypes.size()) { -+ msg << "Previous context had " << prevNonDimensionArgumentShapes.size() -+ << " and now we have " << nonDimensionArgumentTypes.size() -+ << " non-dimension arguments. 
"; -+ } -+ msg << "The differences among the first " << countShowNonDimensionArguments -+ << " non-dimension argument types are: "; -+ for (auto i = 0; i < countShowNonDimensionArguments; ++i) { -+ if (prevNonDimensionArgumentShapes[i] != nonDimensionArgumentTypes[i]) { -+ msg << "Non-dimension argument[" << i << "] previously had type " -+ << debugString(prevNonDimensionArgumentShapes[i]) -+ << " and now has type " << debugString(nonDimensionArgumentTypes[i]) -+ << ". "; -+ } -+ } -+ int countShowDimensionArguments = -+ std::min(prevDimensionArguments.size(), dimensionArguments.size()); -+ if (prevDimensionArguments.size() != dimensionArguments.size()) { -+ msg << "Previous context had " << prevDimensionArguments.size() -+ << " and now we have " << dimensionArguments.size() -+ << " dimension arguments. "; -+ } -+ msg << "The differences among the first " << countShowDimensionArguments -+ << " dimension arguments are: "; -+ for (auto i = 0; i < countShowDimensionArguments; ++i) { -+ if (prevDimensionArguments[i] != dimensionArguments[i]) { -+ msg << "Dimension argument[" << i << "] previously was " -+ << prevDimensionArguments[i].getSExtValue() << " and now is " -+ << dimensionArguments[i].getSExtValue() << ". "; -+ } -+ } -+ } -+}; -+ -+// Refines a function. -+// Returns `true` if the function had already been processed with the same -+// refinement context and `false` if this is the first time we refined the -+// function. Returns failure() if we encounter an error. -+LogicalResult refineFunction(func::FuncOp func, MLIRContext* context, -+ RefineShapeState* state, -+ size_t nrPrefixTokenArguments, -+ SmallVector dimensionArguments, -+ SmallVector nonDimensionArgumentTypes); - - // DenseElementsAttr can be constructed from ArrayRef but not from - // ArrayRef. This helper bridges the gap. 
-@@ -424,11 +602,10 @@ - diag << "refineValues failed for " << types << ": expected " - << values.size() << " types, got " << types.size(); - }); -- -- // Check whether `types` contain any new information with respect to existing -- // return types. Even if just a single dimension size out of an entire tensor -- // type got updated, using `inferMostSpecificType` ensures that we don't -- // miss that. -+ // Check whether `types` contain any new information with respect to -+ // existing return types. Even if just a single dimension size out of an -+ // entire tensor type got updated, using `inferMostSpecificType` ensures -+ // that we don't miss that. - bool needsRefinement = false; - SmallVector refinedTypes; - for (auto it : llvm::zip(values.getTypes(), types)) { -@@ -468,11 +645,13 @@ - - // Simply changing operand type of `func.return` won't work because - // that won't update the FunctionType of the enclosing `func.func`. -- // Nonetheless, we still want to support these ops because they are widely -- // used in StableHLO programs (although the plan of record is to replace -- // `func.return` ops in StableHLO programs with `stablehlo.return`: -- // https://github.com/openxla/stablehlo/issues/425). -+ // Nonetheless, we still want to support these ops because they are -+ // widely used in StableHLO programs (although the plan of record is to -+ // replace `func.return` ops in StableHLO programs with -+ // `stablehlo.return`: https://github.com/openxla/stablehlo/issues/425). - if (isa(user)) continue; -+ -+ if (isa(user)) continue; - - // Unlike in TensorFlow's type inference pass, here we work only with - // allowlisted ops to focus our support on well-defined semantics of -@@ -489,7 +668,8 @@ - value.setType(refinedType); - - // Special case: for `func.return`, guard the refinement with a cast -- // and leave propagation of the refined return type to a dedicated pattern. -+ // and leave propagation of the refined return type to a dedicated -+ // pattern. 
- auto isFuncReturn = [](OpOperand& use) -> bool { - return isa(use.getOwner()); - }; -@@ -505,8 +685,8 @@ - - // Refines the return types of the given operation using the given types. - // This function also signals PatternRewriter that it needs to visit all the --// users of this op if any updates to its results have happened during execution --// of the function. -+// users of this op if any updates to its results have happened during -+// execution of the function. - LogicalResult refineReturnTypes(PatternRewriter& rewriter, Operation* op, - ArrayRef types) { - if (failed(refineValues(rewriter, op, op->getResults(), types))) -@@ -528,12 +708,12 @@ - // traversal, and only then we apply the refinements. If there are other - // types, then the corresponding refinements must be completely empty. - // 2) Encodings are not supported. In principle, TypeExtensions should be --// supportable, but this needs careful thinking through. Given that no one --// asked for support for bounded dynamism in this pass yet, this is left --// for future work. -+// supportable, but this needs careful thinking through. Given that no -+// one asked for support for bounded dynamism in this pass yet, this is -+// left for future work. - // This function also signals PatternRewriter that it needs to visit all the --// users of this op if any updates to its results have happened during execution --// of the function. -+// users of this op if any updates to its results have happened during -+// execution of the function. - LogicalResult refineReturnTypes(PatternRewriter& rewriter, Operation* op, - ArrayRef refinements) { - SmallVector flattenedTypes; -@@ -623,8 +803,8 @@ - - // Refines the return type of the given operation using the given shape. - // This function also signals PatternRewriter that it needs to visit all the --// users of this op if any updates to its results have happened during execution --// of the function. 
-+// users of this op if any updates to its results have happened during -+// execution of the function. - template - LogicalResult refineReturnShape(PatternRewriter& rewriter, OpType op, - ArrayRef shape) { -@@ -633,8 +813,8 @@ - - // Refines the return type of the given operation using the given shape. - // This function also signals PatternRewriter that it needs to visit all the --// users of this op if any updates to its results have happened during execution --// of the function. -+// users of this op if any updates to its results have happened during -+// execution of the function. - template - LogicalResult refineReturnShape(PatternRewriter& rewriter, OpType op, - Value shapeValue) { -@@ -647,6 +827,52 @@ - return refineReturnShape(rewriter, op, shape); - } - -+// Dimension arguments are leading scalar constant arguments, optionally -+// preceeded by some stablehlo.token arguments. -+SmallVector getDimensionArguments(func::CallOp callOp, -+ size_t* nrPrefixTokenArguments) { -+ *nrPrefixTokenArguments = 0; -+ SmallVector operands = callOp.getOperands(); -+ SmallVector dimensionArguments; -+ for (size_t i = 0; i < operands.size(); ++i) { -+ if (i == *nrPrefixTokenArguments && isa(operands[i].getType())) { -+ (*nrPrefixTokenArguments)++; -+ continue; -+ } -+ RankedTensorType operandType = -+ dyn_cast(operands[i].getType()); -+ if (!operandType || operandType.getRank() != 0 || -+ !operandType.getElementType().template isa()) -+ break; -+ SmallVector operand_int; -+ if (failed(hlo::matchInts(operands[i], operand_int))) { -+ break; -+ } -+ dimensionArguments.push_back(operand_int[0]); -+ } -+ return dimensionArguments; -+} -+ -+std::optional> isConstantFunction( -+ func::FuncOp func) { -+ LLVM_DEBUG(llvm::dbgs() << "check if " << func.getName() -+ << " is a constant function\n"); -+ SmallVector returnedConstants; -+ func::ReturnOp ret = *func.getOps().begin(); -+ bool isConstant = llvm::all_of(ret->getOperands(), [&](auto returnVal) { -+ DenseIntElementsAttr attr; 
-+ Operation* return_operand_def = returnVal.getDefiningOp(); -+ if (return_operand_def && -+ matchPattern(return_operand_def, m_Constant(&attr))) { -+ returnedConstants.push_back(attr); -+ return true; -+ } -+ return false; -+ }); -+ if (isConstant) return returnedConstants; -+ return std::nullopt; -+} -+ - struct RefineAllGatherOpPattern : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(AllGatherOp op, -@@ -655,9 +881,9 @@ - if (!operandType.hasRank()) - return rewriter.notifyMatchFailure(op, "expected ranked operand type"); - -- // This represents the cross_replica_and_partition process grouping strategy -- // that requires num_partitions to compute shardCount. Since we don't know -- // num_partitions at this point, we error out. -+ // This represents the cross_replica_and_partition process grouping -+ // strategy that requires num_partitions to compute shardCount. Since we -+ // don't know num_partitions at this point, we error out. - if (op.getChannelHandle() && !op.getUseGlobalDeviceIds()) - return rewriter.notifyMatchFailure(op, "unsupported strategy"); - DenseIntElementsAttr replicaGroups = op.getReplicaGroups(); -@@ -678,12 +904,11 @@ - auto operandType = op.getOperand().getType(); - if (!operandType.hasRank()) - return rewriter.notifyMatchFailure(op, "expected ranked operand type"); -- -+ auto resultType = op.getType(); - // If bit widths of the operand and the result are different, then - // operand and result shapes have different ranks. - // This complicates the logic quite a bit and is not needed to pass the - // current tests, so we leave this for future work. 
-- auto resultType = op.getType(); - auto getBitWidthFn = [](ShapedType type) { - auto elementType = type.getElementType(); - if (auto complexType = elementType.dyn_cast()) -@@ -694,8 +919,77 @@ - if (getBitWidthFn(operandType) != getBitWidthFn(resultType)) - return rewriter.notifyMatchFailure(op, "unsupported bit width"); - -- return refineReturnShape(rewriter, op, operandType.getShape()); -- } -+ auto res = refineReturnShape(rewriter, op, operandType.getShape()); -+ if (failed(res)) return failure(); -+ if (op.getOperand().getType() == op.getResult().getType()) { -+ LLVM_DEBUG({ llvm::dbgs() << " ** remove no-op bitcast convert\n"; }); -+ rewriter.replaceOp(op, op.getOperand()); -+ } -+ return success(); -+ } -+}; -+ -+struct RefineCallOpPattern : public OpRewritePattern { -+ using OpRewritePattern::OpRewritePattern; -+ -+ RefineCallOpPattern(MLIRContext* context, RefineShapeState* state) -+ : OpRewritePattern(context), _state(state) {} -+ -+ LogicalResult matchAndRewrite(func::CallOp op, -+ PatternRewriter& rewriter) const override { -+ LLVM_DEBUG({ llvm::dbgs() << "refineCallOp " << debugString(op) << "\n"; }); -+ -+ // We have a number of prefix token arguments, then the dimension arguments -+ size_t nrPrefixTokenArguments = 0; -+ SmallVector dimensionArguments = -+ getDimensionArguments(op, &nrPrefixTokenArguments); -+ SmallVector nonDimensionArgumentTypes; -+ SmallVector nonDimensionArguments; -+ SmallVector operands = op.getOperands(); -+ for (size_t i = 0; i < operands.size(); ++i) { -+ // Skip the dimension arguments. 
-+ if (i >= nrPrefixTokenArguments && -+ i < nrPrefixTokenArguments + dimensionArguments.size()) { -+ continue; -+ } -+ nonDimensionArgumentTypes.push_back(operands[i].getType()); -+ nonDimensionArguments.push_back(operands[i]); -+ } -+ FlatSymbolRefAttr calleeName = op.getCalleeAttr(); -+ const SymbolTable symbolTable(op->getParentOfType()); -+ func::FuncOp callee = dyn_cast( -+ symbolTable.lookupNearestSymbolFrom(op, calleeName.getAttr())); -+ if (!callee) -+ return rewriter.notifyMatchFailure( -+ op, "cannot find callee in the current scope"); -+ if (failed(refineFunction(callee, rewriter.getContext(), _state, -+ nrPrefixTokenArguments, dimensionArguments, -+ nonDimensionArgumentTypes))) -+ return failure(); -+ -+ // Is the callee a constant function in this refinement context? -+ std::optional> constantAttrs = -+ isConstantFunction(callee); -+ if (constantAttrs.has_value()) { -+ SmallVector constants; -+ for (auto constAttr : constantAttrs.value()) { -+ constants.push_back( -+ rewriter.create(op.getLoc(), constAttr)); -+ } -+ rewriter.replaceOp(op, constants); -+ return success(); -+ } -+ if (!dimensionArguments.empty()) { -+ // Drop the dimension arguments, but only if necessary, or else we -+ // will end up trying to refine the new CallOp forever. -+ op = rewriter.replaceOpWithNewOp( -+ op, op.getResultTypes(), callee.getSymName(), nonDimensionArguments); -+ } -+ return refineReturnTypes(rewriter, op, callee.getResultTypes()); -+ } -+ -+ private: -+ RefineShapeState* _state; - }; - - struct RefineConvertOpPattern : public OpRewritePattern { -@@ -844,12 +1138,98 @@ +@@ -844,12 +845,97 @@ } }; @@ -2525,9 +1712,8 @@ diff --ruN a/stablehlo/stablehlo/transforms/StablehloRefineShapes.cpp b/stablehl + return rewriter.notifyMatchFailure(op, "expected constant output_shape"); + + // We only need to refine the shape of `output` (the second result). 
-+ // The shape of `output_state` (the first result) is determined by the -+ // shape of `initial_state`, so we ignore it and provide an empty -+ // refinement. ++ // The shape of `output_state` (the first result) is determined by the shape ++ // of `initial_state`, so we ignore it and provide an empty refinement. + return refineReturnTypes(rewriter, op, {{initialStateType}, {outputShape}}); + } +}; @@ -2551,349 +1737,15 @@ diff --ruN a/stablehlo/stablehlo/transforms/StablehloRefineShapes.cpp b/stablehl } }; -@@ -865,11 +1245,11 @@ - if (!isa(op->getDialect())) - return rewriter.notifyMatchFailure(op, "unsupported dialect"); - -- // For the ops that implement InferTypeOpInterface, we reinfer their return -- // types and see what happens. -- // Operands of these ops might have been refined elsewhere (e.g. someone -- // might have updated argument types of a function) or earlier during this -- // pass, and this might enable refinement opportunities downstream. -+ // For the ops that implement InferTypeOpInterface, we reinfer their -+ // return types and see what happens. Operands of these ops might have -+ // been refined elsewhere (e.g. someone might have updated argument types -+ // of a function) or earlier during this pass, and this might enable -+ // refinement opportunities downstream. - SmallVector inferredReturnTypes; - if (failed(op.inferReturnTypes(getContext(), /*location=*/{}, - op->getOperands(), op->getAttrDictionary(), -@@ -925,8 +1305,8 @@ - sliceSizesAttr.size(), - RankedTensorType::get({}, startIndicesElementType)); - -- // RealDynamicSliceOp can take tensors of integer or index element types. -- // DynamicSliceOp::slice_sizes only supports i64 element type. -+ // RealDynamicSliceOp can take tensors of integer or index element -+ // types. DynamicSliceOp::slice_sizes only supports i64 element type. - // Adapt accordingly in order to be compatible with inferDynamicSliceOp. 
- SmallVector sliceSizes; - for (auto element : sliceSizesAttr.getValues()) { -@@ -956,9 +1336,9 @@ - if (!operandType.hasRank()) - return rewriter.notifyMatchFailure(op, "expected ranked operand type"); - -- // This represents the cross_replica_and_partition process grouping strategy -- // that requires num_partitions to compute shardCount. Since we don't know -- // num_partitions at this point, we error out. -+ // This represents the cross_replica_and_partition process grouping -+ // strategy that requires num_partitions to compute shardCount. Since we -+ // don't know num_partitions at this point, we error out. - if (op.getChannelHandle() && !op.getUseGlobalDeviceIds()) - return rewriter.notifyMatchFailure(op, "unsupported strategy"); - DenseIntElementsAttr replicaGroups = op.getReplicaGroups(); -@@ -998,9 +1378,9 @@ - PatternRewriter& rewriter) const override { - // Push the potentially refined operand types into the nested regions. - // This can lead to refinements of the return types of the body (but not -- // of the cond since it always returns tensor), but the key insight here -- // is that the enclosing while op doesn't care about these refinements -- // (because its return types are equal to its operand types). -+ // of the cond since it always returns tensor), but the key insight -+ // here is that the enclosing while op doesn't care about these -+ // refinements (because its return types are equal to its operand types). - // If we end up with incompatibilities between while's return types and - // body's return types, the verifier will tell us about that. This means - // that the original program wasn't well-formed. TODO(burmako): Implement -@@ -1050,8 +1430,8 @@ - if (failed(mostSpecificType) || destType == *mostSpecificType) continue; - - // If the source type of the cast is more specific than the target type, -- // then we conclude that the cast is redundant (i.e. needs to be removed) -- // and that the return type of the function needs an update. 
-+ // then we conclude that the cast is redundant (i.e. needs to be -+ // removed) and that the return type of the function needs an update. - needsUpdate = true; - updatedResultTypes[i] = sourceType; - -@@ -1066,9 +1446,6 @@ - for (auto cast : castsToReplace) - rewriter.replaceOp(cast, cast->getOperands()); - -- // If the type of the enclosing `func.func` needs an update, we simply -- // call setType. We can afford this simplicity because our algorithm -- // currently supports only one function per module. - auto func = cast(op->getParentOp()); - func.setType( - rewriter.getFunctionType(func.getArgumentTypes(), updatedResultTypes)); -@@ -1100,22 +1477,186 @@ - } - }; - -+LogicalResult applyRewritePatterns(func::FuncOp func, MLIRContext* context, -+ RefineShapeState* state) { -+ // TODO(#1048): Find out why .maxIterations = 1 no longer works. -+ // There have been recent refactors to applyPatternsAndFoldGreedily -+ // upstream, and that might be the reason. -+ GreedyRewriteConfig config; -+ config.useTopDownTraversal = true; -+ config.enableRegionSimplification = true; -+ config.maxIterations = 2; -+ config.maxNumRewrites = GreedyRewriteConfig::kNoLimit; -+ config.strictMode = GreedyRewriteStrictness::AnyOp; -+ -+ RewritePatternSet patterns(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context, state); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ 
patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ if (failed(applyPatternsAndFoldGreedily(func, std::move(patterns), config))) { -+ func.emitOpError() << "applyPatternsAndFoldGreedily failed"; -+ return failure(); -+ } -+ return success(); -+} -+ -+LogicalResult refineFunction(func::FuncOp func, MLIRContext* context, -+ RefineShapeState* state, -+ size_t nrPrefixTokenArguments, -+ SmallVector dimensionArguments, -+ SmallVector nonDimensionArgumentTypes) { -+ // The nonDimensionArgumentTypes include the prefix token arguments. -+ LLVM_DEBUG({ -+ llvm::dbgs() << "refineFunction " << func.getName() << ": initial type " -+ << debugString(func.getFunctionType()) << "\n"; -+ llvm::dbgs() << " has " << nrPrefixTokenArguments << " prefix tokens\n"; -+ for (size_t i = 0; i < dimensionArguments.size(); ++i) { -+ llvm::dbgs() << " with dimension arg[" << i -+ << "] = " << dimensionArguments[i] << "\n"; -+ } -+ }); -+ // Check that the argument types have static shapes. -+ for (size_t i = 0; i < nonDimensionArgumentTypes.size(); ++i) { -+ if (i < nrPrefixTokenArguments) continue; -+ auto argType = nonDimensionArgumentTypes[i]; -+ if (isa(argType)) continue; -+ auto argRankedTensorType = dyn_cast(argType); -+ if (!argRankedTensorType || !argRankedTensorType.hasStaticShape()) { -+ func.emitOpError() << func.getName() -+ << " must be refined with static shape arguments. 
" -+ << "Found argument of type " << debugString(argType); -+ return failure(); -+ } -+ } -+ auto alreadyRefined = state->validateFunctionRefinement( -+ func, dimensionArguments, nonDimensionArgumentTypes); -+ if (failed(alreadyRefined)) { -+ return failure(); -+ } -+ if (*alreadyRefined) { -+ LLVM_DEBUG({ -+ llvm::dbgs() << "refineFunction " << func.getName() -+ << ": skipping, already refined\n"; -+ }); -+ return success(); -+ } -+ state->startFunctionRefinement(func, dimensionArguments, -+ nonDimensionArgumentTypes); -+ // Only one block per function is supported at the moment. -+ // At the StableHLO level, functions are expected to only have one block, -+ // so supporting more is out of scope for this pass. -+ if (!func.getRegion().hasOneBlock()) { -+ func.emitOpError() << "must have exactly one block"; -+ return failure(); -+ } -+ -+ // Replace all dimension arguments with constants and remove those arguments. -+ // Wrap non-dimension arguments with bitcast_convert. -+ OpBuilder op_builder(func.getRegion()); -+ op_builder.setInsertionPointToStart(&func.getRegion().front()); -+ size_t firstNonDimensionArg = -+ nrPrefixTokenArguments + dimensionArguments.size(); -+ for (size_t i = 0; i < func.getNumArguments(); ++i) { -+ BlockArgument arg = func.getArgument(i); -+ Type argType = arg.getType(); -+ if (i < nrPrefixTokenArguments) { -+ continue; -+ } -+ if (i < firstNonDimensionArg) { -+ ShapedType argShapedType = dyn_cast(argType); -+ if (!argShapedType) { -+ func.emitOpError() << "dimension arguments must have shaped types"; -+ return failure(); -+ } -+ // We will drop the dimension arguments, replace them with constants. 
-+ auto replacement_op = op_builder.create( -+ arg.getLoc(), argType, -+ getTensorAttr(argShapedType, -+ dimensionArguments[i - nrPrefixTokenArguments])); -+ arg.replaceAllUsesWith(replacement_op); -+ } else { -+ int nonDimensionArgumentIndex = -+ nrPrefixTokenArguments + i - firstNonDimensionArg; -+ Type refinedType = nonDimensionArgumentTypes[nonDimensionArgumentIndex]; -+ if (refinedType != argType) { -+ // We add BitcastConvertOp as the only uses of the non-dimension -+ // arguments to ensure the module stays valid after we set the argument -+ // type. -+ auto replacement_op = op_builder.create( -+ arg.getLoc(), argType, arg); -+ arg.replaceAllUsesExcept(replacement_op->getResult(0), replacement_op); -+ arg.setType(refinedType); -+ } -+ } -+ } -+ BitVector argIndices(func.getNumArguments()); -+ argIndices.set(nrPrefixTokenArguments, firstNonDimensionArg); -+ func.eraseArguments(argIndices); -+ func.setType(op_builder.getFunctionType(nonDimensionArgumentTypes, -+ func.getResultTypes())); -+ LLVM_DEBUG({ -+ llvm::dbgs() << "refineFunction " << func.getName() << ": set type to " -+ << func.getFunctionType() << "\n"; -+ }); -+ if (failed(applyRewritePatterns(func, context, state))) return failure(); -+ LLVM_DEBUG({ -+ llvm::dbgs() << "refineFunction " << func.getName() << ": end with type " -+ << debugString(func.getFunctionType()) << "\n"; -+ }); -+ if (failed(state->finishFunctionRefinement(func))) return failure(); -+ return success(); -+} -+ - struct StablehloRefineShapesPass - : public impl::StablehloRefineShapesPassBase { - using StablehloRefineShapesPassBase::StablehloRefineShapesPassBase; - - void runOnOperation() override { -- // Only one function per module is supported at the moment to avoid the need -- // to think about iterative type inference algorithms. -- // Current use cases are served well by inlining multiple functions into -- // a single function, so we leave native support for multiple functions to -- // future work. 
- // To enable modules that contain CustomCallOp::called_computations, - // we allow multiple functions, in which case we only refine the main - // function called "main", assuming that the called computations will have - // static shapes. Lifting this assumption and expanding refinement to - // multiple functions is left for future work. - ModuleOp module = getOperation(); -+ RefineShapeState state; - auto funcs = llvm::to_vector(module.getOps()); - if (funcs.empty()) return; - func::FuncOp func; -@@ -1130,70 +1671,14 @@ - << " function to clearly identify which function will be refined"; - return signalPassFailure(); - } -- -- // Similarly, only one block per function is supported at the moment. -- // At the StableHLO level, functions are expected to only have one block, -- // so supporting more is out of scope for this pass. -- if (!func.getRegion().hasOneBlock()) { -- func.emitOpError() << "must have exactly one block"; -+ SmallVector emptyDimensionArguments; -+ SmallVector nonDimensionArgumentTypes; -+ for (auto arg : func.getArguments()) -+ nonDimensionArgumentTypes.push_back(arg.getType()); -+ if (failed(refineFunction(func, &getContext(), &state, 0, -+ emptyDimensionArguments, -+ nonDimensionArgumentTypes))) - return signalPassFailure(); -- } -- -- // The algorithm behind this pass consists of a single traversal of the -- // function. This is sufficient because we only support one function per -- // program at the moment. -- // TODO(#1048): Find out why .maxIterations = 1 no longer works. -- // There have been recent refactors to applyPatternsAndFoldGreedily -- // upstream, and that might be the reason. 
-- GreedyRewriteConfig config; -- config.useTopDownTraversal = true; -- config.enableRegionSimplification = true; -- config.maxIterations = 2; -- config.maxNumRewrites = GreedyRewriteConfig::kNoLimit; -- config.strictMode = GreedyRewriteStrictness::AnyOp; -- -- RewritePatternSet patterns(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- if (failed( -- applyPatternsAndFoldGreedily(func, std::move(patterns), config))) { -- return signalPassFailure(); -- } - } - }; - +@@ -1181,7 +1267,10 @@ + patterns.add(&getContext()); + patterns.add(&getContext()); + patterns.add(&getContext()); ++ patterns.add(&getContext()); + patterns.add(&getContext()); ++ patterns.add(&getContext()); ++ patterns.add(&getContext()); + patterns.add(&getContext()); + patterns.add(&getContext()); + patterns.add(&getContext()); diff --git 
a/third_party/xla/third_party/stablehlo/temporary.patch b/third_party/xla/third_party/stablehlo/temporary.patch index 0cb078c3b89795..4c4228163a6f04 100644 --- a/third_party/xla/third_party/stablehlo/temporary.patch +++ b/third_party/xla/third_party/stablehlo/temporary.patch @@ -1426,353 +1426,7 @@ diff --ruN a/stablehlo/stablehlo/tests/stablehlo_canonicalize_dynamism.mlir b/st diff --ruN a/stablehlo/stablehlo/tests/stablehlo_refine_shapes.mlir b/stablehlo/stablehlo/tests/stablehlo_refine_shapes.mlir --- stablehlo/stablehlo/tests/stablehlo_refine_shapes.mlir +++ stablehlo/stablehlo/tests/stablehlo_refine_shapes.mlir -@@ -31,6 +31,7 @@ - - // ----- - -+// CHECK-LABEL: module @has_main - module @has_main { - // CHECK: main - func.func @main(%arg0: tensor<4xf32>) -> tensor<*xi32> { -@@ -38,17 +39,11 @@ - %0 = stablehlo.bitcast_convert %arg0 : (tensor<4xf32>) -> tensor<*xi32> - func.return %0 : tensor<*xi32> - } -- -- // CHECK: helper -- func.func @helper(%arg0: tensor<4xf32>) -> tensor<*xi32> { -- // CHECK: stablehlo.bitcast_convert{{.*}} -> tensor<*xi32> -- %0 = stablehlo.bitcast_convert %arg0 : (tensor<4xf32>) -> tensor<*xi32> -- func.return %0 : tensor<*xi32> -- } --} -- --// ----- -- -+} -+ -+// ----- -+ -+// CHECK-LABEL: func @error_unsupported_operation - func.func @error_unsupported_operation(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> index { - // CHECK: stablehlo.add{{.*}} -> tensor - %0 = stablehlo.add %arg0, %arg1 : (tensor<4xf32>, tensor<4xf32>) -> tensor -@@ -472,11 +467,312 @@ - - // ----- - --// CHECK-LABEL: func @refine_bitcast_convert_same_bitwidth --func.func @refine_bitcast_convert_same_bitwidth(%arg0 : tensor<4xf32>) -> tensor<*xi32> { -+// CHECK-LABEL: func @refine_bitcast_convert_same_bitwidth_unranked_result -+func.func @refine_bitcast_convert_same_bitwidth_unranked_result(%arg0 : tensor<4xf32>) -> tensor<*xi32> { - // CHECK: stablehlo.bitcast_convert{{.*}} -> tensor<4xi32> - %0 = stablehlo.bitcast_convert %arg0 : (tensor<4xf32>) -> 
tensor<*xi32> - func.return %0 : tensor<*xi32> -+} -+ -+// ----- -+ -+// CHECK-LABEL: func @refine_bitcast_convert_same_bitwidth -+func.func @refine_bitcast_convert_same_bitwidth() -> tensor { -+ %0 = stablehlo.constant dense<[3, 5, 0]> : tensor<3xi32> -+ %21 = stablehlo.dynamic_iota %0, dim = 0 : (tensor<3xi32>) -> tensor -+ // CHECK: stablehlo.bitcast_convert{{.*}} -> tensor<3x5x0xf32> -+ %48 = stablehlo.bitcast_convert %21 : (tensor) -> tensor -+ return %48 : tensor -+} -+ -+// ----- -+ -+// CHECK-LABEL: module @refine_call -+module @refine_call { -+ func.func @main(%arg1: tensor<4xf32>) -> tensor { -+ %0 = stablehlo.bitcast_convert %arg1 : (tensor<4xf32>) -> tensor -+ %1 = stablehlo.constant dense<4> : tensor -+ // CHECK: refine_call_callee{{.*}}-> tensor<4xf32> -+ %2 = call @refine_call_callee(%1, %0) : (tensor, tensor) -> tensor -+ return %2 : tensor -+ } -+ // CHECK: refine_call_callee(%arg0: tensor<4xf32>) -> tensor<4xf32> -+ func.func @refine_call_callee(%arg0: tensor, %arg1: tensor) -> tensor { -+ // CHECK: stablehlo.constant dense<4> -+ %0 = stablehlo.reshape %arg0 : (tensor) -> tensor<1xi32> -+ %1 = stablehlo.dynamic_iota %0, dim = 0 : (tensor<1xi32>) -> tensor -+ return %1 : tensor -+ } -+} -+ -+// ----- -+ -+// CHECK-LABEL: module @refine_call_dimension_arguments -+module @refine_call_dimension_arguments { -+ func.func public @main(%arg0: tensor) -> tensor { -+ // CHECK: [[RESULT:%.*]] = call @callee -+ // CHECK: return [[RESULT]] -+ %0 = stablehlo.constant dense<3> : tensor -+ %1 = call @callee(%0, %0, %arg0) : (tensor, tensor, tensor) -> tensor -+ return %1 : tensor -+ } -+ // %arg0 and %arg1 are dimension arguments -+ // CHECK: @callee([[ARG0:%.*]]: tensor) -> tensor -+ func.func private @callee(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -+ // CHECK: [[RESULT0:%.*]] = stablehlo.constant dense<6> -+ // CHECK: [[RESULT1:%.*]] = stablehlo.add [[RESULT0]], [[ARG0]] -+ // CHECK: return [[RESULT1]] -+ %0 = stablehlo.add %arg0, %arg1: tensor 
-+ %1 = stablehlo.add %0, %arg2: tensor -+ return %1 : tensor -+ } -+} -+ -+// ----- -+ -+// CHECK-LABEL: module @refine_call_prefix_token_and_dimension_arguments -+module @refine_call_prefix_token_and_dimension_arguments { -+ func.func public @main(%arg0: tensor) -> tensor { -+ // CHECK: [[RESULT:%.*]] = call @callee -+ // CHECK: return [[RESULT]] -+ %0 = stablehlo.constant dense<3> : tensor -+ %token = stablehlo.create_token : !stablehlo.token -+ %1 = call @callee(%token, %0, %0, %arg0) : (!stablehlo.token, tensor, tensor, tensor) -> tensor -+ return %1 : tensor -+ } -+ // %arg0 and %arg1 are dimension arguments -+ // CHECK: @callee([[ARG_TOKEN:%.*]]: !stablehlo.token, [[ARG0:%.*]]: tensor -+ func.func private @callee(%arg_token: !stablehlo.token, %arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -+ // CHECK: [[RESULT0:%.*]] = stablehlo.constant dense<6> -+ // CHECK: [[RESULT1:%.*]] = stablehlo.add [[RESULT0]], [[ARG0]] -+ // CHECK: return [[RESULT1]] -+ %0 = stablehlo.add %arg0, %arg1: tensor -+ %1 = stablehlo.add %0, %arg2: tensor -+ return %1 : tensor -+ } -+} -+ -+// ----- -+ -+// CHECK-LABEL: module @refine_call_dimension_arguments_followed_by_token -+module @refine_call_dimension_arguments_followed_by_token { -+ func.func public @main(%arg0: tensor) -> tensor { -+ // CHECK: [[RESULT:%.*]] = call @callee -+ // CHECK: return [[RESULT]] -+ %0 = stablehlo.constant dense<3> : tensor -+ %token = stablehlo.create_token : !stablehlo.token -+ %1 = call @callee(%0, %0, %token, %arg0) : (tensor, tensor, !stablehlo.token, tensor) -> tensor -+ return %1 : tensor -+ } -+ // %arg0 and %arg1 are dimension arguments -+ // CHECK: @callee([[ARG_TOKEN:%.*]]: !stablehlo.token, [[ARG0:%.*]]: tensor -+ func.func private @callee(%arg0: tensor, %arg1: tensor, %arg_token: !stablehlo.token, %arg2: tensor) -> tensor { -+ // CHECK: [[RESULT0:%.*]] = stablehlo.constant dense<6> -+ // CHECK: [[RESULT1:%.*]] = stablehlo.add [[RESULT0]], [[ARG0]] -+ // CHECK: return [[RESULT1]] -+ 
%0 = stablehlo.add %arg0, %arg1: tensor -+ %1 = stablehlo.add %0, %arg2: tensor -+ return %1 : tensor -+ } -+} -+ -+// ----- -+ -+// CHECK-LABEL: module @refine_multiple_call_with_same_context -+module @refine_multiple_call_with_same_context { -+ func.func @main(%arg1: tensor<4xf32>) -> tensor { -+ %0 = stablehlo.bitcast_convert %arg1 : (tensor<4xf32>) -> tensor -+ %arg0_new = "stablehlo.get_dimension_size"(%0) {dimension = 0 : i64} : (tensor) -> tensor -+ // CHECK: refine_call_callee{{.*}}-> tensor<4xf32> -+ %1 = call @refine_call_callee(%arg0_new, %0) : (tensor, tensor) -> tensor -+ // CHECK: refine_call_callee{{.*}}-> tensor<4xf32> -+ %2 = call @refine_call_callee(%arg0_new, %1) : (tensor, tensor) -> tensor -+ return %2 : tensor -+ } -+ // CHECK: refine_call_callee{{.*}}-> tensor<4xf32> -+ func.func @refine_call_callee(%arg0: tensor, %arg1: tensor) -> tensor { -+ return %arg1 : tensor -+ } -+} -+ -+// ----- -+ -+// CHECK-LABEL: module @refine_multiple_call_constant_function -+module @refine_multiple_call_constant_function { -+ func.func @main(%arg0: tensor<5xf32>) -> tensor { -+ // CHECK: [[RESULT0:%.*]] = stablehlo.constant dense<16> -+ // CHECK: return [[RESULT0]] -+ %0 = stablehlo.constant dense<4> : tensor -+ %1 = call @refine_call_callee(%0, %arg0) : (tensor, tensor<5xf32>) -> tensor -+ %2 = call @refine_call_callee(%0, %arg0) : (tensor, tensor<5xf32>) -> tensor -+ %3 = stablehlo.add %1, %2: tensor -+ return %3 : tensor -+ } -+ func.func @refine_call_callee(%arg0: tensor, %arg1: tensor<5xf32>) -> tensor { -+ // CHECK: [[RESULT1:%.*]] = stablehlo.constant dense<8> -+ // CHECK: return [[RESULT1]] -+ %0 = stablehlo.add %arg0, %arg0: tensor -+ return %0 : tensor -+ } -+} -+ -+// ----- -+ -+module @refine_call_multiple_with_different_number_dimension_arguments { -+ func.func @main(%arg1: tensor<4xf32>) -> tensor { -+ %0 = stablehlo.bitcast_convert %arg1 : (tensor<4xf32>) -> tensor -+ %arg0_new = "stablehlo.get_dimension_size"(%0) {dimension = 0 : i64} : (tensor) 
-> tensor -+ %1 = call @refine_call_callee(%arg0_new, %0) : (tensor, tensor) -> tensor -+ // Ensure that the first argument is not a constant at the second call site -+ %arg0_different_f32 = stablehlo.bitcast_convert %arg0_new : (tensor) -> tensor -+ %arg0_different_i32 = stablehlo.bitcast_convert %arg0_different_f32 : (tensor) -> tensor -+ // expected-error@+1{{incorrect number of operands for callee}} -+ %2 = call @refine_call_callee(%arg0_different_i32, %1) : (tensor, tensor) -> tensor -+ return %2 : tensor -+ } -+ // expected-error@+1{{Function refine_call_callee has already been refined with a different refinement context. Previous context had 1 and now we have 2 non-dimension arguments}} -+ func.func @refine_call_callee(%arg0: tensor, %arg1: tensor) -> tensor { -+ return %arg1 : tensor -+ } -+} -+ -+// ----- -+ -+module @refine_call_multiple_different_dimension_arguments { -+ func.func @main(%arg1: tensor<4xf32>) -> tensor { -+ %0 = stablehlo.bitcast_convert %arg1 : (tensor<4xf32>) -> tensor -+ %arg0_new = "stablehlo.get_dimension_size"(%0) {dimension = 0 : i64} : (tensor) -> tensor -+ %1 = call @refine_call_callee(%arg0_new, %0) : (tensor, tensor) -> tensor -+ %arg0_different = stablehlo.add %arg0_new, %arg0_new : tensor -+ // expected-error@+1{{incorrect number of operands for callee}} -+ %2 = call @refine_call_callee(%arg0_different, %1) : (tensor, tensor) -> tensor -+ return %2 : tensor -+ } -+ // expected-error@+1{{Function refine_call_callee has already been refined with a different refinement context.}} -+ func.func @refine_call_callee(%arg0: tensor, %arg1: tensor) -> tensor { -+ return %arg1 : tensor -+ } -+} -+ -+// ----- -+ -+module @refine_call_multiple_different_non_dimension_arguments { -+ func.func @main(%arg1: tensor<4xf32>) -> tensor { -+ %0 = stablehlo.bitcast_convert %arg1 : (tensor<4xf32>) -> tensor -+ %arg0_new = "stablehlo.get_dimension_size"(%0) {dimension = 0 : i64} : (tensor) -> tensor -+ %1 = call @refine_call_callee(%arg0_new, %0) : 
(tensor, tensor) -> tensor -+ %2 = stablehlo.constant dense<[1., 2.]> : tensor<2xf32> -+ %3 = stablehlo.concatenate %1, %2, dim = 0 : (tensor, tensor<2xf32>) -> tensor -+ // expected-error@+1{{incorrect number of operands for callee}} -+ %4 = call @refine_call_callee(%arg0_new, %3) : (tensor, tensor) -> tensor -+ return %4 : tensor -+ } -+ // expected-error@+1{{Function refine_call_callee has already been refined with a different refinement context.}} -+ func.func @refine_call_callee(%arg0: tensor, %arg1: tensor) -> tensor { -+ return %arg1 : tensor -+ } -+} -+ -+// ----- -+ -+module @refine_call_recursive { -+ func.func @main() -> tensor { -+ %0 = stablehlo.constant dense<3> : tensor -+ %1 = call @refine_call_callee(%0) : (tensor) -> tensor -+ return %1 : tensor -+ } -+ // expected-error@+1{{Function refine_call_callee is being refined recursively}} -+ func.func @refine_call_callee(%arg0: tensor) -> tensor { -+ // expected-error@+1{{incorrect number of operands}} -+ %0 = call @refine_call_callee(%arg0) : (tensor) -> tensor -+ return %0 : tensor -+ } -+} -+ -+// ----- -+ -+module @refine_call_main_argument_unranked { -+ // expected-error@+1{{main must be refined with static shape arguments}} -+ func.func public @main(%arg0: tensor<*xi32>) -> tensor<*xi32> { -+ %2 = call @callee(%arg0) : (tensor<*xi32>) -> tensor<*xi32> -+ return %2 : tensor<*xi32> -+ } -+ func.func private @callee(%arg0: tensor<*xi32>) -> tensor<*xi32> { -+ return %arg0 : tensor<*xi32> -+ } -+} -+ -+// ----- -+ -+module @refine_call_main_argument_dynamic_shape { -+ // expected-error@+1{{main must be refined with static shape arguments}} -+ func.func public @main(%arg0: tensor) -> tensor { -+ %2 = call @callee(%arg0) : (tensor) -> tensor -+ return %2 : tensor -+ } -+ func.func private @callee(%arg0: tensor) -> tensor { -+ return %arg0 : tensor -+ } -+} -+ -+// ----- -+ -+module @refine_call_callee_argument_unranked { -+ func.func public @main(%arg0: tensor<1xi64>) -> tensor<*xi32> { -+ %1 = 
stablehlo.dynamic_iota %arg0, dim = 0 : (tensor<1xi64>) -> tensor<*xi32> -+ %2 = call @callee(%1) : (tensor<*xi32>) -> tensor<*xi32> -+ return %2 : tensor<*xi32> -+ } -+ // expected-error@+1{{callee must be refined with static shape arguments}} -+ func.func private @callee(%arg0: tensor<*xi32>) -> tensor<*xi32> { -+ return %arg0 : tensor<*xi32> -+ } -+} -+ -+// ----- -+ -+module @refine_call_callee_argument_dynamic_shape { -+ func.func public @main(%arg0: tensor<1xi64>) -> tensor { -+ %1 = stablehlo.dynamic_iota %arg0, dim = 0 : (tensor<1xi64>) -> tensor -+ %2 = call @callee(%1) : (tensor) -> tensor -+ return %2 : tensor -+ } -+ // expected-error@+1{{callee must be refined with static shape arguments}} -+ func.func private @callee(%arg0: tensor) -> tensor { -+ return %arg0 : tensor -+ } -+} -+ -+// ----- -+ -+// CHECK-LABEL: module @refine_call_dimension_argument_non_scalar -+// The non-scalar constant is not folded into the callee -+module @refine_call_dimension_argument_non_scalar { -+ func.func public @main() -> tensor<4xi32> { -+ // CHECK: dense<[1, 2, 3, 4]> : tensor<4xi32> -+ %0 = stablehlo.constant dense<[1, 2, 3, 4]> : tensor<4xi32> -+ %1 = call @callee(%0) : (tensor<4xi32>) -> tensor<4xi32> -+ return %1 : tensor<4xi32> -+ } -+ func.func private @callee(%arg0: tensor<4xi32>) -> tensor<4xi32> { -+ // CHECK: return %arg0 : tensor<4xi32> -+ return %arg0 : tensor<4xi32> -+ } -+} -+ -+// ----- -+ -+// CHECK-LABEL: module @refine_call_dimension_argument_not_integer -+module @refine_call_dimension_argument_not_integer { -+ func.func public @main() -> tensor { -+ %0 = stablehlo.constant dense<3.> : tensor -+ // CHECK: call @callee({{.*}}) : (tensor) -> tensor -+ %2 = call @callee(%0) : (tensor) -> tensor -+ return %2 : tensor -+ } -+ func.func private @callee(%arg0: tensor) -> tensor { -+ return %arg0 : tensor -+ } - } - - // ----- -@@ -607,12 +903,55 @@ +@@ -607,12 +607,55 @@ // ----- @@ -1977,74 +1631,7 @@ diff --ruN 
a/stablehlo/stablehlo/transforms/StablehloCanonicalizeDynamism.cpp b/ diff --ruN a/stablehlo/stablehlo/transforms/StablehloRefineShapes.cpp b/stablehlo/stablehlo/transforms/StablehloRefineShapes.cpp --- stablehlo/stablehlo/transforms/StablehloRefineShapes.cpp +++ stablehlo/stablehlo/transforms/StablehloRefineShapes.cpp -@@ -11,9 +11,48 @@ - See the License for the specific language governing permissions and - limitations under the License. - ==============================================================================*/ -- -+/* -+This shape refinement pass was designed to resolve the dynamic shapes in -+a StableHLO module produced by JAX serialization with shape polymorphism. -+Such a module has the following properties: -+ -+ * it contains a "main" function with statically-shaped arguments; -+ the result types may be dynamically shaped. -+ * all the dynamic shapes depend only on the input shapes (no shape -+ dependency on the input array contents). We refer to the operations that -+ depend transitively only on the input shapes (e.g., as given by -+ `stablehlo.get_dimension_size`) as `dimension` operations. -+ All dimension values can be resolved to constants through inter-procedural -+ constant folding. -+ * intermediate functions may take a number of token arguments (of type -+ !stablehlo.token) at the start of the argument list, followed by some -+ dimension arguments (integer scalars). -+ * some intermediate functions may return dimension values. -+ E.g., the `floordiv` operation on dimension values may be implemented -+ using intermediate functions. These constant functions need to be -+ constant-folded. -+ * All the dynamic shapes can be resolved through shape inference from the -+ dimension values. The dimension values themselves do not depend on the -+ result of shape inference. -+ -+ -+For each intermediate function we compute a refinement context, including -+the values of the dimension arguments and the static shapes of the other -+arguments. 
We compute the refinement context when we encounter a function call, -+and then we refine the callee recursively. We abort in the presence of -+recursive calls. -+We also abort if a function is called with multiple distinct refinement -+contexts. -+ -+After refinement, all operations should have static shapes, all calls to -+constant functions are replaced with constants, and all dimension arguments -+for intermediate functions are dropped and are replaced with constants. -+*/ -+#include - #include - #include -+#include -+#include - #include - #include - -@@ -24,8 +63,10 @@ - #include "llvm/ADT/SmallSet.h" - #include "llvm/ADT/SmallVector.h" - #include "llvm/ADT/StringRef.h" -+#include "llvm/Support/Debug.h" - #include "llvm/Support/ErrorHandling.h" - #include "llvm/Support/FormatVariadic.h" -+#include "llvm/Support/ScopedPrinter.h" - #include "mlir/Dialect/Func/IR/FuncOps.h" - #include "mlir/IR/BuiltinAttributes.h" - #include "mlir/IR/BuiltinOps.h" -@@ -39,10 +80,13 @@ - #include "mlir/IR/Types.h" - #include "mlir/IR/Value.h" - #include "mlir/Interfaces/InferTypeOpInterface.h" -+#include "mlir/Support/DebugStringHelper.h" - #include "mlir/Support/LogicalResult.h" -+#include "mlir/Support/LLVM.h" +@@ -43,6 +43,7 @@ #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "stablehlo/dialect/Base.h" #include "stablehlo/dialect/ChloOps.h" @@ -2052,407 +1639,7 @@ diff --ruN a/stablehlo/stablehlo/transforms/StablehloRefineShapes.cpp b/stablehl #include "stablehlo/dialect/StablehloOps.h" #include "stablehlo/dialect/TypeInference.h" #include "stablehlo/transforms/Passes.h" -@@ -50,10 +94,144 @@ - namespace mlir { - namespace stablehlo { - -+#define DEBUG_TYPE "stablehlo-refine-shapes" -+ - #define GEN_PASS_DEF_STABLEHLOREFINESHAPESPASS - #include "stablehlo/transforms/Passes.h.inc" - - namespace { -+ -+// Per-module state for shape refinement. 
-+class RefineShapeState { -+ public: -+ // Validates that we are not attempting to refine a function with a different -+ // context than previously, and are not attempting recursive refinement. -+ // Returns failure() if validation fails. On success, returns a boolean -+ // that specifies whether the function has already been refined. -+ FailureOr validateFunctionRefinement( -+ func::FuncOp func, SmallVector dimensionArguments, -+ SmallVector nonDimensionArgumentTypes) { -+ StringRef funcName = func.getName(); -+ auto found = refinementContexts.find(func); -+ if (found == refinementContexts.end()) { -+ return false; // not already refined. -+ } -+ auto prevDimensionArguments = std::get<0>(found->second); -+ auto prevNonDimensionArgumentTypes = std::get<1>(found->second); -+ // Since we refine until fixed point, we will refine a call to a function -+ // both for the original function and for the refined one. In the latter -+ // case, we should have empty dimensionArguments but the same -+ // nonDimensionArgumentTypes. -+ if (prevNonDimensionArgumentTypes != nonDimensionArgumentTypes || -+ (!dimensionArguments.empty() && -+ prevDimensionArguments != dimensionArguments)) { -+ emitDifferentRefinementContextError( -+ func, /*dimensionArguments=*/dimensionArguments, -+ /*nonDimensionArgumentTypes=*/nonDimensionArgumentTypes, -+ /*prevDimensionArguments=*/prevDimensionArguments, -+ /*prevNonDimensionArgumentShapes=*/prevNonDimensionArgumentTypes); -+ return failure(); -+ } -+ for (auto funcOnStack : functionsBeingRefined) { -+ if (funcOnStack == funcName) { -+ func.emitOpError() << "Function " << funcName -+ << " is being refined recursively\n"; -+ return failure(); -+ } -+ } -+ return true; // already refined. -+ } -+ -+ // Updates the state to signal the starting of a function refinement. -+ // Callers must call `finishFunctionRefinement` when done. 
-+ void startFunctionRefinement(func::FuncOp func, -+ SmallVector dimensionArguments, -+ SmallVector nonDimensionArgumentTypes) { -+ StringRef funcName = func.getName(); -+ functionsBeingRefined.push_back(funcName); -+ refinementContexts[func] = -+ std::make_tuple(dimensionArguments, nonDimensionArgumentTypes); -+ } -+ -+ // Updates the state to signal the starting of a function refinement. -+ LogicalResult finishFunctionRefinement(func::FuncOp func) { -+ if (func.getName() != -+ functionsBeingRefined[functionsBeingRefined.size() - 1]) { -+ func.emitOpError() << "Expected to find " << func.getName() -+ << " at the top of the stack"; -+ return failure(); -+ } -+ functionsBeingRefined.pop_back(); -+ return success(); -+ } -+ -+ private: -+ // Maps refined functions to the refinement context: the values of dimension -+ // arguments and the types of non-dimension arguments. A function is added -+ // here when we start refining it. -+ DenseMap, SmallVector>> -+ refinementContexts; -+ -+ // A stack of functions that are in the process of being refined, the current -+ // one is last. -+ SmallVector functionsBeingRefined; -+ -+ void emitDifferentRefinementContextError( -+ func::FuncOp func, SmallVector dimensionArguments, -+ SmallVector nonDimensionArgumentTypes, -+ SmallVector prevDimensionArguments, -+ SmallVector prevNonDimensionArgumentShapes) { -+ InFlightDiagnostic msg = func.emitOpError(); -+ msg << "Function " << func.getName() -+ << " has already been refined with a different " -+ "refinement context. "; -+ int countShowNonDimensionArguments = -+ std::min(prevNonDimensionArgumentShapes.size(), -+ nonDimensionArgumentTypes.size()); -+ if (prevNonDimensionArgumentShapes.size() != -+ nonDimensionArgumentTypes.size()) { -+ msg << "Previous context had " << prevNonDimensionArgumentShapes.size() -+ << " and now we have " << nonDimensionArgumentTypes.size() -+ << " non-dimension arguments. 
"; -+ } -+ msg << "The differences among the first " << countShowNonDimensionArguments -+ << " non-dimension argument types are: "; -+ for (auto i = 0; i < countShowNonDimensionArguments; ++i) { -+ if (prevNonDimensionArgumentShapes[i] != nonDimensionArgumentTypes[i]) { -+ msg << "Non-dimension argument[" << i << "] previously had type " -+ << debugString(prevNonDimensionArgumentShapes[i]) -+ << " and now has type " << debugString(nonDimensionArgumentTypes[i]) -+ << ". "; -+ } -+ } -+ int countShowDimensionArguments = -+ std::min(prevDimensionArguments.size(), dimensionArguments.size()); -+ if (prevDimensionArguments.size() != dimensionArguments.size()) { -+ msg << "Previous context had " << prevDimensionArguments.size() -+ << " and now we have " << dimensionArguments.size() -+ << " dimension arguments. "; -+ } -+ msg << "The differences among the first " << countShowDimensionArguments -+ << " dimension arguments are: "; -+ for (auto i = 0; i < countShowDimensionArguments; ++i) { -+ if (prevDimensionArguments[i] != dimensionArguments[i]) { -+ msg << "Dimension argument[" << i << "] previously was " -+ << prevDimensionArguments[i].getSExtValue() << " and now is " -+ << dimensionArguments[i].getSExtValue() << ". "; -+ } -+ } -+ } -+}; -+ -+// Refines a function. -+// Returns `true` if the function had already been processed with the same -+// refinement context and `false` if this is the first time we refined the -+// function. Returns failure() if we encounter an error. -+LogicalResult refineFunction(func::FuncOp func, MLIRContext* context, -+ RefineShapeState* state, -+ size_t nrPrefixTokenArguments, -+ SmallVector dimensionArguments, -+ SmallVector nonDimensionArgumentTypes); - - // DenseElementsAttr can be constructed from ArrayRef but not from - // ArrayRef. This helper bridges the gap. 
-@@ -424,11 +602,10 @@ - diag << "refineValues failed for " << types << ": expected " - << values.size() << " types, got " << types.size(); - }); -- -- // Check whether `types` contain any new information with respect to existing -- // return types. Even if just a single dimension size out of an entire tensor -- // type got updated, using `inferMostSpecificType` ensures that we don't -- // miss that. -+ // Check whether `types` contain any new information with respect to -+ // existing return types. Even if just a single dimension size out of an -+ // entire tensor type got updated, using `inferMostSpecificType` ensures -+ // that we don't miss that. - bool needsRefinement = false; - SmallVector refinedTypes; - for (auto it : llvm::zip(values.getTypes(), types)) { -@@ -468,11 +645,13 @@ - - // Simply changing operand type of `func.return` won't work because - // that won't update the FunctionType of the enclosing `func.func`. -- // Nonetheless, we still want to support these ops because they are widely -- // used in StableHLO programs (although the plan of record is to replace -- // `func.return` ops in StableHLO programs with `stablehlo.return`: -- // https://github.com/openxla/stablehlo/issues/425). -+ // Nonetheless, we still want to support these ops because they are -+ // widely used in StableHLO programs (although the plan of record is to -+ // replace `func.return` ops in StableHLO programs with -+ // `stablehlo.return`: https://github.com/openxla/stablehlo/issues/425). - if (isa(user)) continue; -+ -+ if (isa(user)) continue; - - // Unlike in TensorFlow's type inference pass, here we work only with - // allowlisted ops to focus our support on well-defined semantics of -@@ -489,7 +668,8 @@ - value.setType(refinedType); - - // Special case: for `func.return`, guard the refinement with a cast -- // and leave propagation of the refined return type to a dedicated pattern. -+ // and leave propagation of the refined return type to a dedicated -+ // pattern. 
- auto isFuncReturn = [](OpOperand& use) -> bool { - return isa(use.getOwner()); - }; -@@ -505,8 +685,8 @@ - - // Refines the return types of the given operation using the given types. - // This function also signals PatternRewriter that it needs to visit all the --// users of this op if any updates to its results have happened during execution --// of the function. -+// users of this op if any updates to its results have happened during -+// execution of the function. - LogicalResult refineReturnTypes(PatternRewriter& rewriter, Operation* op, - ArrayRef types) { - if (failed(refineValues(rewriter, op, op->getResults(), types))) -@@ -528,12 +708,12 @@ - // traversal, and only then we apply the refinements. If there are other - // types, then the corresponding refinements must be completely empty. - // 2) Encodings are not supported. In principle, TypeExtensions should be --// supportable, but this needs careful thinking through. Given that no one --// asked for support for bounded dynamism in this pass yet, this is left --// for future work. -+// supportable, but this needs careful thinking through. Given that no -+// one asked for support for bounded dynamism in this pass yet, this is -+// left for future work. - // This function also signals PatternRewriter that it needs to visit all the --// users of this op if any updates to its results have happened during execution --// of the function. -+// users of this op if any updates to its results have happened during -+// execution of the function. - LogicalResult refineReturnTypes(PatternRewriter& rewriter, Operation* op, - ArrayRef refinements) { - SmallVector flattenedTypes; -@@ -623,8 +803,8 @@ - - // Refines the return type of the given operation using the given shape. - // This function also signals PatternRewriter that it needs to visit all the --// users of this op if any updates to its results have happened during execution --// of the function. 
-+// users of this op if any updates to its results have happened during -+// execution of the function. - template - LogicalResult refineReturnShape(PatternRewriter& rewriter, OpType op, - ArrayRef shape) { -@@ -633,8 +813,8 @@ - - // Refines the return type of the given operation using the given shape. - // This function also signals PatternRewriter that it needs to visit all the --// users of this op if any updates to its results have happened during execution --// of the function. -+// users of this op if any updates to its results have happened during -+// execution of the function. - template - LogicalResult refineReturnShape(PatternRewriter& rewriter, OpType op, - Value shapeValue) { -@@ -647,6 +827,52 @@ - return refineReturnShape(rewriter, op, shape); - } - -+// Dimension arguments are leading scalar constant arguments, optionally -+// preceeded by some stablehlo.token arguments. -+SmallVector getDimensionArguments(func::CallOp callOp, -+ size_t* nrPrefixTokenArguments) { -+ *nrPrefixTokenArguments = 0; -+ SmallVector operands = callOp.getOperands(); -+ SmallVector dimensionArguments; -+ for (size_t i = 0; i < operands.size(); ++i) { -+ if (i == *nrPrefixTokenArguments && isa(operands[i].getType())) { -+ (*nrPrefixTokenArguments)++; -+ continue; -+ } -+ RankedTensorType operandType = -+ dyn_cast(operands[i].getType()); -+ if (!operandType || operandType.getRank() != 0 || -+ !operandType.getElementType().template isa()) -+ break; -+ SmallVector operand_int; -+ if (failed(hlo::matchInts(operands[i], operand_int))) { -+ break; -+ } -+ dimensionArguments.push_back(operand_int[0]); -+ } -+ return dimensionArguments; -+} -+ -+std::optional> isConstantFunction( -+ func::FuncOp func) { -+ LLVM_DEBUG(llvm::dbgs() << "check if " << func.getName() -+ << " is a constant function\n"); -+ SmallVector returnedConstants; -+ func::ReturnOp ret = *func.getOps().begin(); -+ bool isConstant = llvm::all_of(ret->getOperands(), [&](auto returnVal) { -+ DenseIntElementsAttr attr; 
-+ Operation* return_operand_def = returnVal.getDefiningOp(); -+ if (return_operand_def && -+ matchPattern(return_operand_def, m_Constant(&attr))) { -+ returnedConstants.push_back(attr); -+ return true; -+ } -+ return false; -+ }); -+ if (isConstant) return returnedConstants; -+ return std::nullopt; -+} -+ - struct RefineAllGatherOpPattern : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(AllGatherOp op, -@@ -655,9 +881,9 @@ - if (!operandType.hasRank()) - return rewriter.notifyMatchFailure(op, "expected ranked operand type"); - -- // This represents the cross_replica_and_partition process grouping strategy -- // that requires num_partitions to compute shardCount. Since we don't know -- // num_partitions at this point, we error out. -+ // This represents the cross_replica_and_partition process grouping -+ // strategy that requires num_partitions to compute shardCount. Since we -+ // don't know num_partitions at this point, we error out. - if (op.getChannelHandle() && !op.getUseGlobalDeviceIds()) - return rewriter.notifyMatchFailure(op, "unsupported strategy"); - DenseIntElementsAttr replicaGroups = op.getReplicaGroups(); -@@ -678,12 +904,11 @@ - auto operandType = op.getOperand().getType(); - if (!operandType.hasRank()) - return rewriter.notifyMatchFailure(op, "expected ranked operand type"); -- -+ auto resultType = op.getType(); - // If bit widths of the operand and the result are different, then - // operand and result shapes have different ranks. - // This complicates the logic quite a bit and is not needed to pass the - // current tests, so we leave this for future work. 
-- auto resultType = op.getType(); - auto getBitWidthFn = [](ShapedType type) { - auto elementType = type.getElementType(); - if (auto complexType = elementType.dyn_cast()) -@@ -694,8 +919,77 @@ - if (getBitWidthFn(operandType) != getBitWidthFn(resultType)) - return rewriter.notifyMatchFailure(op, "unsupported bit width"); - -- return refineReturnShape(rewriter, op, operandType.getShape()); -- } -+ auto res = refineReturnShape(rewriter, op, operandType.getShape()); -+ if (failed(res)) return failure(); -+ if (op.getOperand().getType() == op.getResult().getType()) { -+ LLVM_DEBUG({ llvm::dbgs() << " ** remove no-op bitcast convert\n"; }); -+ rewriter.replaceOp(op, op.getOperand()); -+ } -+ return success(); -+ } -+}; -+ -+struct RefineCallOpPattern : public OpRewritePattern { -+ using OpRewritePattern::OpRewritePattern; -+ -+ RefineCallOpPattern(MLIRContext* context, RefineShapeState* state) -+ : OpRewritePattern(context), _state(state) {} -+ -+ LogicalResult matchAndRewrite(func::CallOp op, -+ PatternRewriter& rewriter) const override { -+ LLVM_DEBUG({ llvm::dbgs() << "refineCallOp " << debugString(op) << "\n"; }); -+ -+ // We have a number of prefix token arguments, then the dimension arguments -+ size_t nrPrefixTokenArguments = 0; -+ SmallVector dimensionArguments = -+ getDimensionArguments(op, &nrPrefixTokenArguments); -+ SmallVector nonDimensionArgumentTypes; -+ SmallVector nonDimensionArguments; -+ SmallVector operands = op.getOperands(); -+ for (size_t i = 0; i < operands.size(); ++i) { -+ // Skip the dimension arguments. 
-+ if (i >= nrPrefixTokenArguments && -+ i < nrPrefixTokenArguments + dimensionArguments.size()) { -+ continue; -+ } -+ nonDimensionArgumentTypes.push_back(operands[i].getType()); -+ nonDimensionArguments.push_back(operands[i]); -+ } -+ FlatSymbolRefAttr calleeName = op.getCalleeAttr(); -+ const SymbolTable symbolTable(op->getParentOfType()); -+ func::FuncOp callee = dyn_cast( -+ symbolTable.lookupNearestSymbolFrom(op, calleeName.getAttr())); -+ if (!callee) -+ return rewriter.notifyMatchFailure( -+ op, "cannot find callee in the current scope"); -+ if (failed(refineFunction(callee, rewriter.getContext(), _state, -+ nrPrefixTokenArguments, dimensionArguments, -+ nonDimensionArgumentTypes))) -+ return failure(); -+ -+ // Is the callee a constant function in this refinement context? -+ std::optional> constantAttrs = -+ isConstantFunction(callee); -+ if (constantAttrs.has_value()) { -+ SmallVector constants; -+ for (auto constAttr : constantAttrs.value()) { -+ constants.push_back( -+ rewriter.create(op.getLoc(), constAttr)); -+ } -+ rewriter.replaceOp(op, constants); -+ return success(); -+ } -+ if (!dimensionArguments.empty()) { -+ // Drop the dimension arguments, but only if necessary, or else we -+ // will end up trying to refine the new CallOp forever. -+ op = rewriter.replaceOpWithNewOp( -+ op, op.getResultTypes(), callee.getSymName(), nonDimensionArguments); -+ } -+ return refineReturnTypes(rewriter, op, callee.getResultTypes()); -+ } -+ -+ private: -+ RefineShapeState* _state; - }; - - struct RefineConvertOpPattern : public OpRewritePattern { -@@ -844,12 +1138,98 @@ +@@ -844,12 +845,97 @@ } }; @@ -2525,9 +1712,8 @@ diff --ruN a/stablehlo/stablehlo/transforms/StablehloRefineShapes.cpp b/stablehl + return rewriter.notifyMatchFailure(op, "expected constant output_shape"); + + // We only need to refine the shape of `output` (the second result). 
-+ // The shape of `output_state` (the first result) is determined by the -+ // shape of `initial_state`, so we ignore it and provide an empty -+ // refinement. ++ // The shape of `output_state` (the first result) is determined by the shape ++ // of `initial_state`, so we ignore it and provide an empty refinement. + return refineReturnTypes(rewriter, op, {{initialStateType}, {outputShape}}); + } +}; @@ -2551,349 +1737,15 @@ diff --ruN a/stablehlo/stablehlo/transforms/StablehloRefineShapes.cpp b/stablehl } }; -@@ -865,11 +1245,11 @@ - if (!isa(op->getDialect())) - return rewriter.notifyMatchFailure(op, "unsupported dialect"); - -- // For the ops that implement InferTypeOpInterface, we reinfer their return -- // types and see what happens. -- // Operands of these ops might have been refined elsewhere (e.g. someone -- // might have updated argument types of a function) or earlier during this -- // pass, and this might enable refinement opportunities downstream. -+ // For the ops that implement InferTypeOpInterface, we reinfer their -+ // return types and see what happens. Operands of these ops might have -+ // been refined elsewhere (e.g. someone might have updated argument types -+ // of a function) or earlier during this pass, and this might enable -+ // refinement opportunities downstream. - SmallVector inferredReturnTypes; - if (failed(op.inferReturnTypes(getContext(), /*location=*/{}, - op->getOperands(), op->getAttrDictionary(), -@@ -925,8 +1305,8 @@ - sliceSizesAttr.size(), - RankedTensorType::get({}, startIndicesElementType)); - -- // RealDynamicSliceOp can take tensors of integer or index element types. -- // DynamicSliceOp::slice_sizes only supports i64 element type. -+ // RealDynamicSliceOp can take tensors of integer or index element -+ // types. DynamicSliceOp::slice_sizes only supports i64 element type. - // Adapt accordingly in order to be compatible with inferDynamicSliceOp. 
- SmallVector sliceSizes; - for (auto element : sliceSizesAttr.getValues()) { -@@ -956,9 +1336,9 @@ - if (!operandType.hasRank()) - return rewriter.notifyMatchFailure(op, "expected ranked operand type"); - -- // This represents the cross_replica_and_partition process grouping strategy -- // that requires num_partitions to compute shardCount. Since we don't know -- // num_partitions at this point, we error out. -+ // This represents the cross_replica_and_partition process grouping -+ // strategy that requires num_partitions to compute shardCount. Since we -+ // don't know num_partitions at this point, we error out. - if (op.getChannelHandle() && !op.getUseGlobalDeviceIds()) - return rewriter.notifyMatchFailure(op, "unsupported strategy"); - DenseIntElementsAttr replicaGroups = op.getReplicaGroups(); -@@ -998,9 +1378,9 @@ - PatternRewriter& rewriter) const override { - // Push the potentially refined operand types into the nested regions. - // This can lead to refinements of the return types of the body (but not -- // of the cond since it always returns tensor), but the key insight here -- // is that the enclosing while op doesn't care about these refinements -- // (because its return types are equal to its operand types). -+ // of the cond since it always returns tensor), but the key insight -+ // here is that the enclosing while op doesn't care about these -+ // refinements (because its return types are equal to its operand types). - // If we end up with incompatibilities between while's return types and - // body's return types, the verifier will tell us about that. This means - // that the original program wasn't well-formed. TODO(burmako): Implement -@@ -1050,8 +1430,8 @@ - if (failed(mostSpecificType) || destType == *mostSpecificType) continue; - - // If the source type of the cast is more specific than the target type, -- // then we conclude that the cast is redundant (i.e. needs to be removed) -- // and that the return type of the function needs an update. 
-+ // then we conclude that the cast is redundant (i.e. needs to be -+ // removed) and that the return type of the function needs an update. - needsUpdate = true; - updatedResultTypes[i] = sourceType; - -@@ -1066,9 +1446,6 @@ - for (auto cast : castsToReplace) - rewriter.replaceOp(cast, cast->getOperands()); - -- // If the type of the enclosing `func.func` needs an update, we simply -- // call setType. We can afford this simplicity because our algorithm -- // currently supports only one function per module. - auto func = cast(op->getParentOp()); - func.setType( - rewriter.getFunctionType(func.getArgumentTypes(), updatedResultTypes)); -@@ -1100,22 +1477,186 @@ - } - }; - -+LogicalResult applyRewritePatterns(func::FuncOp func, MLIRContext* context, -+ RefineShapeState* state) { -+ // TODO(#1048): Find out why .maxIterations = 1 no longer works. -+ // There have been recent refactors to applyPatternsAndFoldGreedily -+ // upstream, and that might be the reason. -+ GreedyRewriteConfig config; -+ config.useTopDownTraversal = true; -+ config.enableRegionSimplification = true; -+ config.maxIterations = 2; -+ config.maxNumRewrites = GreedyRewriteConfig::kNoLimit; -+ config.strictMode = GreedyRewriteStrictness::AnyOp; -+ -+ RewritePatternSet patterns(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context, state); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ 
patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ patterns.add(context); -+ if (failed(applyPatternsAndFoldGreedily(func, std::move(patterns), config))) { -+ func.emitOpError() << "applyPatternsAndFoldGreedily failed"; -+ return failure(); -+ } -+ return success(); -+} -+ -+LogicalResult refineFunction(func::FuncOp func, MLIRContext* context, -+ RefineShapeState* state, -+ size_t nrPrefixTokenArguments, -+ SmallVector dimensionArguments, -+ SmallVector nonDimensionArgumentTypes) { -+ // The nonDimensionArgumentTypes include the prefix token arguments. -+ LLVM_DEBUG({ -+ llvm::dbgs() << "refineFunction " << func.getName() << ": initial type " -+ << debugString(func.getFunctionType()) << "\n"; -+ llvm::dbgs() << " has " << nrPrefixTokenArguments << " prefix tokens\n"; -+ for (size_t i = 0; i < dimensionArguments.size(); ++i) { -+ llvm::dbgs() << " with dimension arg[" << i -+ << "] = " << dimensionArguments[i] << "\n"; -+ } -+ }); -+ // Check that the argument types have static shapes. -+ for (size_t i = 0; i < nonDimensionArgumentTypes.size(); ++i) { -+ if (i < nrPrefixTokenArguments) continue; -+ auto argType = nonDimensionArgumentTypes[i]; -+ if (isa(argType)) continue; -+ auto argRankedTensorType = dyn_cast(argType); -+ if (!argRankedTensorType || !argRankedTensorType.hasStaticShape()) { -+ func.emitOpError() << func.getName() -+ << " must be refined with static shape arguments. 
" -+ << "Found argument of type " << debugString(argType); -+ return failure(); -+ } -+ } -+ auto alreadyRefined = state->validateFunctionRefinement( -+ func, dimensionArguments, nonDimensionArgumentTypes); -+ if (failed(alreadyRefined)) { -+ return failure(); -+ } -+ if (*alreadyRefined) { -+ LLVM_DEBUG({ -+ llvm::dbgs() << "refineFunction " << func.getName() -+ << ": skipping, already refined\n"; -+ }); -+ return success(); -+ } -+ state->startFunctionRefinement(func, dimensionArguments, -+ nonDimensionArgumentTypes); -+ // Only one block per function is supported at the moment. -+ // At the StableHLO level, functions are expected to only have one block, -+ // so supporting more is out of scope for this pass. -+ if (!func.getRegion().hasOneBlock()) { -+ func.emitOpError() << "must have exactly one block"; -+ return failure(); -+ } -+ -+ // Replace all dimension arguments with constants and remove those arguments. -+ // Wrap non-dimension arguments with bitcast_convert. -+ OpBuilder op_builder(func.getRegion()); -+ op_builder.setInsertionPointToStart(&func.getRegion().front()); -+ size_t firstNonDimensionArg = -+ nrPrefixTokenArguments + dimensionArguments.size(); -+ for (size_t i = 0; i < func.getNumArguments(); ++i) { -+ BlockArgument arg = func.getArgument(i); -+ Type argType = arg.getType(); -+ if (i < nrPrefixTokenArguments) { -+ continue; -+ } -+ if (i < firstNonDimensionArg) { -+ ShapedType argShapedType = dyn_cast(argType); -+ if (!argShapedType) { -+ func.emitOpError() << "dimension arguments must have shaped types"; -+ return failure(); -+ } -+ // We will drop the dimension arguments, replace them with constants. 
-+ auto replacement_op = op_builder.create( -+ arg.getLoc(), argType, -+ getTensorAttr(argShapedType, -+ dimensionArguments[i - nrPrefixTokenArguments])); -+ arg.replaceAllUsesWith(replacement_op); -+ } else { -+ int nonDimensionArgumentIndex = -+ nrPrefixTokenArguments + i - firstNonDimensionArg; -+ Type refinedType = nonDimensionArgumentTypes[nonDimensionArgumentIndex]; -+ if (refinedType != argType) { -+ // We add BitcastConvertOp as the only uses of the non-dimension -+ // arguments to ensure the module stays valid after we set the argument -+ // type. -+ auto replacement_op = op_builder.create( -+ arg.getLoc(), argType, arg); -+ arg.replaceAllUsesExcept(replacement_op->getResult(0), replacement_op); -+ arg.setType(refinedType); -+ } -+ } -+ } -+ BitVector argIndices(func.getNumArguments()); -+ argIndices.set(nrPrefixTokenArguments, firstNonDimensionArg); -+ func.eraseArguments(argIndices); -+ func.setType(op_builder.getFunctionType(nonDimensionArgumentTypes, -+ func.getResultTypes())); -+ LLVM_DEBUG({ -+ llvm::dbgs() << "refineFunction " << func.getName() << ": set type to " -+ << func.getFunctionType() << "\n"; -+ }); -+ if (failed(applyRewritePatterns(func, context, state))) return failure(); -+ LLVM_DEBUG({ -+ llvm::dbgs() << "refineFunction " << func.getName() << ": end with type " -+ << debugString(func.getFunctionType()) << "\n"; -+ }); -+ if (failed(state->finishFunctionRefinement(func))) return failure(); -+ return success(); -+} -+ - struct StablehloRefineShapesPass - : public impl::StablehloRefineShapesPassBase { - using StablehloRefineShapesPassBase::StablehloRefineShapesPassBase; - - void runOnOperation() override { -- // Only one function per module is supported at the moment to avoid the need -- // to think about iterative type inference algorithms. -- // Current use cases are served well by inlining multiple functions into -- // a single function, so we leave native support for multiple functions to -- // future work. 
- // To enable modules that contain CustomCallOp::called_computations, - // we allow multiple functions, in which case we only refine the main - // function called "main", assuming that the called computations will have - // static shapes. Lifting this assumption and expanding refinement to - // multiple functions is left for future work. - ModuleOp module = getOperation(); -+ RefineShapeState state; - auto funcs = llvm::to_vector(module.getOps()); - if (funcs.empty()) return; - func::FuncOp func; -@@ -1130,70 +1671,14 @@ - << " function to clearly identify which function will be refined"; - return signalPassFailure(); - } -- -- // Similarly, only one block per function is supported at the moment. -- // At the StableHLO level, functions are expected to only have one block, -- // so supporting more is out of scope for this pass. -- if (!func.getRegion().hasOneBlock()) { -- func.emitOpError() << "must have exactly one block"; -+ SmallVector emptyDimensionArguments; -+ SmallVector nonDimensionArgumentTypes; -+ for (auto arg : func.getArguments()) -+ nonDimensionArgumentTypes.push_back(arg.getType()); -+ if (failed(refineFunction(func, &getContext(), &state, 0, -+ emptyDimensionArguments, -+ nonDimensionArgumentTypes))) - return signalPassFailure(); -- } -- -- // The algorithm behind this pass consists of a single traversal of the -- // function. This is sufficient because we only support one function per -- // program at the moment. -- // TODO(#1048): Find out why .maxIterations = 1 no longer works. -- // There have been recent refactors to applyPatternsAndFoldGreedily -- // upstream, and that might be the reason. 
-- GreedyRewriteConfig config; -- config.useTopDownTraversal = true; -- config.enableRegionSimplification = true; -- config.maxIterations = 2; -- config.maxNumRewrites = GreedyRewriteConfig::kNoLimit; -- config.strictMode = GreedyRewriteStrictness::AnyOp; -- -- RewritePatternSet patterns(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- patterns.add(&getContext()); -- if (failed( -- applyPatternsAndFoldGreedily(func, std::move(patterns), config))) { -- return signalPassFailure(); -- } - } - }; - +@@ -1181,7 +1267,10 @@ + patterns.add(&getContext()); + patterns.add(&getContext()); + patterns.add(&getContext()); ++ patterns.add(&getContext()); + patterns.add(&getContext()); ++ patterns.add(&getContext()); ++ patterns.add(&getContext()); + patterns.add(&getContext()); + patterns.add(&getContext()); + patterns.add(&getContext()); diff --git 
a/third_party/xla/xla/python/refine_polymorphic_shapes.cc b/third_party/xla/xla/python/refine_polymorphic_shapes.cc index 44ce5b20e39d9e..063b15aba58062 100644 --- a/third_party/xla/xla/python/refine_polymorphic_shapes.cc +++ b/third_party/xla/xla/python/refine_polymorphic_shapes.cc @@ -251,6 +251,10 @@ absl::Status RefinePolymorphicShapes(mlir::ModuleOp module, pm.enableIRPrinting(print_before, print_after, /*printModuleScope=*/true, /*printAfterOnlyOnChange=*/true); } + + // TODO(necula): we should not need the inliner. + pm.addPass(mlir::createInlinerPass()); + pm.addPass(mlir::createCSEPass()); pm.addPass(mlir::stablehlo::createStablehloRefineShapesPass()); pm.addNestedPass( mlir::stablehlo::createStablehloCanonicalizeDynamismPass()); From 9e03bbf6d0b9073b0d4ee66eba95bf73c3c36d73 Mon Sep 17 00:00:00 2001 From: Ilia Sergachev Date: Thu, 21 Sep 2023 09:23:34 -0700 Subject: [PATCH 087/567] [XLA:GPU] Trigger Triton GEMM fusions also on kCopy input operations. PiperOrigin-RevId: 567329486 --- .../xla/service/gpu/gemm_rewriter_triton.cc | 30 ++++++++++++------- .../xla/service/gpu/ir_emitter_triton_test.cc | 22 ++++++++++++++ 2 files changed, 41 insertions(+), 11 deletions(-) diff --git a/third_party/xla/xla/service/gpu/gemm_rewriter_triton.cc b/third_party/xla/xla/service/gpu/gemm_rewriter_triton.cc index 8f1e0399ddb13a..8f237801bfa4cf 100644 --- a/third_party/xla/xla/service/gpu/gemm_rewriter_triton.cc +++ b/third_party/xla/xla/service/gpu/gemm_rewriter_triton.cc @@ -73,6 +73,16 @@ limitations under the License. 
namespace xla { namespace gpu { +int GetFusionLevel(const HloInstruction& hlo, const GpuVersion gpu_version) { + int level = + hlo.GetModule()->config().debug_options().xla_gpu_triton_fusion_level(); + if (!std::get(gpu_version) + .IsAtLeast(se::CudaComputeCapability::AMPERE)) { + level = std::min(level, 1); + } + return level; +} + bool HasDivisibleSuffixAllowingSplit(const absl::Span span, const int64_t divisor) { CHECK_GE(divisor, 1); @@ -1082,12 +1092,6 @@ DimOrderUpdatesOrError FusionContext::AnalyzeForFusion( absl::flat_hash_map& old_to_new_mapping, const GpuVersion gpu_version) const { - int fusion_level = - hlo.GetModule()->config().debug_options().xla_gpu_triton_fusion_level(); - if (!std::get(gpu_version) - .IsAtLeast(se::CudaComputeCapability::AMPERE)) { - fusion_level = std::min(fusion_level, 1); - } if (hlo.opcode() == HloOpcode::kTuple || hlo.opcode() == HloOpcode::kGetTupleElement) { return "Unsupported instruction."; @@ -1108,7 +1112,7 @@ DimOrderUpdatesOrError FusionContext::AnalyzeForFusion( return "Unsupported output data type."; } if (as_input) { - if (fusion_level < 2) { + if (GetFusionLevel(hlo, gpu_version) < 2) { if (hlo.opcode() == HloOpcode::kConvert) { if (FusionDecision decision = RequireTritonFusibleConvert(&hlo, gpu_version); @@ -1124,7 +1128,7 @@ DimOrderUpdatesOrError FusionContext::AnalyzeForFusion( } } } else { - if (fusion_level < 2) { + if (GetFusionLevel(hlo, gpu_version) < 2) { return "Skipping fusing outputs at low fusion levels."; } for (const HloInstruction* operand : hlo.operands()) { @@ -1374,10 +1378,14 @@ StatusOr FuseDot(HloInstruction& dot, if (dot.GetModule()->config().debug_options().xla_gpu_triton_gemm_any()) { return FusionDecision{}; } + + absl::flat_hash_set triggers{ + HloOpcode::kConvert, HloOpcode::kSlice, HloOpcode::kTranspose}; + if (GetFusionLevel(dot, gpu_version) >= 2) { + triggers.insert(HloOpcode::kCopy); + } for (const auto& iter : old_to_new_mapping) { - if (iter.second->opcode() == 
HloOpcode::kConvert || - iter.second->opcode() == HloOpcode::kSlice || - iter.second->opcode() == HloOpcode::kTranspose) { + if (triggers.contains(iter.second->opcode())) { return FusionDecision{}; } } diff --git a/third_party/xla/xla/service/gpu/ir_emitter_triton_test.cc b/third_party/xla/xla/service/gpu/ir_emitter_triton_test.cc index 65f6906475d8e2..af6776ed1c0231 100644 --- a/third_party/xla/xla/service/gpu/ir_emitter_triton_test.cc +++ b/third_party/xla/xla/service/gpu/ir_emitter_triton_test.cc @@ -1086,6 +1086,28 @@ ENTRY e { EXPECT_TRUE(RunAndCompare(kHloText, ErrorSpec{/*aabs=*/2e-3, /*arel=*/2e-3})); } +TEST_F(TritonGemmLevel2Test, FuseTransposeWithoutMixedTypes) { + const std::string kHloText = R"( +ENTRY e { + p1 = f16[150,32,60]{2,1,0} parameter(1) + p0 = f16[75,2,26,60]{3,2,1,0} parameter(0) + t = f16[75,2,60,26]{3,2,1,0} transpose(p0), dimensions={0,1,3,2} + r = f16[150,60,26]{2,1,0} reshape(t) + ROOT tmp_4 = f16[150,32,26]{2,1,0} dot(p1, r), + lhs_batch_dims={0}, lhs_contracting_dims={2}, + rhs_batch_dims={0}, rhs_contracting_dims={1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + GetOptimizedModule(kHloText)); + EXPECT_THAT( + module->entry_computation()->root_instruction(), + GmockMatch(m::Fusion(m::Parameter(), m::Parameter()) + .WithFusionKind(HloInstruction::FusionKind::kCustom))); + + EXPECT_TRUE(RunAndCompare(kHloText, ErrorSpec{/*aabs=*/1e-3, /*arel=*/1e-3})); +} + TEST_F(TritonGemmTest, SineOutputIsNotFused) { const std::string kHloText = R"( HloModule m From 17d45cfb8980fc6798926b2968f57d4213c6d5c8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 21 Sep 2023 09:26:22 -0700 Subject: [PATCH 088/567] Disable two compiler warnings in the bazelrc This is adding `-Wno-error=unused-command-line-argument` and `-Wno-gnu-offsetof-extensions` to the `rbe_linux_cpu` config. We have these flags already in the `release_cpu_linux` config, but some RBE builds don't use that config. 
So I'm adding them to `rbe_linux_cpu` as well. Ideally there would be a config that applies to all clang builds but not to the old GCC builds, but that doesn't exist. So for now I don't see a good option other than having the flag in two places (without major restructurings of the bazelrc file). PiperOrigin-RevId: 567330212 --- .bazelrc | 7 +++++++ third_party/xla/.bazelrc | 7 +++++++ third_party/xla/third_party/tsl/.bazelrc | 7 +++++++ 3 files changed, 21 insertions(+) diff --git a/.bazelrc b/.bazelrc index 8fb09a849a8b57..475a1c5378cdfe 100644 --- a/.bazelrc +++ b/.bazelrc @@ -463,6 +463,13 @@ build:rbe_linux_cpu --extra_toolchains="@sigbuild-r2.14-clang17_config_cuda//cro build:rbe_linux_cpu --extra_execution_platforms="@sigbuild-r2.14-clang17_config_platform//:platform" build:rbe_linux_cpu --host_platform="@sigbuild-r2.14-clang17_config_platform//:platform" build:rbe_linux_cpu --platforms="@sigbuild-r2.14-clang17_config_platform//:platform" +# This is needed for all Clang17 builds but must not be present in GCC builds. +build:rbe_linux_cpu --copt=-Wno-error=unused-command-line-argument +# This was added in clang-16 by https://reviews.llvm.org/D133574. +# Can be removed once upb is updated, since a type definition is used within +# offset of in the current version of ubp. +# See https://github.com/protocolbuffers/upb/blob/9effcbcb27f0a665f9f345030188c0b291e32482/upb/upb.c#L183. 
+build:rbe_linux_cpu --copt=-Wno-gnu-offsetof-extensions # Python config is the same across all containers because the binary is the same build:rbe_linux_cpu --repo_env=TF_PYTHON_CONFIG_REPO="@sigbuild-r2.14-clang17_config_python" build:rbe_linux_cpu --python_path="/usr/bin/python3" diff --git a/third_party/xla/.bazelrc b/third_party/xla/.bazelrc index 8fb09a849a8b57..475a1c5378cdfe 100644 --- a/third_party/xla/.bazelrc +++ b/third_party/xla/.bazelrc @@ -463,6 +463,13 @@ build:rbe_linux_cpu --extra_toolchains="@sigbuild-r2.14-clang17_config_cuda//cro build:rbe_linux_cpu --extra_execution_platforms="@sigbuild-r2.14-clang17_config_platform//:platform" build:rbe_linux_cpu --host_platform="@sigbuild-r2.14-clang17_config_platform//:platform" build:rbe_linux_cpu --platforms="@sigbuild-r2.14-clang17_config_platform//:platform" +# This is needed for all Clang17 builds but must not be present in GCC builds. +build:rbe_linux_cpu --copt=-Wno-error=unused-command-line-argument +# This was added in clang-16 by https://reviews.llvm.org/D133574. +# Can be removed once upb is updated, since a type definition is used within +# offset of in the current version of ubp. +# See https://github.com/protocolbuffers/upb/blob/9effcbcb27f0a665f9f345030188c0b291e32482/upb/upb.c#L183. 
+build:rbe_linux_cpu --copt=-Wno-gnu-offsetof-extensions # Python config is the same across all containers because the binary is the same build:rbe_linux_cpu --repo_env=TF_PYTHON_CONFIG_REPO="@sigbuild-r2.14-clang17_config_python" build:rbe_linux_cpu --python_path="/usr/bin/python3" diff --git a/third_party/xla/third_party/tsl/.bazelrc b/third_party/xla/third_party/tsl/.bazelrc index 8fb09a849a8b57..475a1c5378cdfe 100644 --- a/third_party/xla/third_party/tsl/.bazelrc +++ b/third_party/xla/third_party/tsl/.bazelrc @@ -463,6 +463,13 @@ build:rbe_linux_cpu --extra_toolchains="@sigbuild-r2.14-clang17_config_cuda//cro build:rbe_linux_cpu --extra_execution_platforms="@sigbuild-r2.14-clang17_config_platform//:platform" build:rbe_linux_cpu --host_platform="@sigbuild-r2.14-clang17_config_platform//:platform" build:rbe_linux_cpu --platforms="@sigbuild-r2.14-clang17_config_platform//:platform" +# This is needed for all Clang17 builds but must not be present in GCC builds. +build:rbe_linux_cpu --copt=-Wno-error=unused-command-line-argument +# This was added in clang-16 by https://reviews.llvm.org/D133574. +# Can be removed once upb is updated, since a type definition is used within +# offset of in the current version of ubp. +# See https://github.com/protocolbuffers/upb/blob/9effcbcb27f0a665f9f345030188c0b291e32482/upb/upb.c#L183. +build:rbe_linux_cpu --copt=-Wno-gnu-offsetof-extensions # Python config is the same across all containers because the binary is the same build:rbe_linux_cpu --repo_env=TF_PYTHON_CONFIG_REPO="@sigbuild-r2.14-clang17_config_python" build:rbe_linux_cpu --python_path="/usr/bin/python3" From 91f89fd2abb71f41522e47a2910b63da50651973 Mon Sep 17 00:00:00 2001 From: Wilsin Gosti Date: Thu, 21 Sep 2023 09:33:37 -0700 Subject: [PATCH 089/567] #tf.data The change of default memory cap for tf.data should have been rolled out with an experiment. Revert it back and will re-roll it out with an experiment. 
PiperOrigin-RevId: 567332040 --- tensorflow/core/framework/model.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/framework/model.h b/tensorflow/core/framework/model.h index 5dfa6f579abfdc..727af779489b1b 100644 --- a/tensorflow/core/framework/model.h +++ b/tensorflow/core/framework/model.h @@ -65,7 +65,7 @@ constexpr char kMaxBufferedElements[] = "max_buffered_elements"; constexpr char kModelInputTimeKey[] = "model_input_time"; // Default share of available RAM that can be used by model's internal buffers. -constexpr double kRamBudgetShare = 0.9; +constexpr double kRamBudgetShare = 0.5; // Weight of the latest processing time used in computing the exponential moving // average of processing time per element. From 6c34dcf9aa7970fcfbba4508e9437a6f17517e24 Mon Sep 17 00:00:00 2001 From: Quentin Khan Date: Thu, 21 Sep 2023 09:51:03 -0700 Subject: [PATCH 090/567] Legalize MHLO Pad operation to TFLite PiperOrigin-RevId: 567337044 --- tensorflow/compiler/mlir/lite/stablehlo/BUILD | 1 + .../stablehlo/tests/tfl_legalize_hlo_pad.mlir | 175 ++++++++++++++ .../lite/stablehlo/transforms/legalize_hlo.cc | 3 + .../transforms/legalize_hlo_conversions/BUILD | 21 ++ .../legalize_hlo_conversions/pad.cc | 82 +++++++ .../transforms/legalize_hlo_conversions/pad.h | 36 +++ .../legalize_hlo_conversions/util.cc | 220 +++++++++++++++++- .../legalize_hlo_conversions/util.h | 139 ++++++++++- .../transforms/legalize_hlo_patterns.td | 1 + .../transforms/tflite_legalize_hlo.cc | 5 +- 10 files changed, 675 insertions(+), 8 deletions(-) create mode 100644 tensorflow/compiler/mlir/lite/stablehlo/tests/tfl_legalize_hlo_pad.mlir create mode 100644 tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/pad.cc create mode 100644 tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/pad.h diff --git a/tensorflow/compiler/mlir/lite/stablehlo/BUILD b/tensorflow/compiler/mlir/lite/stablehlo/BUILD index 
4907ef34d37cd8..b65d7480a6ece3 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/BUILD +++ b/tensorflow/compiler/mlir/lite/stablehlo/BUILD @@ -500,6 +500,7 @@ cc_library( deps = [ ":passes_inc_gen", "//tensorflow/compiler/mlir/lite:tensorflow_lite", + "//tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions:pad", "//tensorflow/compiler/mlir/tensorflow", "@llvm-project//mlir:ArithDialect", "@llvm-project//mlir:FuncDialect", diff --git a/tensorflow/compiler/mlir/lite/stablehlo/tests/tfl_legalize_hlo_pad.mlir b/tensorflow/compiler/mlir/lite/stablehlo/tests/tfl_legalize_hlo_pad.mlir new file mode 100644 index 00000000000000..b72b4296c000ff --- /dev/null +++ b/tensorflow/compiler/mlir/lite/stablehlo/tests/tfl_legalize_hlo_pad.mlir @@ -0,0 +1,175 @@ +// RUN: odml-to-stablehlo-opt %s -tfl-legalize-hlo -split-input-file | FileCheck %s --dump-input=fail + +func.func @mhlo_pad_test__noop(%input: tensor<5x7xf32>, %padding_value: tensor) -> tensor<5x7xf32> { + %0 = "mhlo.pad"(%input, %padding_value) { + edge_padding_low = dense<[0, 0]> : tensor<2xi64>, + edge_padding_high = dense<[0, 0]> : tensor<2xi64>, + interior_padding = dense<[0, 0]> : tensor<2xi64> + } : (tensor<5x7xf32>, tensor) -> tensor<5x7xf32> + func.return %0: tensor<5x7xf32> + +// CHECK-LABEL: mhlo_pad_test__noop +// CHECK: return %arg0 : tensor<5x7xf32> +} + +func.func @mhlo_pad_test__pad_all(%input: tensor<5x7xf32>, %padding_value: tensor) -> tensor<9x10xf32> { + %0 = "mhlo.pad"(%input, %padding_value) { + edge_padding_low = dense<[3, 2]> : tensor<2xi64>, + edge_padding_high = dense<[1, 1]> : tensor<2xi64>, + interior_padding = dense<[0, 0]> : tensor<2xi64> + } : (tensor<5x7xf32>, tensor) -> tensor<9x10xf32> + func.return %0: tensor<9x10xf32> + +// CHECK-LABEL: mhlo_pad_test__pad_all +// CHECK: %cst = arith.constant dense<{{\[}}[3, 1], [2, 1]]> : tensor<2x2xi64> +// CHECK: %0 = "tfl.padv2"(%arg0, %cst, %arg1) : (tensor<5x7xf32>, tensor<2x2xi64>, tensor) -> tensor<9x10xf32> +// CHECK: 
return %0 : tensor<9x10xf32> +} + +func.func @mhlo_pad_test__crop_all(%input: tensor<5x7xf32>, %padding_value: tensor) -> tensor<3x5xf32> { + %0 = "mhlo.pad"(%input, %padding_value) { + edge_padding_low = dense<[-1, -1]> : tensor<2xi64>, + edge_padding_high = dense<[-1, -1]> : tensor<2xi64>, + interior_padding = dense<[0, 0]> : tensor<2xi64> + } : (tensor<5x7xf32>, tensor) -> tensor<3x5xf32> + func.return %0: tensor<3x5xf32> + +// CHECK-LABEL: mhlo_pad_test__crop_all +// CHECK: %cst = arith.constant dense<1> : tensor<2xi64> +// CHECK: %cst_0 = arith.constant dense<-1> : tensor<2xi64> +// CHECK: %cst_1 = arith.constant dense<1> : tensor<2xi64> +// CHECK: %0 = "tfl.strided_slice"(%arg0, %cst, %cst_0, %cst_1) {begin_mask = 0 : i32, ellipsis_mask = 0 : i32, end_mask = 0 : i32, new_axis_mask = 0 : i32, offset = false, shrink_axis_mask = 0 : i32} : (tensor<5x7xf32>, tensor<2xi64>, tensor<2xi64>, tensor<2xi64>) -> tensor<3x5xf32> +// CHECK: return %0 : tensor<3x5xf32> +} + +func.func @mhlo_pad_test__interior_pad_all(%input: tensor<5x7xf32>, %padding_value: tensor) -> tensor<9x13xf32> { + %0 = "mhlo.pad"(%input, %padding_value) { + edge_padding_low = dense<[0, 0]> : tensor<2xi64>, + edge_padding_high = dense<[0, 0]> : tensor<2xi64>, + interior_padding = dense<[1, 1]> : tensor<2xi64> + } : (tensor<5x7xf32>, tensor) -> tensor<9x13xf32> + func.return %0: tensor<9x13xf32> + +// CHECK-LABEL: mhlo_pad_test__interior_pad_all +// CHECK: %cst = arith.constant dense<2> : tensor<2xi32> +// CHECK: %0 = "tfl.dilate"(%arg0, %cst, %arg1) : (tensor<5x7xf32>, tensor<2xi32>, tensor) -> tensor<9x13xf32> +// CHECK: return %0 : tensor<9x13xf32> +} + +func.func @mhlo_pad_test__pad_and_crop(%input: tensor<5x7xf32>, %padding_value: tensor) -> tensor<5x7xf32> { + %0 = "mhlo.pad"(%input, %padding_value) { + edge_padding_low = dense<[-1, 1]> : tensor<2xi64>, + edge_padding_high = dense<[1, -1]> : tensor<2xi64>, + interior_padding = dense<[0, 0]> : tensor<2xi64> + } : (tensor<5x7xf32>, tensor) -> 
tensor<5x7xf32> + func.return %0: tensor<5x7xf32> + +// CHECK-LABEL: mhlo_pad_test__pad_and_crop +// CHECK: %cst = arith.constant dense<{{\[}}[0, 1], [1, 0]]> : tensor<2x2xi64> +// CHECK: %0 = "tfl.padv2"(%arg0, %cst, %arg1) : (tensor<5x7xf32>, tensor<2x2xi64>, tensor) -> tensor<6x8xf32> +// CHECK: %cst_0 = arith.constant dense<[1, 0]> : tensor<2xi64> +// CHECK: %cst_1 = arith.constant dense<[0, -1]> : tensor<2xi64> +// CHECK: %cst_2 = arith.constant dense<1> : tensor<2xi64> +// CHECK: %1 = "tfl.strided_slice"(%0, %cst_0, %cst_1, %cst_2) {begin_mask = 2 : i32, ellipsis_mask = 0 : i32, end_mask = 1 : i32, new_axis_mask = 0 : i32, offset = false, shrink_axis_mask = 0 : i32} : (tensor<6x8xf32>, tensor<2xi64>, tensor<2xi64>, tensor<2xi64>) -> tensor<5x7xf32> +// CHECK: return %1 : tensor<5x7xf32> +} + +func.func @mhlo_pad_test__pad_and_crop_and_interior_pad(%input: tensor<5x7xf32>, %padding_value: tensor) -> tensor<13x25xf32> { + %0 = "mhlo.pad"(%input, %padding_value) { + edge_padding_low = dense<[-1, 1]> : tensor<2xi64>, + edge_padding_high = dense<[1, -1]> : tensor<2xi64>, + interior_padding = dense<[2, 3]> : tensor<2xi64> + } : (tensor<5x7xf32>, tensor) -> tensor<13x25xf32> + func.return %0: tensor<13x25xf32> + +// CHECK-LABEL: mhlo_pad_test__pad_and_crop_and_interior_pad +// CHECK: %cst = arith.constant dense<[3, 4]> : tensor<2xi32> +// CHECK: %0 = "tfl.dilate"(%arg0, %cst, %arg1) : (tensor<5x7xf32>, tensor<2xi32>, tensor) -> tensor<13x25xf32> +// CHECK: %cst_0 = arith.constant dense<{{\[}}[0, 1], [1, 0]]> : tensor<2x2xi64> +// CHECK: %1 = "tfl.padv2"(%0, %cst_0, %arg1) : (tensor<13x25xf32>, tensor<2x2xi64>, tensor) -> tensor<14x26xf32> +// CHECK: %cst_1 = arith.constant dense<[1, 0]> : tensor<2xi64> +// CHECK: %cst_2 = arith.constant dense<[0, -1]> : tensor<2xi64> +// CHECK: %cst_3 = arith.constant dense<1> : tensor<2xi64> +// CHECK: %2 = "tfl.strided_slice"(%1, %cst_1, %cst_2, %cst_3) {begin_mask = 2 : i32, ellipsis_mask = 0 : i32, end_mask = 1 : i32, 
new_axis_mask = 0 : i32, offset = false, shrink_axis_mask = 0 : i32} : (tensor<14x26xf32>, tensor<2xi64>, tensor<2xi64>, tensor<2xi64>) -> tensor<13x25xf32> +// CHECK: return %2 : tensor<13x25xf32> +} + +func.func @mhlo_pad_test__pad_all_unknown_shape(%input: tensor, %padding_value: tensor) -> tensor { + %0 = "mhlo.pad"(%input, %padding_value) { + edge_padding_low = dense<[1, 1, 1, 1]> : tensor<4xi64>, + edge_padding_high = dense<[1, 1, 1, 1]> : tensor<4xi64>, + interior_padding = dense<[0, 0, 0, 0]> : tensor<4xi64> + } : (tensor, tensor) -> tensor + func.return %0: tensor + +// CHECK-LABEL: mhlo_pad_test__pad_all_unknown_shape +// CHECK: %cst = arith.constant dense<1> : tensor<4x2xi64> +// CHECK: %0 = "tfl.padv2"(%arg0, %cst, %arg1) : (tensor, tensor<4x2xi64>, tensor) -> tensor +// CHECK: return %0 : tensor +} + +func.func @mhlo_pad_test__crop_all_unknown_shape(%input: tensor, %padding_value: tensor) -> tensor { + %0 = "mhlo.pad"(%input, %padding_value) { + edge_padding_low = dense<[-1, -1, -1, -1]> : tensor<4xi64>, + edge_padding_high = dense<[-1, -1, -1, -1]> : tensor<4xi64>, + interior_padding = dense<[0, 0, 0, 0]> : tensor<4xi64> + } : (tensor, tensor) -> tensor + func.return %0: tensor + +// CHECK-LABEL: mhlo_pad_test__crop_all_unknown_shape +// CHECK: %cst = arith.constant dense<1> : tensor<4xi64> +// CHECK: %cst_0 = arith.constant dense<-1> : tensor<4xi64> +// CHECK: %cst_1 = arith.constant dense<1> : tensor<4xi64> +// CHECK: %0 = "tfl.strided_slice"(%arg0, %cst, %cst_0, %cst_1) {begin_mask = 0 : i32, ellipsis_mask = 0 : i32, end_mask = 0 : i32, new_axis_mask = 0 : i32, offset = false, shrink_axis_mask = 0 : i32} : (tensor, tensor<4xi64>, tensor<4xi64>, tensor<4xi64>) -> tensor +// CHECK: return %0 : tensor +} + +func.func @mhlo_pad_test__pad_all_unknown_dim0(%input: tensor, %padding_value: tensor) -> tensor { + %0 = "mhlo.pad"(%input, %padding_value) { + edge_padding_low = dense<[1, 1, 1, 1]> : tensor<4xi64>, + edge_padding_high = dense<[1, 1, 1, 1]> : 
tensor<4xi64>, + interior_padding = dense<[0, 0, 0, 0]> : tensor<4xi64> + } : (tensor, tensor) -> tensor + func.return %0: tensor + +// CHECK-LABEL: mhlo_pad_test__pad_all_unknown_dim0 +// CHECK: %cst = arith.constant dense<1> : tensor<4x2xi64> +// CHECK: %0 = "tfl.padv2"(%arg0, %cst, %arg1) : (tensor, tensor<4x2xi64>, tensor) -> tensor +// CHECK: return %0 : tensor +} + +func.func @mhlo_pad_test__crop_all_unknown_dim0(%input: tensor, %padding_value: tensor) -> tensor { + %0 = "mhlo.pad"(%input, %padding_value) { + edge_padding_low = dense<[-1, -1, -1, -1]> : tensor<4xi64>, + edge_padding_high = dense<[-1, -1, -1, -1]> : tensor<4xi64>, + interior_padding = dense<[0, 0, 0, 0]> : tensor<4xi64> + } : (tensor, tensor) -> tensor + func.return %0: tensor + +// CHECK-LABEL: mhlo_pad_test__crop_all_unknown_dim0 +// CHECK: %cst = arith.constant dense<1> : tensor<4xi64> +// CHECK: %cst_0 = arith.constant dense<-1> : tensor<4xi64> +// CHECK: %cst_1 = arith.constant dense<1> : tensor<4xi64> +// CHECK: %0 = "tfl.strided_slice"(%arg0, %cst, %cst_0, %cst_1) {begin_mask = 0 : i32, ellipsis_mask = 0 : i32, end_mask = 0 : i32, new_axis_mask = 0 : i32, offset = false, shrink_axis_mask = 0 : i32} : (tensor, tensor<4xi64>, tensor<4xi64>, tensor<4xi64>) -> tensor +// CHECK: return %0 : tensor +} + +func.func @mhlo_pad_test__pad_and_crop_and_interior_pad_unknown_dim0(%input: tensor, %padding_value: tensor) -> tensor { + %0 = "mhlo.pad"(%input, %padding_value) { + edge_padding_low = dense<[-2, -1, 0, 1]> : tensor<4xi64>, + edge_padding_high = dense<[1, 0, -1, -2]> : tensor<4xi64>, + interior_padding = dense<[1, 2, 3, 4]> : tensor<4xi64> + } : (tensor, tensor) -> tensor + func.return %0: tensor + +// CHECK-LABEL: mhlo_pad_test__pad_and_crop_and_interior_pad_unknown_dim0 +// CHECK: %cst = arith.constant dense<[2, 3, 4, 5]> : tensor<4xi32> +// CHECK: %0 = "tfl.dilate"(%arg0, %cst, %arg1) : (tensor, tensor<4xi32>, tensor) -> tensor +// CHECK: %cst_0 = arith.constant dense<{{\[}}[0, 1], [0, 
0], [0, 0], [1, 0]]> : tensor<4x2xi64> +// CHECK: %1 = "tfl.padv2"(%0, %cst_0, %arg1) : (tensor, tensor<4x2xi64>, tensor) -> tensor +// CHECK: %cst_1 = arith.constant dense<[2, 1, 0, 0]> : tensor<4xi64> +// CHECK: %cst_2 = arith.constant dense<[0, 0, -1, -2]> : tensor<4xi64> +// CHECK: %cst_3 = arith.constant dense<1> : tensor<4xi64> +// CHECK: %2 = "tfl.strided_slice"(%1, %cst_1, %cst_2, %cst_3) {begin_mask = 12 : i32, ellipsis_mask = 0 : i32, end_mask = 3 : i32, new_axis_mask = 0 : i32, offset = false, shrink_axis_mask = 0 : i32} : (tensor, tensor<4xi64>, tensor<4xi64>, tensor<4xi64>) -> tensor +// CHECK: return %2 : tensor +} diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo.cc b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo.cc index ca6db32e17ac42..d424a1e9c3137b 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo.cc +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo.cc @@ -3337,6 +3337,9 @@ class ConvertIfOp : public OpConversionPattern { }; // Converts mhlo.pad to tf.PadV2 +// TODO: b/301438955 - This is redundant with the MHLO -> TFLite +// legalization and covers fewer use cases. We need to check with DarwiNN that +// this can be removed without breaking their workflow.
Value ConvertPadOp(PatternRewriter& rewriter, Operation* old_op) { auto pad_op = cast(old_op); mlir::Location loc = pad_op.getLoc(); diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/BUILD b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/BUILD index c5c1e7cade6a8f..c397605fe682ec 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/BUILD +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/BUILD @@ -18,8 +18,11 @@ cc_library( "util.h", ], deps = [ + "//tensorflow/compiler/mlir/lite:tensorflow_lite", "//tensorflow/compiler/mlir/tensorflow", + "@com_google_absl//absl/algorithm:container", "@llvm-project//llvm:Support", + "@llvm-project//mlir:ArithDialect", "@llvm-project//mlir:IR", "@llvm-project//mlir:Support", "@llvm-project//mlir:TransformUtils", @@ -45,3 +48,21 @@ cc_library( "@local_xla//xla/mlir_hlo", ], ) + +cc_library( + name = "pad", + srcs = [ + "pad.cc", + ], + hdrs = [ + "pad.h", + ], + deps = [ + ":util", + "@llvm-project//llvm:Support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Support", + "@llvm-project//mlir:TransformUtils", + "@local_xla//xla/mlir_hlo", + ], +) diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/pad.cc b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/pad.cc new file mode 100644 index 00000000000000..9fd1fcb8402c51 --- /dev/null +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/pad.cc @@ -0,0 +1,82 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/pad.h" + +#include + +#include "llvm/ADT/SmallVector.h" +#include "mlir/IR/BuiltinAttributes.h" // from @llvm-project +#include "mlir/Support/LLVM.h" // from @llvm-project +#include "mlir/Support/LogicalResult.h" // from @llvm-project +#include "mlir/Transforms/DialectConversion.h" // from @llvm-project +#include "tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/util.h" +#include "xla/mlir_hlo/mhlo/IR/hlo_ops.h" + +namespace mlir { +namespace odml { + +ConversionState BuildConversionState(mhlo::PadOp mhlo_pad, + ConversionPatternRewriter& rewriter) { + ConversionState state{ + /*.hlo_op=*/mhlo_pad.getOperation(), + /*.rewriter=*/rewriter, + /*.last_tf_op=*/nullptr, + }; + return state; +} + +// Converts the given StableHLO Pad operation to a chain of TFLite operations. +// +// StableHLO Pad allows dilating, padding and cropping its input, in that order. +// This can be implemented in TFLite as a sequence of these operations. Note +// that all operations do not always need to be called: if there is no dilation +// (resp. pad, crop) we do not need to add it to the chain. +// +// TFLite does not provide a crop operation, the StridedSlice one is used +// instead. +LogicalResult ConvertPadOp::matchAndRewrite( + mhlo::PadOp mhlo_pad, OpAdaptor adaptor, + ConversionPatternRewriter& rewriter) const { + // We don't need to match the pad op as we always know how to convert it.
+ ConversionState state = BuildConversionState(mhlo_pad, rewriter); + + // Dilate when interior padding is specified different from 0. + AddDilateOpIfRequired(state, mhlo_pad.getInteriorPadding(), + mhlo_pad.getPaddingValue(), + /*is_padding=*/true); + // Pad when padding has positive values. + AddPadOpIfRequired(state, mhlo_pad.getEdgePaddingLow(), + mhlo_pad.getEdgePaddingHigh(), mhlo_pad.getPaddingValue()); + // Crop when padding has negative values. + // + // Note that there is no crop operation in TFLite so we use the StridedSlice + // operation instead. + const DenseElementsAttr strides_data = CreateDenseElementsAttr( + state.rewriter, + llvm::SmallVector(state.GetOperandShape().size(), 1)); + AddStridedSliceOpIfRequired(state, mhlo_pad.getEdgePaddingLow(), + mhlo_pad.getEdgePaddingHigh(), strides_data); + + if (state.last_tf_op) { + rewriter.replaceOp(mhlo_pad, state.last_tf_op); + } else { + rewriter.replaceOp(mhlo_pad, mhlo_pad.getOperand()); + } + return success(); +} + +} // namespace odml +} // namespace mlir diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/pad.h b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/pad.h new file mode 100644 index 00000000000000..c0fa5017b69236 --- /dev/null +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/pad.h @@ -0,0 +1,36 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_MLIR_LITE_STABLEHLO_TRANSFORMS_LEGALIZE_HLO_CONVERSIONS_PAD_H_ +#define TENSORFLOW_COMPILER_MLIR_LITE_STABLEHLO_TRANSFORMS_LEGALIZE_HLO_CONVERSIONS_PAD_H_ + +#include "mlir/Transforms/DialectConversion.h" // from @llvm-project +#include "xla/mlir_hlo/mhlo/IR/hlo_ops.h" + +namespace mlir { +namespace odml { + +class ConvertPadOp : public OpConversionPattern { + public: + using OpConversionPattern::OpConversionPattern; + + LogicalResult matchAndRewrite( + mhlo::PadOp mhlo_pad, OpAdaptor adaptor, + ConversionPatternRewriter& rewriter) const final; +}; + +} // namespace odml +} // namespace mlir +#endif // TENSORFLOW_COMPILER_MLIR_LITE_STABLEHLO_TRANSFORMS_LEGALIZE_HLO_CONVERSIONS_PAD_H_ diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/util.cc b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/util.cc index 2cd9a689f8e780..3588b68bc1b182 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/util.cc +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/util.cc @@ -15,10 +15,11 @@ limitations under the License. #include "tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/util.h" -#include - #include +#include +#include +#include "absl/algorithm/container.h" #include "llvm/ADT/SmallVector.h" #include "mlir/IR/Block.h" // from @llvm-project #include "mlir/IR/BuiltinAttributes.h" // from @llvm-project @@ -28,9 +29,11 @@ limitations under the License. 
#include "mlir/IR/Operation.h" // from @llvm-project #include "mlir/IR/Region.h" // from @llvm-project #include "mlir/IR/Types.h" // from @llvm-project +#include "mlir/IR/Value.h" // from @llvm-project #include "mlir/Support/LLVM.h" // from @llvm-project #include "mlir/Support/LogicalResult.h" // from @llvm-project #include "mlir/Transforms/DialectConversion.h" // from @llvm-project +#include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" // IWYU pragma: keep #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "xla/mlir_hlo/mhlo/IR/hlo_ops.h" @@ -158,5 +161,218 @@ LogicalResult MatchBinaryReduceFunction(mlir::Region& function) { return success(); } +Value ConversionState::GetOperand() const { + if (last_tf_op) { + return last_tf_op->getResult(0); + } + return hlo_op->getOperand(0); +} + +TensorType ConversionState::GetOperandTensorType() const { + if (last_tf_op) { + return last_tf_op->getResult(0).getType().cast(); + } + return hlo_op->getOperand(0).getType().cast(); +} + +llvm::ArrayRef ConversionState::GetOperandShape() const { + return GetOperandTensorType().getShape(); +} + +namespace { + +// Gets the dilation data for TFLite Dilate. +// +// Depending on the definition of the op we are trying to legalize, a dilation +// can be either seen as interior padding or as a scaling factor where: +// +// scaling_factor = interior_padding + 1 +// +// The is_padding parameter is used to take this difference into account. 
+llvm::SmallVector GetDilateData(const DenseElementsAttr& dilation, + const bool is_padding) { + llvm::SmallVector data; + for (const auto& v : dilation.getValues()) { + data.push_back(v.getSExtValue() + static_cast(is_padding)); + } + return data; +} + +} // namespace + +void AddDilateOpIfRequired(ConversionState& state, + const DenseElementsAttr& dilation, + const Value padding_value, const bool is_padding) { + const auto dilate_data = GetDilateData(dilation, is_padding); + if (absl::c_any_of(dilate_data, IsNot(1))) { + const TensorType output_type = state.ComputeResultTensorType( + [](int i, const auto& shape, const auto& dilate_data) { + if (shape[i] < 0) { + return shape[i]; + } + return shape[i] + (shape[i] - 1) * (dilate_data[i] - 1); + }, + dilate_data); + + auto dilate_tensor = AddConstantTensor(state, dilate_data); + auto tfl_dilate = state.rewriter.create( + state.hlo_op->getLoc(), output_type, state.GetOperand(), dilate_tensor, + padding_value); + + state.last_tf_op = tfl_dilate; + } +} + +namespace { + +// Gets the pad data for TFLite PadV2. +// +// StableHLO Pad allows negative values for cropping. This function replaces +// negative values with 0. +llvm::SmallVector GetPadData( + const DenseElementsAttr& edge_padding_low, + const DenseElementsAttr& edge_padding_high) { + llvm::SmallVector data; + auto low_values = edge_padding_low.getValues(); + auto high_values = edge_padding_high.getValues(); + for (int i = 0; i < edge_padding_low.getNumElements(); ++i) { + const int64_t pad_low = low_values[i].getSExtValue(); + const int64_t pad_high = high_values[i].getSExtValue(); + data.push_back(pad_low < 0 ? 0 : pad_low); + data.push_back(pad_high < 0 ?
0 : pad_high); + } + return data; +} + +template +void AddPadOpIfRequiredImpl(ConversionState& state, const Container& pad_data, + const Value padding_value) { + if (absl::c_any_of(pad_data, IsNot(0))) { + const TensorType output_type = state.ComputeResultTensorType( + [](int i, const auto& shape, const auto& pad) { + if (shape[i] < 0) { + return shape[i]; + } + return shape[i] + pad[2 * i] + pad[2 * i + 1]; + }, + pad_data); + + auto pad_tensor = AddConstantTensor( + state, pad_data, + {static_cast(state.GetOperandShape().size()), 2}); + auto tfl_pad = state.rewriter.create( + state.hlo_op->getLoc(), output_type, state.GetOperand(), pad_tensor, + padding_value); + + state.last_tf_op = tfl_pad; + } +} + +} // namespace + +void AddPadOpIfRequired(ConversionState& state, + const DenseElementsAttr& edge_padding_low, + const DenseElementsAttr& edge_padding_high, + const Value padding_value) { + AddPadOpIfRequiredImpl(state, GetPadData(edge_padding_low, edge_padding_high), + padding_value); +} + +namespace { + +// Holds the data needed to generate a TFLite StridedSlice operation. +struct StridedSliceData { + llvm::SmallVector low; + llvm::SmallVector high; + llvm::SmallVector strides; + int32_t begin_mask = 0; + int32_t end_mask = 0; + + void resize(const size_t size) { + low.resize(size); + high.resize(size); + strides.resize(size); + } +}; + +// Updates the strided slice data with the given values for the `i`th element. +// +// Warning: this expects the data internal buffers to have at least i+1 +// elements. 
+void AppendDataDim(StridedSliceData& data, const int i, const APInt& low, + const APInt& high, const APInt& stride) { + const int64_t pad_low = low.getSExtValue(); + const int64_t pad_high = high.getSExtValue(); + if (pad_low >= 0) { + data.begin_mask |= 1 << i; + data.low[i] = 0; + } else { + data.low[i] = -pad_low; + } + if (pad_high >= 0) { + data.end_mask |= 1 << i; + data.high[i] = 0; + } else { + data.high[i] = pad_high; + } + data.strides[i] = stride.getSExtValue(); +} + +// Gets the data needed to generate a TFLite StridedSlice operation. +StridedSliceData GetStridedSliceData(const DenseElementsAttr& edge_padding_low, + const DenseElementsAttr& edge_padding_high, + const DenseElementsAttr& strides) { + StridedSliceData data; + data.resize(edge_padding_low.getNumElements()); + const auto low_values = edge_padding_low.getValues(); + const auto high_values = edge_padding_high.getValues(); + const auto stride_values = strides.getValues(); + for (int i = 0; i < edge_padding_low.getNumElements(); ++i) { + AppendDataDim(data, i, low_values[i], high_values[i], stride_values[i]); + } + return data; +} + +void AddStridedSliceOpIfRequiredImpl( + ConversionState& state, const StridedSliceData& strided_slice_data) { + if (absl::c_any_of(strided_slice_data.low, IsNot(0)) || + absl::c_any_of(strided_slice_data.high, IsNot(0)) || + absl::c_any_of(strided_slice_data.strides, IsNot(1))) { + const TensorType output_type = state.ComputeResultTensorType( + [](int i, const auto& shape, const auto& high, const auto& low, + const auto& strides) { + if (shape[i] < 0) { + return shape[i]; + } + return (shape[i] + high[i] - low[i]) / strides[i]; + }, + strided_slice_data.high, strided_slice_data.low, + strided_slice_data.strides); + + auto crop_begin_tensor = AddConstantTensor(state, strided_slice_data.low); + auto crop_end_tensor = AddConstantTensor(state, strided_slice_data.high); + auto crop_strides_tensor = + AddConstantTensor(state, strided_slice_data.strides); + auto tfl_crop 
= state.rewriter.create( + state.hlo_op->getLoc(), output_type, state.GetOperand(), + crop_begin_tensor, crop_end_tensor, crop_strides_tensor, + strided_slice_data.begin_mask, strided_slice_data.end_mask, 0, 0, 0, + false); + + state.last_tf_op = tfl_crop; + } +} + +} // namespace + +void AddStridedSliceOpIfRequired(ConversionState& state, + const DenseElementsAttr& edge_padding_low, + const DenseElementsAttr& edge_padding_high, + const DenseElementsAttr& strides) { + StridedSliceData strided_slice_data = + GetStridedSliceData(edge_padding_low, edge_padding_high, strides); + AddStridedSliceOpIfRequiredImpl(state, strided_slice_data); +} + } // namespace odml } // namespace mlir diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/util.h b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/util.h index 72485597d6c4ff..8d63bfa1435324 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/util.h +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/util.h @@ -16,12 +16,12 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_MLIR_LITE_STABLEHLO_TRANSFORMS_LEGALIZE_HLO_CONVERSIONS_UTIL_H_ #define TENSORFLOW_COMPILER_MLIR_LITE_STABLEHLO_TRANSFORMS_LEGALIZE_HLO_CONVERSIONS_UTIL_H_ -#include - -#include +#include #include "llvm/ADT/SmallVector.h" +#include "mlir/Dialect/Arith/IR/Arith.h" // from @llvm-project #include "mlir/IR/Block.h" // from @llvm-project +#include "mlir/IR/Builders.h" // from @llvm-project #include "mlir/IR/BuiltinAttributes.h" // from @llvm-project #include "mlir/IR/BuiltinTypeInterfaces.h" // from @llvm-project #include "mlir/IR/BuiltinTypes.h" // from @llvm-project @@ -30,7 +30,6 @@ limitations under the License. 
#include "mlir/IR/Types.h" // from @llvm-project #include "mlir/Support/LogicalResult.h" // from @llvm-project #include "mlir/Transforms/DialectConversion.h" // from @llvm-project -#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "xla/mlir_hlo/mhlo/IR/hlo_ops.h" namespace mlir { @@ -112,6 +111,138 @@ LogicalResult MatchBinaryReduceFunction(mlir::Region& function) { // scatter like ops. template <> LogicalResult MatchBinaryReduceFunction(mlir::Region& function); + +// Concentrates the data needed to substitute StableHLO operations with TFLite +// ones. +struct ConversionState { + Operation* hlo_op; + ConversionPatternRewriter& rewriter; + Operation* last_tf_op; + + // Returns the main operand of a NEW op to add to the conversion chain. + // + // This is generally the result of the last op that was added to the chain. + Value GetOperand() const; + + // Returns the type of the operand of a NEW op to add to the conversion chain. + // + // This is generally the type of the result of the last op that was added to + // the chain. + TensorType GetOperandTensorType() const; + + llvm::ArrayRef GetOperandShape() const; + + // Computes a new shape from the current operand shape. + // + // - The args are containers that are indexable using operator[]. + // - The callback must be callable and have a signature that is: + // `int64_t (int idx, shape, decltype(args)...)` + // + // The callback is called for each element of the operand shape with the + // index of the current loop iteration, the shape and args. + template + llvm::SmallVector ComputeResultShape(F&& callback, + Containers&&... args) const { + llvm::ArrayRef shape = GetOperandShape(); + llvm::SmallVector res; + for (int i = 0; i < shape.size(); ++i) { + if (shape[i] < 0) { + res.push_back(shape[i]); + } else { + res.push_back(callback(i, shape, args...)); + } + } + return res; + } + + template + TensorType ComputeResultTensorType(F&& callback, Containers&&...
args) const { + const llvm::SmallVector shape = ComputeResultShape( + static_cast(callback), static_cast(args)...); + return GetOperandTensorType().cloneWith( + shape, GetOperandTensorType().getElementType()); + } +}; + +// Gets the Type associated to type T from the builder. +template +Type GetElementType(OpBuilder& builder); + +#define GET_ELEMENT_TYPE_SPECIALISATION(TYPE, NAME) \ + template <> \ + inline Type GetElementType(OpBuilder & builder) { \ + return builder.get##NAME##Type(); \ + } + +GET_ELEMENT_TYPE_SPECIALISATION(int32_t, I32); +GET_ELEMENT_TYPE_SPECIALISATION(int64_t, I64); + +// Create a DenseElementsAttr from given shape and data. +template > +DenseElementsAttr CreateDenseElementsAttr(OpBuilder& builder, const Data& data, + const Shape& shape = Shape()) { + llvm::SmallVector attr_shape(shape.begin(), shape.end()); + if (attr_shape.empty()) { + attr_shape.push_back(static_cast(data.size())); + } + const Type attr_type = GetElementType(builder); + return DenseElementsAttr::get(RankedTensorType::get(attr_shape, attr_type), + ArrayRef(data)); +} + +// Adds a constant tensor to the conversion chain. +template > +auto AddConstantTensor(ConversionState& state, const Data& data, + const Shape& shape = Shape()) { + const DenseElementsAttr attr = + CreateDenseElementsAttr(state.rewriter, data, shape); + return state.rewriter.create(state.hlo_op->getLoc(), attr); +} + +// Builds a callable object that checks that its argument is not the given +// `value`. +template +auto IsNot(T value) { + return [value](auto v) { return v != value; }; +} + +// Adds a TFLite Dilate operation to the conversion chain. +// +// If the given parameters would end with the identity operation, this does not +// add anything to the chain. 
+// +// Depending on the definition of the op we are trying to legalize, a dilation +// can be either seen as interior padding or as a scaling factor where: +// +// scaling_factor = interior_padding + 1 +// +// The is_padding parameter is used to take this difference into account. +void AddDilateOpIfRequired(ConversionState& state, + const DenseElementsAttr& dilation, + Value padding_value, bool is_padding); + +// Adds a TFLite PadV2 operation to the conversion chain. +// +// If the given parameters would end with the identity operation, this does not +// add anything to the chain. +void AddPadOpIfRequired(ConversionState& state, + const DenseElementsAttr& edge_padding_low, + const DenseElementsAttr& edge_padding_high, + Value padding_value); + +// Adds a TFLite StridedSlice operation to the conversion chain. +// +// This overload is used to legalize a crop operation in TFLite. As such, the +// begin and end specifications of the strided slice are computed from the +// negative values in the padding parameters. +// +// If the given parameters would end with the identity operation, this does not +// add anything to the chain. 
+void AddStridedSliceOpIfRequired(ConversionState& state, + const DenseElementsAttr& edge_padding_low, + const DenseElementsAttr& edge_padding_high, + const DenseElementsAttr& strides); + } // namespace odml } // namespace mlir diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_patterns.td b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_patterns.td index ea0ab47b25ccd8..5030777490c5d8 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_patterns.td +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_patterns.td @@ -304,6 +304,7 @@ def : Pat<(MHLO_DotGeneralOp:$old_value $dot_dimension_numbers, $precision_config), (ConvertDotGeneralOp $old_value)>; + def IsZero : Constraint() == 0">>; def ConvertPadOp : NativeCodeCall< diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/tflite_legalize_hlo.cc b/tensorflow/compiler/mlir/lite/stablehlo/transforms/tflite_legalize_hlo.cc index e3c52f9d529532..53a89ac16ee0b6 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/transforms/tflite_legalize_hlo.cc +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/tflite_legalize_hlo.cc @@ -27,6 +27,7 @@ limitations under the License. #include "mlir/Support/TypeID.h" // from @llvm-project #include "mlir/Transforms/DialectConversion.h" // from @llvm-project #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" // IWYU pragma: keep +#include "tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/pad.h" #include "tensorflow/compiler/mlir/lite/transforms/passes.h" // IWYU pragma: keep #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" // IWYU pragma: keep #include "xla/mlir_hlo/mhlo/IR/hlo_ops.h" @@ -52,13 +53,13 @@ void LegalizeHloToTfLitePass::runOnOperation() { MLIRContext& context = getContext(); RewritePatternSet patterns(&getContext()); // Add new conversion patterns here. 
- // patterns.add<>(&context); + patterns.add(&context); ConversionTarget target(context); target.addLegalDialect(); target.addLegalOp(); // Converted MHLO ops should be marked illegal here. - // target.addIllegalOp<>(); + target.addIllegalOp(); if (failed(applyPartialConversion(getOperation(), target, std::move(patterns)))) { getOperation().emitError("mhlo to TFLite legalization failed."); From fad9aa2ac91f765c164746263151668b8230535e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 21 Sep 2023 10:04:52 -0700 Subject: [PATCH 091/567] Internal change only. PiperOrigin-RevId: 567341035 --- .../xla/third_party/tsl/tsl/platform/default/logging.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/third_party/xla/third_party/tsl/tsl/platform/default/logging.h b/third_party/xla/third_party/tsl/tsl/platform/default/logging.h index 0b934012c1f3cb..3dba4b3b8653b7 100644 --- a/third_party/xla/third_party/tsl/tsl/platform/default/logging.h +++ b/third_party/xla/third_party/tsl/tsl/platform/default/logging.h @@ -583,6 +583,10 @@ class TFLogEntry { std::string ToString() const { return message_; } absl::string_view text_message() const { return message_; } + // Returning similar result as `text_message` as there is no prefix in this + // implementation. 
+ absl::string_view text_message_with_prefix() const { return message_; } + private: const absl::LogSeverity severity_; const std::string fname_; From 7e711e5754d30c78201885e9ae0ad2eaebb109b3 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Thu, 21 Sep 2023 11:04:49 -0700 Subject: [PATCH 092/567] [stream_executor] NFC: Add dnn dependency to stream executor impl https://github.com/openxla/xla/issues/5761 PiperOrigin-RevId: 567359965 --- third_party/xla/xla/stream_executor/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/third_party/xla/xla/stream_executor/BUILD b/third_party/xla/xla/stream_executor/BUILD index f2c8cffb029550..677b085f83a7db 100644 --- a/third_party/xla/xla/stream_executor/BUILD +++ b/third_party/xla/xla/stream_executor/BUILD @@ -781,6 +781,7 @@ cc_library( deps = [ ":device_description", ":device_memory", + ":dnn", ":dnn_proto_cc", ":event", ":kernel", From 0c637e6d2eb9223355693e543c28fc4a705ce3e1 Mon Sep 17 00:00:00 2001 From: Raviteja Gorijala Date: Thu, 21 Sep 2023 11:16:14 -0700 Subject: [PATCH 093/567] Update release branch string for Apple Silicon MacOS builds PiperOrigin-RevId: 567363555 --- tensorflow/tools/ci_build/update_version.py | 33 ++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/update_version.py b/tensorflow/tools/ci_build/update_version.py index 513891192d8d2d..ea7fd29e58745f 100755 --- a/tensorflow/tools/ci_build/update_version.py +++ b/tensorflow/tools/ci_build/update_version.py @@ -36,7 +36,22 @@ SETUP_PY = "%s/tools/pip_package/setup.py" % TF_SRC_DIR README_MD = "./README.md" TENSORFLOW_BZL = "%s/tensorflow.bzl" % TF_SRC_DIR -RELEVANT_FILES = [TF_SRC_DIR, VERSION_H, SETUP_PY, README_MD] +TF_MAC_ARM64_CI_BUILD = ( + "%s/tools/ci_build/osx/arm64/tensorflow_as_build_release.Jenkinsfile" + % TF_SRC_DIR +) +TF_MAC_ARM64_CI_TEST = ( + "%s/tools/ci_build/osx/arm64/tensorflow_as_test_release.Jenkinsfile" + % TF_SRC_DIR +) +RELEVANT_FILES = [ + TF_SRC_DIR, + 
VERSION_H, + SETUP_PY, + README_MD, + TF_MAC_ARM64_CI_BUILD, + TF_MAC_ARM64_CI_TEST +] # Version type parameters. NIGHTLY_VERSION = 1 @@ -221,6 +236,20 @@ def update_tensorflow_bzl(old_version, new_version): 'VERSION = "%s"' % new_mmp, TENSORFLOW_BZL) +def update_m1_builds(old_version, new_version): + """Update M1 builds.""" + replace_string_in_line( + "RELEASE_BRANCH = 'r%s.%s'" % (old_version.major, old_version.minor), + "RELEASE_BRANCH = 'r%s.%s'" % (new_version.major, new_version.minor), + TF_MAC_ARM64_CI_BUILD, + ) + replace_string_in_line( + "RELEASE_BRANCH = 'r%s.%s'" % (old_version.major, old_version.minor), + "RELEASE_BRANCH = 'r%s.%s'" % (new_version.major, new_version.minor), + TF_MAC_ARM64_CI_TEST, + ) + + def major_minor_change(old_version, new_version): """Check if a major or minor change occurred.""" major_mismatch = old_version.major != new_version.major @@ -301,6 +330,8 @@ def main(): NIGHTLY_VERSION) else: new_version = Version.parse_from_string(args.version, REGULAR_VERSION) + # Update Apple Silicon release CI files for release builds only + update_m1_builds(old_version, new_version) update_version_h(old_version, new_version) update_setup_dot_py(old_version, new_version) From da4b96f223fee680d305d79e0ca35c6c7810ee9c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 21 Sep 2023 11:16:50 -0700 Subject: [PATCH 094/567] Free GPU memory Streamz Metric. 
PiperOrigin-RevId: 567363752 --- tensorflow/core/tfrt/saved_model/BUILD | 2 ++ .../core/tfrt/saved_model/saved_model.cc | 9 +++++++- .../core/tfrt/saved_model/saved_model_util.cc | 23 +++++++++++++++++++ .../core/tfrt/saved_model/saved_model_util.h | 4 ++++ 4 files changed, 37 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/tfrt/saved_model/BUILD b/tensorflow/core/tfrt/saved_model/BUILD index fc06ddd3911993..5f1d8cc73c1aff 100644 --- a/tensorflow/core/tfrt/saved_model/BUILD +++ b/tensorflow/core/tfrt/saved_model/BUILD @@ -269,6 +269,8 @@ cc_library( "@llvm-project//llvm:Support", "@llvm-project//mlir:IR", "@local_tsl//tsl/platform:protobuf", + "@local_xla//xla/stream_executor", + "@local_xla//xla/stream_executor/gpu:gpu_init", "@tf_runtime//:bef", "@tf_runtime//:hostcontext", "@tf_runtime//:init_tfrt_dialects", diff --git a/tensorflow/core/tfrt/saved_model/saved_model.cc b/tensorflow/core/tfrt/saved_model/saved_model.cc index 81eb443bb3bb0d..e9134850d72cd4 100644 --- a/tensorflow/core/tfrt/saved_model/saved_model.cc +++ b/tensorflow/core/tfrt/saved_model/saved_model.cc @@ -752,12 +752,19 @@ tensorflow::Status SavedModelImpl::Run( DCHECK(runner_table); DCHECK(resource_array); - return GraphExecutionRunOnFunction( + auto status = GraphExecutionRunOnFunction( options_.graph_execution_options, run_options, name, *symbol_uids, func, loaded_executable, inputs, outputs, resource_context, client_graph_resource_context, runner_table, resource_array, runtime(), *fallback_state_, fallback_state_->process_function_library_runtime(), &req_deadline_tracker_, /*stream_callback_id=*/std::nullopt); + + if (options_.graph_execution_options.compile_options.device_target == + TfrtDeviceInfraTarget::kGpu) { + RecordFreeGpuMemory(); + } + + return status; } struct SavedModelImpl::JoinedSignature { diff --git a/tensorflow/core/tfrt/saved_model/saved_model_util.cc b/tensorflow/core/tfrt/saved_model/saved_model_util.cc index f7c4463ce06213..dc3671c57cab11 100644 --- 
a/tensorflow/core/tfrt/saved_model/saved_model_util.cc +++ b/tensorflow/core/tfrt/saved_model/saved_model_util.cc @@ -47,6 +47,9 @@ limitations under the License. #include "tensorflow/compiler/mlir/tfrt/transforms/gpu_passes.h" #include "tensorflow/compiler/mlir/tfrt/translate/import_model.h" #include "tensorflow/compiler/mlir/tfrt/translate/tfrt_compile_options.h" +#include "xla/stream_executor/gpu/gpu_init.h" +#include "xla/stream_executor/platform.h" +#include "xla/stream_executor/stream_executor_pimpl.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/lib/monitoring/gauge.h" #include "tensorflow/core/protobuf/meta_graph.pb.h" @@ -78,6 +81,10 @@ auto* saved_model_grappler_time_seconds = "/tensorflow/tfrt/saved_model/grappler_time", "Record the grappler time for the savedmodel.", "model_name"); +auto* free_gpu_memory = tensorflow::monitoring::Gauge::New( + "/tensorflow/tfrt/saved_model/free_gpu_memory", + "Record the free GPU memory.", "gpu_id"); + std::vector FindNamesForValidSignatures( const tensorflow::MetaGraphDef& meta_graph_def) { std::vector valid_signature_names; @@ -278,5 +285,21 @@ void RegisterTFRTDialectsForAoT(mlir::DialectRegistry& registry) { tensorflow::RegisterGpuDialects(®istry); } +void RecordFreeGpuMemory() { + se::Platform* gpu_manager = se::GPUMachineManager(); + if (gpu_manager == nullptr || gpu_manager->VisibleDeviceCount() <= 0) return; + + for (int i = 0; i < gpu_manager->VisibleDeviceCount(); ++i) { + se::StreamExecutor* se = gpu_manager->ExecutorForDevice(i).value(); + int64_t free_memory = 0, total_memory = 0; + DCHECK(se->DeviceMemoryUsage(&free_memory, &total_memory)); + free_gpu_memory->GetCell(std::to_string(i))->Set(free_memory); + } +} + +int64_t GetFreeGpuMemory(int gpu_id) { + return free_gpu_memory->GetCell(std::to_string(gpu_id))->value(); +} + } // namespace tfrt_stub } // namespace tensorflow diff --git a/tensorflow/core/tfrt/saved_model/saved_model_util.h 
b/tensorflow/core/tfrt/saved_model/saved_model_util.h index 4b4608cdef1fbf..00fe1f87df7f9c 100644 --- a/tensorflow/core/tfrt/saved_model/saved_model_util.h +++ b/tensorflow/core/tfrt/saved_model/saved_model_util.h @@ -131,6 +131,10 @@ absl::Status DeserializeAoTMlirModule( void RegisterTFRTDialectsForAoT(mlir::DialectRegistry& registry); +void RecordFreeGpuMemory(); + +int64_t GetFreeGpuMemory(int gpu_id); + } // namespace tfrt_stub } // namespace tensorflow From 4280d60e8bce6bf7e20ab6bdd0170024314314d8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 21 Sep 2023 11:29:02 -0700 Subject: [PATCH 095/567] Add support for the kOptimizationBarrier HLO in auto-sharding. We treat the op as an elementwise op. PiperOrigin-RevId: 567367690 --- .../xla/xla/hlo/experimental/auto_sharding/auto_sharding.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/third_party/xla/xla/hlo/experimental/auto_sharding/auto_sharding.cc b/third_party/xla/xla/hlo/experimental/auto_sharding/auto_sharding.cc index 8d2fda21ec47b8..e0b07bb3d98348 100644 --- a/third_party/xla/xla/hlo/experimental/auto_sharding/auto_sharding.cc +++ b/third_party/xla/xla/hlo/experimental/auto_sharding/auto_sharding.cc @@ -1893,6 +1893,7 @@ BuildStrategyAndCost(const HloInstructionSequence& sequence, case HloOpcode::kCbrt: case HloOpcode::kTan: case HloOpcode::kTanh: + case HloOpcode::kOptimizationBarrier: // Binary elementwise operations case HloOpcode::kAdd: case HloOpcode::kAtan2: From 7cdbe3889e8fcfec86ed3378f4fc57d312a00a95 Mon Sep 17 00:00:00 2001 From: Ilia Sergachev Date: Thu, 21 Sep 2023 11:43:02 -0700 Subject: [PATCH 096/567] [XLA:GPU] Move dot dimension merger before padding for cuBLAS to enable more Triton fusions. Inside the padding pass for cuBLAS the Triton GEMM rewriter tells whether a dot should be handled by Triton, if so padding is skipped and Triton is used later. Merging dot dimensions before this makes Triton handle more of them. 
PiperOrigin-RevId: 567372057 --- third_party/xla/xla/service/gpu/BUILD | 18 +++++++------ .../xla/xla/service/gpu/gpu_compiler.cc | 3 --- .../xla/xla/service/gpu/nvptx_compiler.cc | 4 ++- .../xla/service/gpu/nvptx_compiler_test.cc | 26 +++++++++++++++++-- 4 files changed, 37 insertions(+), 14 deletions(-) diff --git a/third_party/xla/xla/service/gpu/BUILD b/third_party/xla/xla/service/gpu/BUILD index 8646507b0eef2f..abba4f41bd9c97 100644 --- a/third_party/xla/xla/service/gpu/BUILD +++ b/third_party/xla/xla/service/gpu/BUILD @@ -2669,7 +2669,6 @@ cc_library( "//xla/service:convolution_pred_expander", "//xla/service:copy_insertion", "//xla/service:dot_decomposer", - "//xla/service:dot_dimension_merger", "//xla/service:dot_merger", "//xla/service:dump", "//xla/service:dynamic_dimension_simplifier", @@ -2853,6 +2852,7 @@ cc_library( "//xla/service:convert_mover", "//xla/service:dump", "//xla/hlo/ir:hlo", + "//xla/service:dot_dimension_merger", "//xla/service:float_normalization", "//xla/service:float_support", "//xla/service:hlo_constant_folding", @@ -2890,21 +2890,23 @@ xla_cc_test( srcs = if_gpu_is_configured([ "nvptx_compiler_test.cc", ]), - tags = tf_cuda_tests_tags() + [ - "no_rocm", - "nomsan", # Pulls in precompiled NVIDIA libraries which cause false - # positives in msan. + tags = [ + "nomsan", # Pulls in precompiled NVIDIA libraries which cause false positives in msan. 
+ "requires-gpu-sm70", ], deps = [ ":nvptx_compiler_impl", - "//xla:status_macros", + "//xla:statusor", "//xla:util", "//xla/hlo/ir:hlo", + "//xla/service:backend", "//xla/service:buffer_assignment", "//xla/service:gpu_plugin", - "//xla/service:hlo_parser", "//xla/tests:hlo_test_base", - "//xla/tests:xla_internal_test_main", # build_cleaner: keep + "//xla/tests:xla_internal_test_main", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest", + "@local_tsl//tsl/platform:statusor", ], ) diff --git a/third_party/xla/xla/service/gpu/gpu_compiler.cc b/third_party/xla/xla/service/gpu/gpu_compiler.cc index bad85be4677da6..1d5c7c106906be 100644 --- a/third_party/xla/xla/service/gpu/gpu_compiler.cc +++ b/third_party/xla/xla/service/gpu/gpu_compiler.cc @@ -75,7 +75,6 @@ limitations under the License. #include "xla/service/convolution_pred_expander.h" #include "xla/service/copy_insertion.h" #include "xla/service/dot_decomposer.h" -#include "xla/service/dot_dimension_merger.h" #include "xla/service/dot_merger.h" #include "xla/service/dump.h" #include "xla/service/dynamic_dimension_simplifier.h" @@ -903,8 +902,6 @@ Status GpuCompiler::OptimizeHloPostLayoutAssignment( { HloPassPipeline pipeline("hlo normalization"); - pipeline.AddPass(); - // The LayoutAssignment pass may leave behind kCopy instructions which are // duplicate or NOPs, so remove them with algebraic simplification and CSE. AlgebraicSimplifierOptions options; diff --git a/third_party/xla/xla/service/gpu/nvptx_compiler.cc b/third_party/xla/xla/service/gpu/nvptx_compiler.cc index 01cf4fed6a3e1f..8dd3b2fc8d74c6 100644 --- a/third_party/xla/xla/service/gpu/nvptx_compiler.cc +++ b/third_party/xla/xla/service/gpu/nvptx_compiler.cc @@ -22,7 +22,6 @@ limitations under the License. #include #include #include -#include #include #include "absl/base/call_once.h" @@ -33,6 +32,7 @@ limitations under the License. 
#include "xla/service/algebraic_simplifier.h" #include "xla/service/call_inliner.h" #include "xla/service/convert_mover.h" +#include "xla/service/dot_dimension_merger.h" #include "xla/service/dump.h" #include "xla/service/float_normalization.h" #include "xla/service/float_support.h" @@ -244,6 +244,8 @@ Status NVPTXCompiler::OptimizeHloPostLayoutAssignment( TF_RETURN_IF_ERROR(mha_fusion_pipeline.Run(hlo_module).status()); } + pre_pipeline.AddPass(); + for (const CublasPaddingRequirement& requirement : CublasPaddingRequirements) { if (cuda_compute_capability.IsAtLeast(requirement.min_compute_capability)) { diff --git a/third_party/xla/xla/service/gpu/nvptx_compiler_test.cc b/third_party/xla/xla/service/gpu/nvptx_compiler_test.cc index 264f2df97cf5db..9a76f025c9341b 100644 --- a/third_party/xla/xla/service/gpu/nvptx_compiler_test.cc +++ b/third_party/xla/xla/service/gpu/nvptx_compiler_test.cc @@ -17,12 +17,15 @@ limitations under the License. #include +#include +#include "absl/strings/string_view.h" #include "xla/hlo/ir/hlo_instruction.h" +#include "xla/service/backend.h" #include "xla/service/buffer_assignment.h" -#include "xla/service/hlo_parser.h" -#include "xla/status_macros.h" +#include "xla/statusor.h" #include "xla/tests/hlo_test_base.h" #include "xla/util.h" +#include "tsl/platform/statusor.h" namespace xla { namespace gpu { @@ -93,5 +96,24 @@ ENTRY entry { all_reduce, {1}, all_reduce->operand(1), {})); } +TEST_F(NVPTXCompilerTest, + DotDimensionAreSortedBeforePaddingForCublasEnablingTritonFusion) { + MatchOptimizedHlo(R"( +ENTRY e { + p0 = f16[11,22,33,44] parameter(0) + p1 = s8[11,22,33,44] parameter(1) + p1c = f16[11,22,33,44] convert(p1) + ROOT d = f16[11,22,44,44] dot(p0, p1c), + lhs_batch_dims={0,1}, lhs_contracting_dims={2}, + rhs_batch_dims={0,1}, rhs_contracting_dims={2} +})", + R"( +; CHECK: ENTRY +; CHECK-NEXT: parameter +; CHECK-NEXT: parameter +; CHECK-NEXT: __triton_gemm + )"); +} + } // namespace gpu } // namespace xla From 
dad0396ad15f178b58a50d612aec47af7ffea15f Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Thu, 21 Sep 2023 11:58:24 -0700 Subject: [PATCH 097/567] [stream_executor] NFC: Remove unused timer target https://github.com/openxla/xla/issues/5761 PiperOrigin-RevId: 567376822 --- .../c/experimental/stream_executor/BUILD | 1 - third_party/xla/xla/service/BUILD | 1 - third_party/xla/xla/stream_executor/BUILD | 34 ------------------- 3 files changed, 36 deletions(-) diff --git a/tensorflow/c/experimental/stream_executor/BUILD b/tensorflow/c/experimental/stream_executor/BUILD index 84df975cffa367..39b81d93fb1723 100644 --- a/tensorflow/c/experimental/stream_executor/BUILD +++ b/tensorflow/c/experimental/stream_executor/BUILD @@ -49,7 +49,6 @@ cc_library( "@local_xla//xla/stream_executor:multi_platform_manager", "@local_xla//xla/stream_executor:platform", "@local_xla//xla/stream_executor:stream_executor_pimpl", - "@local_xla//xla/stream_executor:timer", ], ) diff --git a/third_party/xla/xla/service/BUILD b/third_party/xla/xla/service/BUILD index 934cbf95b298be..04003d8f96c093 100644 --- a/third_party/xla/xla/service/BUILD +++ b/third_party/xla/xla/service/BUILD @@ -1448,7 +1448,6 @@ cc_library( "//xla/stream_executor", "//xla/stream_executor:device_description", "//xla/stream_executor:device_memory_allocator", - "//xla/stream_executor:timer", "@com_google_absl//absl/log:check", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings:str_format", diff --git a/third_party/xla/xla/stream_executor/BUILD b/third_party/xla/xla/stream_executor/BUILD index 677b085f83a7db..15e3642d4afd89 100644 --- a/third_party/xla/xla/stream_executor/BUILD +++ b/third_party/xla/xla/stream_executor/BUILD @@ -557,40 +557,6 @@ cc_library( ], ) -cc_library( - name = "timer", - srcs = [ - "device_description.h", - "kernel_cache_config.h", - ], - hdrs = [ - "blas.h", - "kernel.h", - "stream.h", - "stream_executor.h", - ], - visibility = ["//visibility:public"], - deps = [ - ":data_type", - 
":device_description", - ":device_description_proto_cc", - ":kernel_cache_config", - ":platform", - ":stream_executor_headers", - ":stream_executor_pimpl_header", - "//xla/stream_executor/platform", - "@com_google_absl//absl/algorithm:container", - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/functional:any_invocable", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/synchronization", - "@com_google_absl//absl/types:span", - "@local_tsl//tsl/platform:logging", - "@local_tsl//tsl/platform:statusor", - "@local_tsl//tsl/protobuf:dnn_proto_cc", - ], -) - cc_library( name = "blas", srcs = ["blas.cc"], From 0b836ba91ec31f7b64b22471bc0e3a03090c5ce5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 21 Sep 2023 12:15:29 -0700 Subject: [PATCH 098/567] Implement Zero-Point-Offset Calculation for Lowering Dot/DotGeneral Ops in ConvertMhloQuantToInt Pass Currently this implementation supports DotGeneral and Dot (which is a specical case of DotGeneral). It supports static shapes only. Dynamic shapes support can be added later if needed. 
PiperOrigin-RevId: 567382083 --- .../mlir/quantization/stablehlo/BUILD | 1 + .../bridge/convert_mhlo_quant_to_int.cc | 299 +++++++++++++- .../bridge/convert-mhlo-quant-to-int.mlir | 384 ++++++++++++++++-- 3 files changed, 646 insertions(+), 38 deletions(-) diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/BUILD b/tensorflow/compiler/mlir/quantization/stablehlo/BUILD index a4618d6de03082..77cb6b3f38ad06 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/BUILD +++ b/tensorflow/compiler/mlir/quantization/stablehlo/BUILD @@ -142,6 +142,7 @@ cc_library( "//tensorflow/core/framework:numeric_types", "//tensorflow/core/util/quantization:uniform_quant_ops_attr_proto_cc", "//tensorflow/core/util/quantization:uniform_quant_ops_params", + "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/log", "@com_google_absl//absl/strings", "@llvm-project//llvm:Support", diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_mhlo_quant_to_int.cc b/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_mhlo_quant_to_int.cc index 255c69573f08eb..0a081269b4db4e 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_mhlo_quant_to_int.cc +++ b/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_mhlo_quant_to_int.cc @@ -21,6 +21,7 @@ limitations under the License. #include #include +#include "absl/algorithm/container.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Casting.h" @@ -29,11 +30,14 @@ limitations under the License. 
#include "mlir/Dialect/Quant/QuantTypes.h" // from @llvm-project #include "mlir/Dialect/Shape/IR/Shape.h" // from @llvm-project #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h" // from @llvm-project +#include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/BuiltinAttributes.h" // from @llvm-project +#include "mlir/IR/BuiltinTypeInterfaces.h" // from @llvm-project #include "mlir/IR/BuiltinTypes.h" // from @llvm-project #include "mlir/IR/MLIRContext.h" // from @llvm-project #include "mlir/IR/OperationSupport.h" // from @llvm-project #include "mlir/IR/PatternMatch.h" // from @llvm-project +#include "mlir/IR/Region.h" // from @llvm-project #include "mlir/IR/TypeUtilities.h" // from @llvm-project #include "mlir/Pass/Pass.h" // from @llvm-project #include "mlir/Support/LLVM.h" // from @llvm-project @@ -591,6 +595,256 @@ LogicalResult matchAndRewriteDotLikeOp(OpType &op, OpAdaptorType &adaptor, return success(); } +Value CreateZeroPointPartialOffset(OpBuilder &builder, Location loc, + Value tensor, const int64_t other_tensor_zp, + ArrayRef contracting_dims) { + // This function calculates part of the zero-point-offset by using + // mhlo::Reduce to sum over the contracting dims of the tensor, and then + // multiply by zp of the other tensor. + auto output_element_type = builder.getI32Type(); + + // Calculate the output tensor shape. This is input tensor dims minus + // contracting dims. + auto ranked_tensor = tensor.getType().dyn_cast(); + llvm::SmallVector output_dims; + for (int64_t i = 0; i < ranked_tensor.getRank(); ++i) { + if (absl::c_count(contracting_dims, i) == 0) { + output_dims.push_back(ranked_tensor.getDimSize(i)); + } + } + + // Convert input tensor to output type since mhlo::Reduce only supports same + // element type for input/output. 
+ tensor = builder.create( + loc, tensor.getType().dyn_cast().clone(output_element_type), + tensor); + auto reducer_tensor_type = RankedTensorType::get({}, output_element_type); + + // Initial value for reduced tensor. This is set 0. + Value init_values = builder.create( + loc, DenseIntElementsAttr::get(reducer_tensor_type, {0})); + mhlo::ReduceOp reduce = builder.create( + loc, RankedTensorType::get(output_dims, output_element_type), tensor, + init_values, builder.getI64TensorAttr(contracting_dims)); + // Define reducer function to compute sum. + Region ®ion = reduce.getBody(); + Block &block = region.emplaceBlock(); + block.addArgument(reducer_tensor_type, loc); + block.addArgument(reducer_tensor_type, loc); + auto *firstArgument = block.args_begin(); + auto secondArgument = block.args_rbegin(); + { + OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPointToStart(&block); + Value sum = + builder.create(loc, *firstArgument, *secondArgument); + builder.create(loc, sum); + } + Value zp = builder.create( + loc, builder.getI32IntegerAttr(other_tensor_zp)); + Value mul_op = builder.create(loc, reduce.getResult(0), + zp, nullptr); + return mul_op; +} + +llvm::SmallVector CalculateBroadcastDims( + Value zp_contribution, llvm::ArrayRef contracting_dims, + llvm::ArrayRef batching_dims, int64_t non_batching_starting_idx) { + // This function calculates the dims for broadcasting from the + // zero-point-offset tensor to the final output tensor. + auto zp_contribution_rank = + zp_contribution.getType().dyn_cast().getRank(); + llvm::SmallVector broadcast_dims; + broadcast_dims.resize(zp_contribution_rank, 0); + // Result tensor will have batching dims first, then LHS result dims, then + // RHS result dims. So non-batching result dims index doesn't start from 0. + // The arg non_batching_starting_idx is used distinguish LHS and RHS. 
+ int64_t result_batching_idx = 0; + int64_t result_non_batching_idx = non_batching_starting_idx; + for (int64_t idx = 0, original_idx = 0; idx < zp_contribution_rank; + ++idx, ++original_idx) { + // zp_contribution has removed contracting dims from the tensor. The + // following recovers the index in the original tensor. + while (absl::c_count(contracting_dims, original_idx) != 0) { + original_idx++; + } + if (absl::c_count(batching_dims, original_idx) == 0) { + broadcast_dims[idx] = result_non_batching_idx++; + } else { + broadcast_dims[idx] = result_batching_idx++; + } + } + return broadcast_dims; +} + +Value CalculateZeroPointOffset(OpBuilder &builder, Location loc, Value lhs, + Value rhs, int64_t lhs_zp, int64_t rhs_zp, + mhlo::DotDimensionNumbersAttr dims) { + // According to StableHLO spec, the output tensor has dims in the following + // order: + // batching dims, LHS result dims, RHS result dims + // where LHS/RHS result dims are any dims that are neither batching dims nor + // contracting dims. 
+ llvm::SmallVector output_dims; + mlir::ShapedType lhs_shape = lhs.getType().cast(); + mlir::ShapedType rhs_shape = rhs.getType().cast(); + for (int64_t i = 0; i < lhs_shape.getRank(); ++i) { + if (absl::c_count(dims.getLhsBatchingDimensions(), i) != 0) { + output_dims.push_back(lhs_shape.getDimSize(i)); + } + } + for (int64_t i = 0; i < lhs_shape.getRank(); ++i) { + if (absl::c_count(dims.getLhsContractingDimensions(), i) == 0 && + absl::c_count(dims.getLhsBatchingDimensions(), i) == 0) { + output_dims.push_back(lhs_shape.getDimSize(i)); + } + } + for (int64_t i = 0; i < rhs_shape.getRank(); ++i) { + if (absl::c_count(dims.getRhsContractingDimensions(), i) == 0 && + absl::c_count(dims.getRhsBatchingDimensions(), i) == 0) { + output_dims.push_back(rhs_shape.getDimSize(i)); + } + } + auto output_element_type = builder.getI32Type(); + auto output_tensor_type = + RankedTensorType::get(output_dims, output_element_type); + + Value result = builder.create( + loc, DenseIntElementsAttr::get(output_tensor_type, {0})); + + // Calculate LHS contribution when RHS zp is non-zero. + if (rhs_zp != 0) { + Value lhs_zp_contribution = CreateZeroPointPartialOffset( + builder, loc, lhs, rhs_zp, dims.getLhsContractingDimensions()); + // Broadcast lhs ZP contribution to result tensor shape. + llvm::SmallVector broadcast_dims = CalculateBroadcastDims( + lhs_zp_contribution, dims.getLhsContractingDimensions(), + dims.getLhsBatchingDimensions(), + dims.getLhsBatchingDimensions().size()); + lhs_zp_contribution = builder.create( + loc, output_tensor_type, lhs_zp_contribution, + DenseIntElementsAttr::get( + RankedTensorType::get({static_cast(broadcast_dims.size())}, + builder.getI64Type()), + broadcast_dims)); + result = builder.create(loc, result, lhs_zp_contribution); + } + // Calculate RHS contribution when LHS zp is non-zero. 
+ if (lhs_zp != 0) { + Value rhs_zp_contribution = CreateZeroPointPartialOffset( + builder, loc, rhs, lhs_zp, dims.getRhsContractingDimensions()); + // Broadcast rhs ZP contribution to result tensor shape. + llvm::SmallVector broadcast_dims = CalculateBroadcastDims( + rhs_zp_contribution, dims.getRhsContractingDimensions(), + dims.getRhsBatchingDimensions(), + lhs_shape.getRank() - dims.getLhsContractingDimensions().size()); + + rhs_zp_contribution = builder.create( + loc, output_tensor_type, rhs_zp_contribution, + DenseIntElementsAttr::get( + RankedTensorType::get({static_cast(broadcast_dims.size())}, + builder.getI64Type()), + broadcast_dims)); + result = builder.create(loc, result, rhs_zp_contribution); + } + + if (lhs_zp != 0 && rhs_zp != 0) { + // Contributions from LHS_ZP * RHS_ZP. + // This is multiplied by the product of all contracting dimensions. + int32_t contracting_dim_total = 1; + for (const int64_t rhs_idx : dims.getRhsContractingDimensions()) { + contracting_dim_total *= rhs_shape.getDimSize(rhs_idx); + } + const int32_t zp_constant_offset = static_cast(lhs_zp) * + static_cast(rhs_zp) * + contracting_dim_total; + auto zp_offset_value = builder.create( + loc, builder.getI32IntegerAttr(zp_constant_offset)); + result = builder.create(loc, result, zp_offset_value, + nullptr); + } + return result; +} + +template +LogicalResult RewriteDotGeneralOp(DotOp op, DotOpAdaptor adaptor, + ArrayRef attrs, + const mhlo::DotDimensionNumbersAttr &dims, + ConversionPatternRewriter &rewriter) { + // Lower Dot/DotGeneral UQ ops to DotGeneral int. + // Assumes that operands and results are static-shape tensors of uq types. 
+ auto lhs_element_quant_type = + getElementTypeOrSelf(op.getLhs().getType()) + .template dyn_cast(); + auto rhs_element_quant_type = + getElementTypeOrSelf(op.getRhs().getType()) + .template dyn_cast(); + auto res_element_quant_type = + getElementTypeOrSelf(op.getResult()) + .template dyn_cast(); + Value lhs = adaptor.getLhs(); + Value rhs = adaptor.getRhs(); + auto res_int32_tensor_type = + op.getResult().getType().clone(rewriter.getI32Type()); + + // Dot result + // = dot((lhs - zp_l) * scale_l, (rhs - zp_r) * scale_r) / scale_res + // + zp_res + // = dot(lhs - zp_l, rhs - zp_r) * scale_l * scale_r / scale_res + zp_res + // = dot(lhs, rhs) * combined_scale + combined_zp + // where: + // zp_offset = zp_l*rhs + zp_r*lhs - zp_l*zp_r + // combined_scale = scale_l * scale_r / scale_res + // combined_zp = res_zp - zp_offset * combined_scale + SmallVector operands{lhs, rhs}; + Value res_i32 = rewriter.create( + op->getLoc(), res_int32_tensor_type, operands, attrs); + + Value zp_offset = CalculateZeroPointOffset( + rewriter, op->getLoc(), lhs, rhs, lhs_element_quant_type.getZeroPoint(), + rhs_element_quant_type.getZeroPoint(), dims); + + // Multiply dot result and zp_offset by combined_scale only if it is not 1.0. 
+ float combined_scale_fp = lhs_element_quant_type.getScale() * + rhs_element_quant_type.getScale() / + res_element_quant_type.getScale(); + if (combined_scale_fp != 1.0f) { + Value combined_scale = rewriter.create( + op->getLoc(), rewriter.getF32FloatAttr(combined_scale_fp)); + + auto res_float32_tensor_type = + op.getResult().getType().clone(rewriter.getF32Type()); + Value res_f32 = rewriter.create( + op->getLoc(), res_float32_tensor_type, res_i32); + res_f32 = rewriter.create( + op->getLoc(), res_float32_tensor_type, res_f32, combined_scale, + nullptr); + res_i32 = rewriter.create(op->getLoc(), + res_int32_tensor_type, res_f32); + + auto zp_offset_float32_tensor_type = + zp_offset.getType().dyn_cast().clone(rewriter.getF32Type()); + zp_offset = rewriter.create( + op->getLoc(), zp_offset_float32_tensor_type, zp_offset); + zp_offset = rewriter.create( + op->getLoc(), zp_offset_float32_tensor_type, zp_offset, combined_scale, + nullptr); + zp_offset = rewriter.create( + op->getLoc(), + zp_offset_float32_tensor_type.clone(rewriter.getI32Type()), zp_offset); + } + + Value res_zp = rewriter.create( + op->getLoc(), + rewriter.getI32IntegerAttr(res_element_quant_type.getZeroPoint())); + Value combined_zp = rewriter.create( + op->getLoc(), res_int32_tensor_type, res_zp, zp_offset, nullptr); + + rewriter.replaceOpWithNewOp(op, res_int32_tensor_type, res_i32, + combined_zp); + return success(); +} + class ConvertUniformQuantizedDotOp : public OpConversionPattern { public: using OpConversionPattern::OpConversionPattern; @@ -598,7 +852,32 @@ class ConvertUniformQuantizedDotOp : public OpConversionPattern { LogicalResult matchAndRewrite( mhlo::DotOp op, mhlo::DotOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { - return matchAndRewriteDotLikeOp(op, adaptor, rewriter); + // Use matchAndRewriteDotLikeOp for DotHybrid and dynamic shapes. 
+ if (!op.getLhs() + .getType() + .getElementType() + .isa() || + !op.getRhs() + .getType() + .getElementType() + .isa() || + !op.getLhs().getType().cast().hasStaticShape() || + !op.getRhs().getType().cast().hasStaticShape() || + !op.getResult().getType().cast().hasStaticShape()) { + return matchAndRewriteDotLikeOp(op, adaptor, rewriter); + } + + // DotOp is a special case of DotGeneralOp, where LHS and RHS are both + // rank-2 tensors and have contracting dims of 1 and 0 respectively. + auto dims = mhlo::DotDimensionNumbersAttr::get( + rewriter.getContext(), /*lhsBatchingDimensions=*/{}, + /*rhsBatchingDimensions=*/{}, /*lhsContractingDimensions=*/{1}, + /*rhsContractingDimensions=*/{0}); + llvm::SmallVector attrs(op->getAttrs()); + attrs.push_back( + {StringAttr::get(rewriter.getContext(), "dot_dimension_numbers"), + dims}); + return RewriteDotGeneralOp(op, adaptor, attrs, dims, rewriter); } }; @@ -610,7 +889,23 @@ class ConvertUniformQuantizedDotGeneralOp LogicalResult matchAndRewrite( mhlo::DotGeneralOp op, mhlo::DotGeneralOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { - return matchAndRewriteDotLikeOp(op, adaptor, rewriter); + // Use matchAndRewriteDotLikeOp for DotHybridGeneral case and dynamic + // shapes. 
+ if (!op.getLhs() + .getType() + .getElementType() + .isa() || + !op.getRhs() + .getType() + .getElementType() + .isa() || + !op.getLhs().getType().cast().hasStaticShape() || + !op.getRhs().getType().cast().hasStaticShape() || + !op.getResult().getType().cast().hasStaticShape()) { + return matchAndRewriteDotLikeOp(op, adaptor, rewriter); + } + return RewriteDotGeneralOp(op, adaptor, op->getAttrs(), + op.getDotDimensionNumbers(), rewriter); } }; diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/tests/bridge/convert-mhlo-quant-to-int.mlir b/tensorflow/compiler/mlir/quantization/stablehlo/tests/bridge/convert-mhlo-quant-to-int.mlir index 863c020e96b09e..9e00c80fb5aab5 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/tests/bridge/convert-mhlo-quant-to-int.mlir +++ b/tensorflow/compiler/mlir/quantization/stablehlo/tests/bridge/convert-mhlo-quant-to-int.mlir @@ -303,8 +303,8 @@ func.func @uniform_quantize_requantize_merged_zp_zero_and_dequantize(%arg0: tens // ----- -// CHECK-LABEL: func @uniform_quantize_dot_dequantize -func.func @uniform_quantize_dot_dequantize(%arg0: tensor, %arg1: tensor) -> tensor { +// CHECK-LABEL: func @uniform_quantize_dot_dequantize_dynamic +func.func @uniform_quantize_dot_dequantize_dynamic(%arg0: tensor, %arg1: tensor) -> tensor { %0 = mhlo.uniform_quantize %arg0 : (tensor) -> tensor> %1 = mhlo.uniform_quantize %arg1 : (tensor) -> tensor> @@ -327,8 +327,8 @@ func.func @uniform_quantize_dot_dequantize(%arg0: tensor, %arg1: tensor // ----- -// CHECK-LABEL: func @uniform_quantize_dot_int4 -func.func @uniform_quantize_dot_int4(%arg0: tensor, %arg1: tensor) { +// CHECK-LABEL: func @uniform_quantize_dot_dynamic_int4 +func.func @uniform_quantize_dot_dynamic_int4(%arg0: tensor, %arg1: tensor) { %0 = mhlo.uniform_quantize %arg0 : (tensor) -> tensor> %1 = mhlo.uniform_quantize %arg1 : (tensor) -> tensor> @@ -344,51 +344,363 @@ func.func @uniform_quantize_dot_int4(%arg0: tensor, %arg1: tensor, %arg1: tensor<2x2xf32>) -> 
tensor<2x2xf32> { + %0 = mhlo.uniform_quantize %arg0 : (tensor<2x2xf32>) -> tensor<2x2x!quant.uniform> + %1 = mhlo.uniform_quantize %arg1 : (tensor<2x2xf32>) -> tensor<2x2x!quant.uniform> + + // CHECK: "mhlo.dot_general" + // CHECK-SAME: lhs_contracting_dimensions = [1] + // CHECK-SAME: rhs_contracting_dimensions = [0] + // CHECK-SAME: (tensor<2x2xi8>, tensor<2x2xi8>) -> tensor<2x2xi32> + %2 = "mhlo.dot" (%0, %1) : (tensor<2x2x!quant.uniform>, tensor<2x2x!quant.uniform>) -> tensor<2x2x!quant.uniform> + %3 = mhlo.uniform_dequantize %2 : (tensor<2x2x!quant.uniform>) -> tensor<2x2xf32> + return %3 : tensor<2x2xf32> +} + +// ----- + +// CHECK-LABEL: func @uniform_quantize_dot_int4 +func.func @uniform_quantize_dot_int4(%arg0: tensor<2x2xf32>, %arg1: tensor<2x2xf32>) { + %0 = mhlo.uniform_quantize %arg0 : (tensor<2x2xf32>) -> tensor<2x2x!quant.uniform> + %1 = mhlo.uniform_quantize %arg1 : (tensor<2x2xf32>) -> tensor<2x2x!quant.uniform> + + // CHECK: "mhlo.dot_general" + // CHECK-SAME: lhs_contracting_dimensions = [1] + // CHECK-SAME: rhs_contracting_dimensions = [0] + // CHECK-SAME: (tensor<2x2xi4>, tensor<2x2xi4>) -> tensor<2x2xi32> + %2 = "mhlo.dot" (%0, %1): (tensor<2x2x!quant.uniform>, tensor<2x2x!quant.uniform>) -> tensor<2x2x!quant.uniform> + return +} + +// ----- + // CHECK-LABEL: func @uniform_quantize_dot_general_dequantize -func.func @uniform_quantize_dot_general_dequantize(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = mhlo.uniform_quantize %arg0 : (tensor) -> tensor> - %1 = mhlo.uniform_quantize %arg1 : (tensor) -> tensor> +func.func @uniform_quantize_dot_general_dequantize( + %arg0: tensor<2x5x6xf32>, %arg1: tensor<6x8x2xf32>) -> tensor<2x5x8xf32> { + %0 = mhlo.uniform_quantize %arg0 : (tensor<2x5x6xf32>) + -> tensor<2x5x6x!quant.uniform> + %1 = mhlo.uniform_quantize %arg1 : (tensor<6x8x2xf32>) + -> tensor<6x8x2x!quant.uniform> + + // CHECK: %[[DOT_RES:.*]] = "mhlo.dot_general" + // CHECK-SAME: lhs_batching_dimensions = [0] + // CHECK-SAME: 
rhs_batching_dimensions = [2] + // CHECK-SAME: lhs_contracting_dimensions = [2] + // CHECK-SAME: rhs_contracting_dimensions = [0] + + // CHECK: %[[ZP_TOTAL_1:.*]] = mhlo.constant dense<0> : tensor<2x5x8xi32> + + // Zero point offset contribution from LHS tensor * RHS ZP. + + // CHECK: %[[LHS_I32:.*]] = mhlo.convert %[[LHS:.*]] : (tensor<2x5x6xi8>) + // CHECK-SAME: -> tensor<2x5x6xi32> + // CHECK: %[[LHS_REDUCE_INIT:.*]] = mhlo.constant dense<0> : tensor + // CHECK: %[[LHS_REDUCE:.*]] = mhlo.reduce(%[[LHS_I32]] init: %[[LHS_REDUCE_INIT]]) + // CHECK-SAME: applies mhlo.add across dimensions = [2] + // CHECK-SAME: (tensor<2x5x6xi32>, tensor) + // CHECK-SAME: -> tensor<2x5xi32> + // CHECK: %[[RHS_ZP:.*]] = mhlo.constant dense<5> : tensor + // CHECK: %[[LHS_ZP_CONTRIB:.*]] = chlo.broadcast_multiply + // CHECK-SAME: %[[LHS_REDUCE]], %[[RHS_ZP]] : + // CHECK-SAME: (tensor<2x5xi32>, tensor) -> tensor<2x5xi32> + // CHECK: %[[LHS_ZP_BCAST:.*]] = "mhlo.broadcast_in_dim"(%[[LHS_ZP_CONTRIB]]) + // CHECK-SAME: broadcast_dimensions = dense<[0, 1]> + // CHECK-SAME: (tensor<2x5xi32>) -> tensor<2x5x8xi32> + // CHECK: %[[ZP_TOTAL_2:.*]] = mhlo.add %[[ZP_TOTAL_1]], %[[LHS_ZP_BCAST]] + + // Zero point offset contribution from RHS tensor * LHS ZP. 
+ + // CHECK: %[[RHS_I32:.*]] = mhlo.convert %[[RHS:.*]] : (tensor<6x8x2xi8>) + // CHECK-SAME: -> tensor<6x8x2xi32> + // CHECK: %[[RHS_REDUCE_INIT:.*]] = mhlo.constant dense<0> : tensor + // CHECK: %[[RHS_REDUCE:.*]] = mhlo.reduce(%[[RHS_I32]] init: %[[RHS_REDUCE_INIT]]) + // CHECK-SAME: applies mhlo.add across dimensions = [0] + // CHECK-SAME: (tensor<6x8x2xi32>, tensor) + // CHECK-SAME: -> tensor<8x2xi32> + // CHECK: %[[RHS_ZP:.*]] = mhlo.constant dense<3> : tensor + // CHECK: %[[RHS_ZP_CONTRIB:.*]] = chlo.broadcast_multiply + // CHECK-SAME: %[[RHS_REDUCE]], %[[RHS_ZP]] : + // CHECK-SAME: (tensor<8x2xi32>, tensor) -> tensor<8x2xi32> + // CHECK: %[[RHS_ZP_BCAST:.*]] = "mhlo.broadcast_in_dim"(%[[RHS_ZP_CONTRIB]]) + // CHECK-SAME: broadcast_dimensions = dense<[2, 0]> + // CHECK-SAME: (tensor<8x2xi32>) -> tensor<2x5x8xi32> + // CHECK: %[[ZP_TOTAL_3:.*]] = mhlo.add %[[ZP_TOTAL_2]], %[[RHS_ZP_BCAST]] + + // Zero point offset contribution from LHS ZP * RHS ZP. + + // CHECK: %[[ZPS:.*]] = mhlo.constant dense<90> : tensor + // CHECK: %[[ZP_TOTAL_4:.*]] = chlo.broadcast_subtract %[[ZP_TOTAL_3]], %[[ZPS]] + // CHECK-SAME: (tensor<2x5x8xi32>, tensor) -> tensor<2x5x8xi32> + + // Combine dot result with zero point offset and output final result. 
+ + // CHECK: %[[COMBINED_SCALE:.*]] = mhlo.constant dense<5.000000e-01> : tensor + // CHECK: %[[RES_FP:.*]] = mhlo.convert %[[DOT_RES]] + // CHECK-SAME: (tensor<2x5x8xi32>) -> tensor<2x5x8xf32> + // CHECK: %[[RES_FP_1:.*]] = chlo.broadcast_multiply + // CHECK-SAME: %[[RES_FP:.*]], %[[COMBINED_SCALE]] + // CHECK: %[[RES_INT:.*]] = mhlo.convert %[[RES_FP_1]] + // CHECK-SAME: (tensor<2x5x8xf32>) -> tensor<2x5x8xi32> + + // CHECK: %[[ZP_TOTAL_5:.*]] = mhlo.convert %[[ZP_TOTAL_4]] + // CHECK-SAME: (tensor<2x5x8xi32>) -> tensor<2x5x8xf32> + // CHECK: %[[ZP_TOTAL_6:.*]] = chlo.broadcast_multiply + // CHECK-SAME: %[[ZP_TOTAL_5:.*]], %[[COMBINED_SCALE]] + // CHECK: %[[ZP_TOTAL_7:.*]] = mhlo.convert %[[ZP_TOTAL_6]] + // CHECK-SAME: (tensor<2x5x8xf32>) -> tensor<2x5x8xi32> + + // CHECK: %[[RES_ZP:.*]] = mhlo.constant dense<7> : tensor + // CHECK: %[[ZP_TOTAL_8:.*]] = chlo.broadcast_subtract %[[RES_ZP]], %[[ZP_TOTAL_7]] + // CHECK-SAME: (tensor, tensor<2x5x8xi32>) -> tensor<2x5x8xi32> + // CHECK: mhlo.add %[[RES_INT]], %[[ZP_TOTAL_8]] - // CHECK: %[[VAL1:.*]] = mhlo.convert %[[VAL0:.*]] : (tensor) -> tensor - // CHECK: %[[VAL3:.*]] = chlo.broadcast_subtract %[[VAL1]], %[[VAL2:.*]] : (tensor, tensor) -> tensor - // CHECK: %[[VAL5:.*]] = mhlo.convert %[[VAL4:.*]] : (tensor) -> tensor - // CHECK: %[[VAL7:.*]] = chlo.broadcast_subtract %[[VAL5]], %[[VAL6:.*]] : (tensor, tensor) -> tensor - // CHECK: %[[VAL8:.*]] = "mhlo.dot_general"(%[[VAL3]], %[[VAL7]]) {dot_dimension_numbers = #mhlo.dot} : (tensor, tensor) -> tensor - // CHECK: %[[VAL10:.*]] = chlo.broadcast_multiply %[[VAL8]], %[[VAL9:.*]] : (tensor, tensor) -> tensor - // CHECK: %[[VAL12:.*]] = chlo.broadcast_add %[[VAL10]], %[[VAL11:.*]] : (tensor, tensor) -> tensor - // CHECK: %[[VAL13:.*]] = mhlo.floor %[[VAL12]] : tensor - // CHECK: %[[VAL15:.*]] = chlo.broadcast_add %[[VAL13]], %[[VAL14:.*]] : (tensor, tensor) -> tensor - // CHECK: %[[VAL16:.*]] = mhlo.convert %[[VAL15]] : (tensor) -> tensor - // CHECK: %[[VAL19:.*]] = 
mhlo.clamp %[[VAL17:.*]], %[[VAL16]], %[[VAL18:.*]] : (tensor, tensor, tensor) -> tensor - // CHECK: %[[VAL20:.*]] = mhlo.convert %[[VAL19]] : (tensor) -> tensor %2 = "mhlo.dot_general" (%0, %1) { dot_dimension_numbers = #mhlo.dot< - lhs_contracting_dimensions = [1], + lhs_batching_dimensions = [0], + rhs_batching_dimensions = [2], + lhs_contracting_dimensions = [2], rhs_contracting_dimensions = [0] - >} : (tensor>, tensor>) -> tensor> - %3 = mhlo.uniform_dequantize %2 : (tensor>) -> tensor - return %3 : tensor + >} : ( + tensor<2x5x6x!quant.uniform>, + tensor<6x8x2x!quant.uniform> + ) -> tensor<2x5x8x!quant.uniform> + %3 = mhlo.uniform_dequantize %2 : ( + tensor<2x5x8x!quant.uniform> + ) -> tensor<2x5x8xf32> + return %3 : tensor<2x5x8xf32> } // ----- -// CHECK-LABEL: func @uniform_quantize_dot_general_int4 -func.func @uniform_quantize_dot_general_int4(%arg0: tensor, %arg1: tensor) { - %0 = mhlo.uniform_quantize %arg0 : (tensor) -> tensor> - %1 = mhlo.uniform_quantize %arg1 : (tensor) -> tensor> +// CHECK-LABEL: func @uniform_quantize_dot_general_dequantize_multiple_batching_dims +func.func @uniform_quantize_dot_general_dequantize_multiple_batching_dims( + %arg0: tensor<2x5x3x7x6xf32>, %arg1: tensor<6x2x7x8x3xf32>) -> tensor<2x3x5x8xf32> { + %0 = mhlo.uniform_quantize %arg0 : (tensor<2x5x3x7x6xf32>) + -> tensor<2x5x3x7x6x!quant.uniform> + %1 = mhlo.uniform_quantize %arg1 : (tensor<6x2x7x8x3xf32>) + -> tensor<6x2x7x8x3x!quant.uniform> + + // CHECK: %[[DOT_RES:.*]] = "mhlo.dot_general" + // CHECK-SAME: lhs_batching_dimensions = [0, 2] + // CHECK-SAME: rhs_batching_dimensions = [1, 4] + // CHECK-SAME: lhs_contracting_dimensions = [4, 3] + // CHECK-SAME: rhs_contracting_dimensions = [0, 2]>} + + // CHECK: %[[ZP_TOTAL_1:.*]] = mhlo.constant dense<0> : tensor<2x3x5x8xi32> + + // Zero point offset contribution from LHS tensor * RHS ZP. 
+ + // CHECK: %[[LHS_I32:.*]] = mhlo.convert %[[LHS:.*]] : (tensor<2x5x3x7x6xi8>) + // CHECK-SAME: -> tensor<2x5x3x7x6xi32> + // CHECK: %[[LHS_REDUCE_INIT:.*]] = mhlo.constant dense<0> : tensor + // CHECK: %[[LHS_REDUCE:.*]] = mhlo.reduce(%[[LHS_I32]] init: %[[LHS_REDUCE_INIT]]) + // CHECK-SAME: applies mhlo.add across dimensions = [4, 3] + // CHECK-SAME: (tensor<2x5x3x7x6xi32>, tensor) + // CHECK-SAME: -> tensor<2x5x3xi32> + // CHECK: %[[RHS_ZP:.*]] = mhlo.constant dense<5> : tensor + // CHECK: %[[LHS_ZP_CONTRIB:.*]] = chlo.broadcast_multiply + // CHECK-SAME: %[[LHS_REDUCE]], %[[RHS_ZP]] : + // CHECK-SAME: (tensor<2x5x3xi32>, tensor) -> tensor<2x5x3xi32> + // CHECK: %[[LHS_ZP_BCAST:.*]] = "mhlo.broadcast_in_dim"(%[[LHS_ZP_CONTRIB]]) + // CHECK-SAME: broadcast_dimensions = dense<[0, 2, 1]> + // CHECK-SAME: (tensor<2x5x3xi32>) -> tensor<2x3x5x8xi32> + // CHECK: %[[ZP_TOTAL_2:.*]] = mhlo.add %[[ZP_TOTAL_1]], %[[LHS_ZP_BCAST]] + + // Zero point offset contribution from RHS tensor * LHS ZP. + + // CHECK: %[[RHS_I32:.*]] = mhlo.convert %[[RHS:.*]] : (tensor<6x2x7x8x3xi8>) + // CHECK-SAME: -> tensor<6x2x7x8x3xi32> + // CHECK: %[[RHS_REDUCE_INIT:.*]] = mhlo.constant dense<0> : tensor + // CHECK: %[[RHS_REDUCE:.*]] = mhlo.reduce(%[[RHS_I32]] init: %[[RHS_REDUCE_INIT]]) + // CHECK-SAME: applies mhlo.add across dimensions = [0, 2] + // CHECK-SAME: (tensor<6x2x7x8x3xi32>, tensor) + // CHECK-SAME: -> tensor<2x8x3xi32> + // CHECK: %[[RHS_ZP:.*]] = mhlo.constant dense<3> : tensor + // CHECK: %[[RHS_ZP_CONTRIB:.*]] = chlo.broadcast_multiply + // CHECK-SAME: %[[RHS_REDUCE]], %[[RHS_ZP]] : + // CHECK-SAME: (tensor<2x8x3xi32>, tensor) -> tensor<2x8x3xi32> + // CHECK: %[[RHS_ZP_BCAST:.*]] = "mhlo.broadcast_in_dim"(%[[RHS_ZP_CONTRIB]]) + // CHECK-SAME: broadcast_dimensions = dense<[0, 3, 1]> + // CHECK-SAME: (tensor<2x8x3xi32>) -> tensor<2x3x5x8xi32> + // CHECK: %[[ZP_TOTAL_3:.*]] = mhlo.add %[[ZP_TOTAL_2]], %[[RHS_ZP_BCAST]] + + // Zero point offset contribution from LHS ZP * RHS ZP. 
+ + // CHECK: %[[ZPS:.*]] = mhlo.constant dense<630> : tensor + // CHECK: %[[ZP_TOTAL_4:.*]] = chlo.broadcast_subtract %[[ZP_TOTAL_3]], %[[ZPS]] + // CHECK-SAME: (tensor<2x3x5x8xi32>, tensor) -> tensor<2x3x5x8xi32> + + // Combine dot result with zero point offset and output final result. + + // CHECK: %[[COMBINED_SCALE:.*]] = mhlo.constant dense<5.000000e-01> : tensor + // CHECK: %[[RES_FP:.*]] = mhlo.convert %[[DOT_RES]] + // CHECK-SAME: (tensor<2x3x5x8xi32>) -> tensor<2x3x5x8xf32> + // CHECK: %[[RES_FP_1:.*]] = chlo.broadcast_multiply + // CHECK-SAME: %[[RES_FP:.*]], %[[COMBINED_SCALE]] + // CHECK: %[[RES_INT:.*]] = mhlo.convert %[[RES_FP_1]] + // CHECK-SAME: (tensor<2x3x5x8xf32>) -> tensor<2x3x5x8xi32> + + // CHECK: %[[ZP_TOTAL_5:.*]] = mhlo.convert %[[ZP_TOTAL_4]] + // CHECK-SAME: (tensor<2x3x5x8xi32>) -> tensor<2x3x5x8xf32> + // CHECK: %[[ZP_TOTAL_6:.*]] = chlo.broadcast_multiply + // CHECK-SAME: %[[ZP_TOTAL_5:.*]], %[[COMBINED_SCALE]] + // CHECK: %[[ZP_TOTAL_7:.*]] = mhlo.convert %[[ZP_TOTAL_6]] + // CHECK-SAME: (tensor<2x3x5x8xf32>) -> tensor<2x3x5x8xi32> + + // CHECK: %[[RES_ZP:.*]] = mhlo.constant dense<7> : tensor + // CHECK: %[[ZP_TOTAL_8:.*]] = chlo.broadcast_subtract %[[RES_ZP]], %[[ZP_TOTAL_7]] + // CHECK-SAME: (tensor, tensor<2x3x5x8xi32>) -> tensor<2x3x5x8xi32> + // CHECK: mhlo.add %[[RES_INT]], %[[ZP_TOTAL_8]] + + %2 = "mhlo.dot_general" (%0, %1) { + dot_dimension_numbers = #mhlo.dot< + lhs_batching_dimensions = [0, 2], + rhs_batching_dimensions = [1, 4], + lhs_contracting_dimensions = [4, 3], + rhs_contracting_dimensions = [0, 2] + >} : ( + tensor<2x5x3x7x6x!quant.uniform>, + tensor<6x2x7x8x3x!quant.uniform> + ) -> tensor<2x3x5x8x!quant.uniform> + %3 = mhlo.uniform_dequantize %2 : ( + tensor<2x3x5x8x!quant.uniform> + ) -> tensor<2x3x5x8xf32> + return %3 : tensor<2x3x5x8xf32> +} + +// ----- + +// CHECK-LABEL: func @uniform_quantize_dot_general_dequantize_rhs_zero_zp +func.func @uniform_quantize_dot_general_dequantize_rhs_zero_zp( + %arg0: 
tensor<2x5x6xf32>, %arg1: tensor<6x8x2xf32>) -> tensor<2x5x8xf32> { + %0 = mhlo.uniform_quantize %arg0 : (tensor<2x5x6xf32>) + -> tensor<2x5x6x!quant.uniform> + %1 = mhlo.uniform_quantize %arg1 : (tensor<6x8x2xf32>) + -> tensor<6x8x2x!quant.uniform> + + // CHECK: %[[DOT_RES:.*]] = "mhlo.dot_general" + // CHECK-SAME: lhs_batching_dimensions = [0] + // CHECK-SAME: rhs_batching_dimensions = [2] + // CHECK-SAME: lhs_contracting_dimensions = [2] + // CHECK-SAME: rhs_contracting_dimensions = [0] + + // CHECK: %[[ZP_TOTAL_1:.*]] = mhlo.constant dense<0> : tensor<2x5x8xi32> + + // Zero point offset contribution from LHS tensor * RHS ZP is 0 and skipped. + + // Zero point offset contribution from RHS tensor * LHS ZP. + + // CHECK: %[[RHS_I32:.*]] = mhlo.convert %[[RHS:.*]] : (tensor<6x8x2xi8>) + // CHECK-SAME: -> tensor<6x8x2xi32> + // CHECK: %[[RHS_REDUCE_INIT:.*]] = mhlo.constant dense<0> : tensor + // CHECK: %[[RHS_REDUCE:.*]] = mhlo.reduce(%[[RHS_I32]] init: %[[RHS_REDUCE_INIT]]) + // CHECK-SAME: applies mhlo.add across dimensions = [0] + // CHECK-SAME: (tensor<6x8x2xi32>, tensor) + // CHECK-SAME: -> tensor<8x2xi32> + // CHECK: %[[RHS_ZP:.*]] = mhlo.constant dense<3> : tensor + // CHECK: %[[RHS_ZP_CONTRIB:.*]] = chlo.broadcast_multiply + // CHECK-SAME: %[[RHS_REDUCE]], %[[RHS_ZP]] : + // CHECK-SAME: (tensor<8x2xi32>, tensor) -> tensor<8x2xi32> + // CHECK: %[[RHS_ZP_BCAST:.*]] = "mhlo.broadcast_in_dim"(%[[RHS_ZP_CONTRIB]]) + // CHECK-SAME: broadcast_dimensions = dense<[2, 0]> + // CHECK-SAME: (tensor<8x2xi32>) -> tensor<2x5x8xi32> + // CHECK: %[[ZP_TOTAL_2:.*]] = mhlo.add %[[ZP_TOTAL_1]], %[[RHS_ZP_BCAST]] + + // Zero point offset contribution from LHS ZP * RHS ZP is 0 and skipped. + + // Combine dot result with zero point offset and output final result. 
+ + // CHECK: %[[COMBINED_SCALE:.*]] = mhlo.constant dense<5.000000e-01> : tensor + // CHECK: %[[RES_FP:.*]] = mhlo.convert %[[DOT_RES]] + // CHECK-SAME: (tensor<2x5x8xi32>) -> tensor<2x5x8xf32> + // CHECK: %[[RES_FP_1:.*]] = chlo.broadcast_multiply + // CHECK-SAME: %[[RES_FP:.*]], %[[COMBINED_SCALE]] + // CHECK: %[[RES_INT:.*]] = mhlo.convert %[[RES_FP_1]] + // CHECK-SAME: (tensor<2x5x8xf32>) -> tensor<2x5x8xi32> + + // CHECK: %[[ZP_TOTAL_3:.*]] = mhlo.convert %[[ZP_TOTAL_2]] + // CHECK-SAME: (tensor<2x5x8xi32>) -> tensor<2x5x8xf32> + // CHECK: %[[ZP_TOTAL_4:.*]] = chlo.broadcast_multiply + // CHECK-SAME: %[[ZP_TOTAL_3:.*]], %[[COMBINED_SCALE]] + // CHECK: %[[ZP_TOTAL_5:.*]] = mhlo.convert %[[ZP_TOTAL_4]] + // CHECK-SAME: (tensor<2x5x8xf32>) -> tensor<2x5x8xi32> + + // CHECK: %[[RES_ZP:.*]] = mhlo.constant dense<7> : tensor + // CHECK: %[[ZP_TOTAL_6:.*]] = chlo.broadcast_subtract %[[RES_ZP]], %[[ZP_TOTAL_5]] + // CHECK-SAME: (tensor, tensor<2x5x8xi32>) -> tensor<2x5x8xi32> + // CHECK: mhlo.add %[[RES_INT]], %[[ZP_TOTAL_6]] - // CHECK: %[[VAL2:.*]] = "mhlo.dot_general"(%[[VAL0:.*]], %[[VAL1:.*]]) {dot_dimension_numbers = #mhlo.dot} : (tensor, tensor) -> tensor - // CHECK: %[[VAL4:.*]] = mhlo.convert %[[VAL3:.*]] : (tensor) -> tensor - // CHECK-DAG: %[[VAL5:.*]] = mhlo.constant dense<-8> : tensor - // CHECK-DAG: %[[VAL6:.*]] = mhlo.constant dense<7> : tensor - // CHECK: %[[VAL7:.*]] = mhlo.clamp %[[VAL5]], %[[VAL4]], %[[VAL6]] : (tensor, tensor, tensor) -> tensor - // CHECK: %[[VAL8:.*]] = mhlo.convert %[[VAL7]] : (tensor) -> tensor %2 = "mhlo.dot_general" (%0, %1) { dot_dimension_numbers = #mhlo.dot< - lhs_contracting_dimensions = [1], + lhs_batching_dimensions = [0], + rhs_batching_dimensions = [2], + lhs_contracting_dimensions = [2], rhs_contracting_dimensions = [0] - >} : (tensor>, tensor>) -> tensor> - return + >} : ( + tensor<2x5x6x!quant.uniform>, + tensor<6x8x2x!quant.uniform> + ) -> tensor<2x5x8x!quant.uniform> + %3 = mhlo.uniform_dequantize %2 : ( + 
tensor<2x5x8x!quant.uniform> + ) -> tensor<2x5x8xf32> + return %3 : tensor<2x5x8xf32> +} + +// ----- + +// CHECK-LABEL: func @uniform_quantize_dot_general_dequantize_zero_zp +func.func @uniform_quantize_dot_general_dequantize_zero_zp( + %arg0: tensor<2x5x6xf32>, %arg1: tensor<6x8x2xf32>) -> tensor<2x5x8xf32> { + %0 = mhlo.uniform_quantize %arg0 : (tensor<2x5x6xf32>) + -> tensor<2x5x6x!quant.uniform> + %1 = mhlo.uniform_quantize %arg1 : (tensor<6x8x2xf32>) + -> tensor<6x8x2x!quant.uniform> + + // CHECK: %[[DOT_RES:.*]] = "mhlo.dot_general" + // CHECK-SAME: lhs_batching_dimensions = [0] + // CHECK-SAME: rhs_batching_dimensions = [2] + // CHECK-SAME: lhs_contracting_dimensions = [2] + // CHECK-SAME: rhs_contracting_dimensions = [0] + + // Both LHS/RHS have zero zp. No zp contribution. + + // CHECK-DAG: %[[ZP_CONTRIB:.*]] = mhlo.constant dense<0> : tensor<2x5x8xi32> + + // CHECK-DAG: %[[COMBINED_SCALE:.*]] = mhlo.constant dense<1.500000e+00> : tensor + // CHECK: %[[RES_FP:.*]] = mhlo.convert %[[DOT_RES]] : + // CHECK-SAME: (tensor<2x5x8xi32>) -> tensor<2x5x8xf32> + // CHECK: %[[RES_FP_1:.*]] = chlo.broadcast_multiply + // CHECK-SAME: %[[RES_FP:.*]], %[[COMBINED_SCALE]] + // CHECK: %[[RES_INT:.*]] = mhlo.convert %[[RES_FP_1]] + // CHECK-SAME: (tensor<2x5x8xf32>) -> tensor<2x5x8xi32> + + // CHECK: %[[ZP_TOTAL_1:.*]] = mhlo.convert %[[ZP_CONTRIB]] + // CHECK-SAME: (tensor<2x5x8xi32>) -> tensor<2x5x8xf32> + // CHECK: %[[ZP_TOTAL_2:.*]] = chlo.broadcast_multiply + // CHECK-SAME: %[[ZP_TOTAL_1:.*]], %[[COMBINED_SCALE]] + // CHECK: %[[ZP_TOTAL_3:.*]] = mhlo.convert %[[ZP_TOTAL_2]] + // CHECK-SAME: (tensor<2x5x8xf32>) -> tensor<2x5x8xi32> + + // CHECK: %[[RES_ZP:.*]] = mhlo.constant dense<7> : tensor + // CHECK: %[[ZP_TOTAL_4:.*]] = chlo.broadcast_subtract %[[RES_ZP]], %[[ZP_TOTAL_3]] + // CHECK-SAME: (tensor, tensor<2x5x8xi32>) -> tensor<2x5x8xi32> + // CHECK: mhlo.add %[[RES_INT]], %[[ZP_TOTAL_4]] + + %2 = "mhlo.dot_general" (%0, %1) { + dot_dimension_numbers = #mhlo.dot< + 
lhs_batching_dimensions = [0], + rhs_batching_dimensions = [2], + lhs_contracting_dimensions = [2], + rhs_contracting_dimensions = [0] + >} : ( + tensor<2x5x6x!quant.uniform>, + tensor<6x8x2x!quant.uniform> + ) -> tensor<2x5x8x!quant.uniform> + %3 = mhlo.uniform_dequantize %2 : ( + tensor<2x5x8x!quant.uniform> + ) -> tensor<2x5x8xf32> + return %3 : tensor<2x5x8xf32> } // ----- From 5192adf96920853d699b9bb3ce405438934d3191 Mon Sep 17 00:00:00 2001 From: Ilia Sergachev Date: Thu, 21 Sep 2023 12:22:59 -0700 Subject: [PATCH 099/567] [XLA:GPU] Trigger Triton GEMM fusions also on kCopy input operations. PiperOrigin-RevId: 567384050 --- .../xla/service/gpu/gemm_rewriter_triton.cc | 30 +++++++------------ .../xla/service/gpu/ir_emitter_triton_test.cc | 22 -------------- 2 files changed, 11 insertions(+), 41 deletions(-) diff --git a/third_party/xla/xla/service/gpu/gemm_rewriter_triton.cc b/third_party/xla/xla/service/gpu/gemm_rewriter_triton.cc index 8f237801bfa4cf..8f1e0399ddb13a 100644 --- a/third_party/xla/xla/service/gpu/gemm_rewriter_triton.cc +++ b/third_party/xla/xla/service/gpu/gemm_rewriter_triton.cc @@ -73,16 +73,6 @@ limitations under the License. 
namespace xla { namespace gpu { -int GetFusionLevel(const HloInstruction& hlo, const GpuVersion gpu_version) { - int level = - hlo.GetModule()->config().debug_options().xla_gpu_triton_fusion_level(); - if (!std::get(gpu_version) - .IsAtLeast(se::CudaComputeCapability::AMPERE)) { - level = std::min(level, 1); - } - return level; -} - bool HasDivisibleSuffixAllowingSplit(const absl::Span span, const int64_t divisor) { CHECK_GE(divisor, 1); @@ -1092,6 +1082,12 @@ DimOrderUpdatesOrError FusionContext::AnalyzeForFusion( absl::flat_hash_map& old_to_new_mapping, const GpuVersion gpu_version) const { + int fusion_level = + hlo.GetModule()->config().debug_options().xla_gpu_triton_fusion_level(); + if (!std::get(gpu_version) + .IsAtLeast(se::CudaComputeCapability::AMPERE)) { + fusion_level = std::min(fusion_level, 1); + } if (hlo.opcode() == HloOpcode::kTuple || hlo.opcode() == HloOpcode::kGetTupleElement) { return "Unsupported instruction."; @@ -1112,7 +1108,7 @@ DimOrderUpdatesOrError FusionContext::AnalyzeForFusion( return "Unsupported output data type."; } if (as_input) { - if (GetFusionLevel(hlo, gpu_version) < 2) { + if (fusion_level < 2) { if (hlo.opcode() == HloOpcode::kConvert) { if (FusionDecision decision = RequireTritonFusibleConvert(&hlo, gpu_version); @@ -1128,7 +1124,7 @@ DimOrderUpdatesOrError FusionContext::AnalyzeForFusion( } } } else { - if (GetFusionLevel(hlo, gpu_version) < 2) { + if (fusion_level < 2) { return "Skipping fusing outputs at low fusion levels."; } for (const HloInstruction* operand : hlo.operands()) { @@ -1378,14 +1374,10 @@ StatusOr FuseDot(HloInstruction& dot, if (dot.GetModule()->config().debug_options().xla_gpu_triton_gemm_any()) { return FusionDecision{}; } - - absl::flat_hash_set triggers{ - HloOpcode::kConvert, HloOpcode::kSlice, HloOpcode::kTranspose}; - if (GetFusionLevel(dot, gpu_version) >= 2) { - triggers.insert(HloOpcode::kCopy); - } for (const auto& iter : old_to_new_mapping) { - if (triggers.contains(iter.second->opcode())) { 
+ if (iter.second->opcode() == HloOpcode::kConvert || + iter.second->opcode() == HloOpcode::kSlice || + iter.second->opcode() == HloOpcode::kTranspose) { return FusionDecision{}; } } diff --git a/third_party/xla/xla/service/gpu/ir_emitter_triton_test.cc b/third_party/xla/xla/service/gpu/ir_emitter_triton_test.cc index af6776ed1c0231..65f6906475d8e2 100644 --- a/third_party/xla/xla/service/gpu/ir_emitter_triton_test.cc +++ b/third_party/xla/xla/service/gpu/ir_emitter_triton_test.cc @@ -1086,28 +1086,6 @@ ENTRY e { EXPECT_TRUE(RunAndCompare(kHloText, ErrorSpec{/*aabs=*/2e-3, /*arel=*/2e-3})); } -TEST_F(TritonGemmLevel2Test, FuseTransposeWithoutMixedTypes) { - const std::string kHloText = R"( -ENTRY e { - p1 = f16[150,32,60]{2,1,0} parameter(1) - p0 = f16[75,2,26,60]{3,2,1,0} parameter(0) - t = f16[75,2,60,26]{3,2,1,0} transpose(p0), dimensions={0,1,3,2} - r = f16[150,60,26]{2,1,0} reshape(t) - ROOT tmp_4 = f16[150,32,26]{2,1,0} dot(p1, r), - lhs_batch_dims={0}, lhs_contracting_dims={2}, - rhs_batch_dims={0}, rhs_contracting_dims={1} -})"; - - TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - GetOptimizedModule(kHloText)); - EXPECT_THAT( - module->entry_computation()->root_instruction(), - GmockMatch(m::Fusion(m::Parameter(), m::Parameter()) - .WithFusionKind(HloInstruction::FusionKind::kCustom))); - - EXPECT_TRUE(RunAndCompare(kHloText, ErrorSpec{/*aabs=*/1e-3, /*arel=*/1e-3})); -} - TEST_F(TritonGemmTest, SineOutputIsNotFused) { const std::string kHloText = R"( HloModule m From 712acc54fb2dab395e6cd80316206d53647ae9bf Mon Sep 17 00:00:00 2001 From: Luke Boyer Date: Thu, 21 Sep 2023 12:45:11 -0700 Subject: [PATCH 100/567] Add helper to parse element type from variant. Useful when ops don't support the helper query for retrieving underlying type. 
PiperOrigin-RevId: 567390216 --- .../mlir/lite/transforms/legalize_tensorlist.cc | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tensorflow/compiler/mlir/lite/transforms/legalize_tensorlist.cc b/tensorflow/compiler/mlir/lite/transforms/legalize_tensorlist.cc index 498009806090bc..820021284b833b 100644 --- a/tensorflow/compiler/mlir/lite/transforms/legalize_tensorlist.cc +++ b/tensorflow/compiler/mlir/lite/transforms/legalize_tensorlist.cc @@ -18,14 +18,17 @@ limitations under the License. #include #include +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Casting.h" #include "llvm/Support/raw_ostream.h" #include "mlir/IR/Builders.h" // from @llvm-project #include "mlir/IR/BuiltinAttributes.h" // from @llvm-project +#include "mlir/IR/BuiltinTypes.h" // from @llvm-project #include "mlir/IR/MLIRContext.h" // from @llvm-project #include "mlir/IR/Operation.h" // from @llvm-project #include "mlir/IR/PatternMatch.h" // from @llvm-project +#include "mlir/IR/TypeUtilities.h" // from @llvm-project #include "mlir/IR/Visitors.h" // from @llvm-project #include "mlir/Pass/Pass.h" // from @llvm-project #include "mlir/Transforms/DialectConversion.h" // from @llvm-project @@ -35,6 +38,7 @@ limitations under the License. 
#include "tensorflow/compiler/mlir/lite/utils/convert_type.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" #include "tensorflow/lite/schema/schema_generated.h" namespace { @@ -53,6 +57,19 @@ mlir::TFL::ConstBytesAttr CreateListReserveOptions( return SerializeOptionsToBytes(context, options); } +std::optional GetSingularVariantBaseType(mlir::Value val) { + auto val_t = + mlir::getElementTypeOrSelf(val).dyn_cast_or_null(); + if (!val_t) { + return std::nullopt; + } + llvm::ArrayRef subtypes = val_t.getSubtypes(); + if (subtypes.size() != 1) { + return std::nullopt; + } + return subtypes[0].getElementType(); +} // NOLINT: TODO(b/257472333) This function will be used in child changes. + } // namespace // Create an `mlir::TFL::ConstBytesAttr` which encodes the options From b893c461189c8a44ed850c10d5ba34b6a21704c2 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Thu, 21 Sep 2023 13:13:43 -0700 Subject: [PATCH 101/567] [xla:gpu] Depend on stream_executor implementation in hlo_graph_dumper PiperOrigin-RevId: 567398096 --- third_party/xla/xla/service/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/xla/xla/service/BUILD b/third_party/xla/xla/service/BUILD index 04003d8f96c093..65c42edbbfcaad 100644 --- a/third_party/xla/xla/service/BUILD +++ b/third_party/xla/xla/service/BUILD @@ -5272,7 +5272,7 @@ cc_library( "//xla/hlo/ir:hlo", "//xla/service/gpu:backend_configs_cc", "//xla/service/gpu:cublas_cudnn", - "//xla/stream_executor:stream_executor_headers", + "//xla/stream_executor", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/strings", From d844b5b0311a1718060ccd1c98720e189d038136 Mon Sep 17 00:00:00 2001 From: Fiona Lang Date: Thu, 21 Sep 2023 13:23:19 -0700 Subject: [PATCH 102/567] Create an python/ops/distributions init 
file and delete the distributions import in the python init file. PiperOrigin-RevId: 567400961 --- tensorflow/python/BUILD | 2 +- tensorflow/python/__init__.py | 4 ---- tensorflow/python/modules_with_exports.py | 3 +++ tensorflow/python/ops/distributions/BUILD | 11 ++++++++++- tensorflow/python/ops/distributions/__init__.py | 16 ++++++++++++++++ 5 files changed, 30 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 2fea59e32f411e..4f7b1427e37ce1 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -83,7 +83,6 @@ py_strict_library( "//tensorflow/python/estimator:estimator_py", "//tensorflow/python/ops:gradient_checker_v2", "//tensorflow/python/ops:stateful_random_ops", - "//tensorflow/python/ops/distributions", "//tensorflow/python/ops/structured:structured_ops", "//tensorflow/python/tpu:tpu_estimator", "//tensorflow/python/tpu:tpu_noestimator", @@ -411,6 +410,7 @@ py_strict_library( "//tensorflow/python/ops:tpu_ops_gen", "//tensorflow/python/ops:uniform_quant_ops_gen", "//tensorflow/python/ops:while_v2", + "//tensorflow/python/ops/distributions", "//tensorflow/python/ops/linalg", "//tensorflow/python/ops/linalg/sparse:sparse_py", "//tensorflow/python/ops/losses", diff --git a/tensorflow/python/__init__.py b/tensorflow/python/__init__.py index cc24e60c698a5f..00ad7e3138798d 100644 --- a/tensorflow/python/__init__.py +++ b/tensorflow/python/__init__.py @@ -48,7 +48,3 @@ # Expose symbols minus dunders, unless they are allowlisted above. # This is necessary to export our dunders. __all__ = [s for s in dir() if s in _exported_dunders or not s.startswith('_')] - -# TODO(b/296442875): remove this when we remove the tf.distribution package. -# This import is needed for tf.compat.v1.distributions. 
-from tensorflow.python.ops.distributions import distributions diff --git a/tensorflow/python/modules_with_exports.py b/tensorflow/python/modules_with_exports.py index 6e59fc7f94424a..77bb4573e761c0 100644 --- a/tensorflow/python/modules_with_exports.py +++ b/tensorflow/python/modules_with_exports.py @@ -42,6 +42,9 @@ # Data from tensorflow.python import data +# Distributions +from tensorflow.python.ops import distributions + # TensorFlow Debugger (tfdbg). from tensorflow.python.debug.lib import check_numerics_callback from tensorflow.python.debug.lib import dumping_callback diff --git a/tensorflow/python/ops/distributions/BUILD b/tensorflow/python/ops/distributions/BUILD index 90790b9c071839..34cd63cb08ac8b 100644 --- a/tensorflow/python/ops/distributions/BUILD +++ b/tensorflow/python/ops/distributions/BUILD @@ -8,7 +8,7 @@ package( py_strict_library( name = "distributions", - srcs = ["distributions.py"], + srcs = ["__init__.py"], deprecation = ("TensorFlow Distributions has migrated to " + "TensorFlow Probability " + "(https://github.com/tensorflow/probability). " + @@ -17,6 +17,15 @@ py_strict_library( "early 2019. You should update all usage of " + "`tf.distributions` to `tfp.distributions`."), srcs_version = "PY3", + deps = [ + ":distributions_py", + ], +) + +py_strict_library( + name = "distributions_py", + srcs = ["distributions.py"], + srcs_version = "PY3", deps = [ ":bernoulli", ":beta", diff --git a/tensorflow/python/ops/distributions/__init__.py b/tensorflow/python/ops/distributions/__init__.py index e69de29bb2d1d6..8365f2485eeb95 100644 --- a/tensorflow/python/ops/distributions/__init__.py +++ b/tensorflow/python/ops/distributions/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Core module for TensorFlow distribution objects and helpers.""" +from tensorflow.python.ops.distributions import distributions From 307cfab7f3192db349d028b87a382a5b0328c647 Mon Sep 17 00:00:00 2001 From: Luke Boyer Date: Thu, 21 Sep 2023 14:12:25 -0700 Subject: [PATCH 103/567] TensorListLength kernel implementation. PiperOrigin-RevId: 567415547 --- tensorflow/lite/kernels/variants/BUILD | 15 ++++ .../variants/list_kernels/list_length.cc | 74 +++++++++++++++++++ .../variants/list_kernels/list_length_test.cc | 68 +++++++++++++++++ .../lite/kernels/variants/list_ops_lib.h | 2 + .../variants/list_ops_subgraph_test.cc | 34 +++++++++ .../variants/list_ops_subgraph_test_util.cc | 58 +++++++++++++++ .../variants/list_ops_subgraph_test_util.h | 3 + .../kernels/variants/register_list_ops.cc | 1 + 8 files changed, 255 insertions(+) create mode 100644 tensorflow/lite/kernels/variants/list_kernels/list_length.cc create mode 100644 tensorflow/lite/kernels/variants/list_kernels/list_length_test.cc diff --git a/tensorflow/lite/kernels/variants/BUILD b/tensorflow/lite/kernels/variants/BUILD index 27abbe16721a1e..55fd1a30ac1c5a 100644 --- a/tensorflow/lite/kernels/variants/BUILD +++ b/tensorflow/lite/kernels/variants/BUILD @@ -127,6 +127,21 @@ cc_test( ], ) +cc_test( + name = "list_length_test", + srcs = ["list_kernels/list_length_test.cc"], + deps = [ + ":list_ops_lib", + ":tensor_array", + ":test_util", + "//tensorflow/lite/core/c:c_api_types", + "//tensorflow/lite/core/c:common", + 
"//tensorflow/lite/kernels:test_util", + "//tensorflow/lite/schema:schema_fbs", + "@com_google_googletest//:gtest_main", + ], +) + cc_library( name = "list_ops_util", srcs = ["list_ops_util.cc"], diff --git a/tensorflow/lite/kernels/variants/list_kernels/list_length.cc b/tensorflow/lite/kernels/variants/list_kernels/list_length.cc new file mode 100644 index 00000000000000..612e626fa5d6f4 --- /dev/null +++ b/tensorflow/lite/kernels/variants/list_kernels/list_length.cc @@ -0,0 +1,74 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/lite/core/c/c_api_types.h" +#include "tensorflow/lite/core/c/common.h" +#include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/kernels/variants/list_ops_lib.h" +#include "tensorflow/lite/kernels/variants/tensor_array.h" + +namespace tflite { +namespace variants { +namespace ops { +namespace list_length { +namespace { + +using ::tflite::variants::TensorArray; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + const TfLiteTensor* list_input; + TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &list_input)); + TF_LITE_ENSURE_TYPES_EQ(context, list_input->type, kTfLiteVariant); + + TfLiteTensor* output; + TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output)); + TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt32); + TF_LITE_ENSURE_EQ(context, output->dims->size, 0); + + output->allocation_type = kTfLiteArenaRw; + + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteTensor* list_input; + TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &list_input)); + TF_LITE_ENSURE_EQ(context, list_input->allocation_type, kTfLiteVariantObject); + const TensorArray* const input_arr = + reinterpret_cast(list_input->data.data); + + TfLiteTensor* output; + TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output)); + + const int length = input_arr->NumElements(); + output->data.i32[0] = length; + + return kTfLiteOk; +} +} // namespace +} // namespace list_length + +TfLiteRegistration* Register_LIST_LENGTH() { + static TfLiteRegistration r = {nullptr, nullptr, list_length::Prepare, + list_length::Eval}; + return &r; +} + +} // namespace ops +} // namespace variants +} // namespace tflite diff --git 
a/tensorflow/lite/kernels/variants/list_kernels/list_length_test.cc b/tensorflow/lite/kernels/variants/list_kernels/list_length_test.cc new file mode 100644 index 00000000000000..ac1f49a9e52f41 --- /dev/null +++ b/tensorflow/lite/kernels/variants/list_kernels/list_length_test.cc @@ -0,0 +1,68 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include + +#include +#include +#include "tensorflow/lite/core/c/c_api_types.h" +#include "tensorflow/lite/core/c/common.h" +#include "tensorflow/lite/kernels/test_util.h" +#include "tensorflow/lite/kernels/variants/list_kernels/test_util.h" +#include "tensorflow/lite/kernels/variants/list_ops_lib.h" +#include "tensorflow/lite/schema/schema_generated.h" + +namespace tflite { +namespace variants { +namespace ops { +namespace { + +class ListLengthModel : public ListOpModel { + public: + ListLengthModel() { + list_input_ = AddInput({TensorType_VARIANT, {}}); + length_output_ = AddOutput({TensorType_INT32, {}}); + SetCustomOp("ListLength", {}, Register_LIST_LENGTH); + BuildInterpreter({{}}); + } + const TfLiteTensor* GetOutputTensor() { + return interpreter_->tensor(length_output_); + } + int list_input_; + int length_output_; +}; + +class ListLengthTest : public ::testing::TestWithParam {}; + +TEST_P(ListLengthTest, OutputIsListLength) { + const int length = GetParam(); + ListLengthModel m; + 
m.PopulateListTensor(m.list_input_, {2, 2}, length, kTfLiteInt32); + + ASSERT_EQ(m.Invoke(), kTfLiteOk); + + const TfLiteTensor* const output = m.GetOutputTensor(); + ASSERT_EQ(output->type, kTfLiteInt32); + ASSERT_EQ(output->allocation_type, kTfLiteArenaRw); + ASSERT_THAT(output, DimsAre({})); + ASSERT_EQ(output->data.i32[0], length); +} + +INSTANTIATE_TEST_SUITE_P(ListLengthTests, ListLengthTest, + testing::Values(0, 1, 5, 10, 100)); + +} // namespace +} // namespace ops +} // namespace variants +} // namespace tflite diff --git a/tensorflow/lite/kernels/variants/list_ops_lib.h b/tensorflow/lite/kernels/variants/list_ops_lib.h index eae6e88a87a1f3..4bf1b6038661ed 100644 --- a/tensorflow/lite/kernels/variants/list_ops_lib.h +++ b/tensorflow/lite/kernels/variants/list_ops_lib.h @@ -39,6 +39,8 @@ TfLiteRegistration* Register_LIST_FROM_TENSOR(); TfLiteRegistration* Register_LIST_GET_ITEM(); +TfLiteRegistration* Register_LIST_LENGTH(); + } // namespace ops } // namespace variants } // namespace tflite diff --git a/tensorflow/lite/kernels/variants/list_ops_subgraph_test.cc b/tensorflow/lite/kernels/variants/list_ops_subgraph_test.cc index 888574cc87212f..ee12e45ea3871d 100644 --- a/tensorflow/lite/kernels/variants/list_ops_subgraph_test.cc +++ b/tensorflow/lite/kernels/variants/list_ops_subgraph_test.cc @@ -287,5 +287,39 @@ TEST_F(WhileIncrementListOpsTest, } } +class ListReserveLengthSubgraphTest + : public ListOpsSubgraphTest, + public ::testing::WithParamInterface {}; + +TEST_P(ListReserveLengthSubgraphTest, InterpreterOutputsListLength) { + const int length = GetParam(); + + builder_.BuildReserveLengthSubgraph(&interpreter_.primary_subgraph()); + + ASSERT_EQ(interpreter_.ResizeInputTensor(0, {1}), kTfLiteOk); + ASSERT_EQ(interpreter_.ResizeInputTensor(1, {}), kTfLiteOk); + ASSERT_EQ(interpreter_.AllocateTensors(), kTfLiteOk); + + TfLiteTensor* element_shape = interpreter_.input_tensor(0); + element_shape->data.i32[0] = 2; + + TfLiteTensor* num_elements = 
interpreter_.input_tensor(1); + num_elements->data.i32[0] = length; + + ASSERT_EQ(interpreter_.Invoke(), kTfLiteOk); + + TfLiteTensor* output = interpreter_.output_tensor(0); + ASSERT_EQ(output->type, kTfLiteInt32); + ASSERT_EQ(output->allocation_type, kTfLiteArenaRw); + ASSERT_THAT(output, DimsAre({})); + ASSERT_TRUE(output->data.data != nullptr); + + ASSERT_EQ(output->data.i32[0], length); +} + +INSTANTIATE_TEST_SUITE_P(ListOpsSubgraphParamTests, + ListReserveLengthSubgraphTest, + testing::Values(0, 1, 2, 5, 10)); + } // namespace } // namespace tflite diff --git a/tensorflow/lite/kernels/variants/list_ops_subgraph_test_util.cc b/tensorflow/lite/kernels/variants/list_ops_subgraph_test_util.cc index 6090741e631eb5..536e1446588a78 100644 --- a/tensorflow/lite/kernels/variants/list_ops_subgraph_test_util.cc +++ b/tensorflow/lite/kernels/variants/list_ops_subgraph_test_util.cc @@ -35,6 +35,7 @@ limitations under the License. using ::tflite::subgraph_test_util::SetupTensor; using ::tflite::variants::detail::ListReserveOptions; +using ::tflite::variants::ops::Register_LIST_LENGTH; using ::tflite::variants::ops::Register_LIST_RESERVE; using ::tflite::variants::ops::Register_LIST_SET_ITEM; using ::tflite::variants::ops::Register_LIST_STACK; @@ -344,4 +345,61 @@ void ListOpsSubgraphBuilder::BuildSetItemAndIncrementSubgraph( TF_LITE_ASSERT_EQ(add_stat, kTfLiteOk); } +void ListOpsSubgraphBuilder::BuildReserveLengthSubgraph(Subgraph* subgraph) { + constexpr int kElementShape = 0; + constexpr int kNumElements = 1; + constexpr int kReserveOut = 2; + constexpr int kLengthOut = 3; + constexpr int kTensorCount = 4; + // kElementShape(0) --> +-------------+ + // | ListReserve | + // kNumElements(1) --> +-------------+ --> kReserveOut(2) + // | + // +------------+ + // | ListLength | + // +------------+ --> kLengthOut(3) + + int first_new_tensor_index; + TF_LITE_ASSERT_EQ(subgraph->AddTensors(kTensorCount, &first_new_tensor_index), + kTfLiteOk); + 
TF_LITE_ASSERT_EQ(first_new_tensor_index, 0); + + TF_LITE_ASSERT_EQ(subgraph->SetOutputs({kLengthOut}), kTfLiteOk); + SetupTensor(subgraph, kLengthOut, kTfLiteInt32); + + TF_LITE_ASSERT_EQ(subgraph->SetInputs({kElementShape, kNumElements}), + kTfLiteOk); + SetupTensor(subgraph, kElementShape, kTfLiteInt32); + SetupTensor(subgraph, kNumElements, kTfLiteInt32); + SetupTensor(subgraph, kReserveOut, kTfLiteVariant); + + TfLiteRegistration* reserve_reg = Register_LIST_RESERVE(); + reserve_reg->builtin_code = BuiltinOperator_CUSTOM; + reserve_reg->custom_name = "ListReserve"; + + ListReserveOptions* options = RequestReserveOptions(TensorType_INT32); + + int reserve_node_index; + TfLiteStatus stat = subgraph->AddNodeWithParameters( + {kElementShape, kNumElements}, {kReserveOut}, + /*intermediates=*/{}, reinterpret_cast(options), + sizeof(ListReserveOptions), + /*builtin_data=*/nullptr, reserve_reg, &reserve_node_index); + + TF_LITE_ASSERT_EQ(stat, kTfLiteOk); + + TfLiteRegistration* length_reg = Register_LIST_LENGTH(); + length_reg->builtin_code = BuiltinOperator_CUSTOM; + length_reg->custom_name = "ListLength"; + + int length_node_index; + stat = subgraph->AddNodeWithParameters( + {kReserveOut}, {kLengthOut}, + /*intermediates=*/{}, /*init_data=*/nullptr, + /*init_data_size=*/0, + /*builtin_data=*/nullptr, length_reg, &length_node_index); + + TF_LITE_ASSERT_EQ(stat, kTfLiteOk); +} + } // namespace tflite diff --git a/tensorflow/lite/kernels/variants/list_ops_subgraph_test_util.h b/tensorflow/lite/kernels/variants/list_ops_subgraph_test_util.h index d4e469e6e53526..720a1461bc392d 100644 --- a/tensorflow/lite/kernels/variants/list_ops_subgraph_test_util.h +++ b/tensorflow/lite/kernels/variants/list_ops_subgraph_test_util.h @@ -56,6 +56,9 @@ class ListOpsSubgraphBuilder { // the given int by 1. void BuildSetItemAndIncrementSubgraph(Subgraph* subgraph); + // Populates the given Subgraph with a "ListReserve" and "ListLength" op. 
+ void BuildReserveLengthSubgraph(Subgraph* subgraph); + private: // Creates a constant tensor in given Subgraphs at given indice with // corresponding data. diff --git a/tensorflow/lite/kernels/variants/register_list_ops.cc b/tensorflow/lite/kernels/variants/register_list_ops.cc index 606ef9afc9a1c0..6c9f205377b0c5 100644 --- a/tensorflow/lite/kernels/variants/register_list_ops.cc +++ b/tensorflow/lite/kernels/variants/register_list_ops.cc @@ -27,6 +27,7 @@ void RegisterListOps(MutableOpResolver* resolver) { resolver->AddCustom("TensorListSetItem", Register_LIST_SET_ITEM()); resolver->AddCustom("TensorListFromTensor", Register_LIST_FROM_TENSOR()); resolver->AddCustom("TensorListGetItem", Register_LIST_GET_ITEM()); + resolver->AddCustom("TensorListLength", Register_LIST_LENGTH()); } } // namespace ops From 8c3316dfd30e0ce8bfe323d94d35c0b2edf60b98 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Thu, 21 Sep 2023 14:16:59 -0700 Subject: [PATCH 104/567] Replicate small constants so they don't need to be sent to their successors. A small constant is replicated to each of its successors' devices. The maximum size of a constant to be replicated is 16 elements. This pass is disabled by default and can be enabled with the flag replicate_small_constants. 
PiperOrigin-RevId: 567416836 --- tensorflow/core/common_runtime/BUILD | 50 ++- .../replicate_constants_pass.cc | 189 ++++++++++ .../common_runtime/replicate_constants_pass.h | 50 +++ .../replicate_constants_pass_test.cc | 334 ++++++++++++++++++ tensorflow/core/config/flag_defs.h | 4 + tensorflow/core/config/flags_api_wrapper.cc | 1 + tensorflow/python/flags_pybind.pyi | 1 + 7 files changed, 620 insertions(+), 9 deletions(-) create mode 100644 tensorflow/core/common_runtime/replicate_constants_pass.cc create mode 100644 tensorflow/core/common_runtime/replicate_constants_pass.h create mode 100644 tensorflow/core/common_runtime/replicate_constants_pass_test.cc diff --git a/tensorflow/core/common_runtime/BUILD b/tensorflow/core/common_runtime/BUILD index 4b7c5eb8b7c29d..1db3c0d12ed9f5 100644 --- a/tensorflow/core/common_runtime/BUILD +++ b/tensorflow/core/common_runtime/BUILD @@ -1,7 +1,9 @@ +load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") load( "//tensorflow:tensorflow.bzl", "if_google", "if_libtpu", + "if_macos", "if_oss", "if_zendnn", "tf_cc_test", @@ -21,25 +23,24 @@ load( "tf_protos_all", "tf_protos_grappler", ) -load( - "//tensorflow/core/platform:rules_cc.bzl", - "cc_library", -) load( "//tensorflow/core/platform:build_config_root.bzl", "if_static", "tf_cuda_tests_tags", ) -load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") load( - "//third_party/mkl:build_defs.bzl", - "if_mkl", - "if_mkl_ml", + "//tensorflow/core/platform:rules_cc.bzl", + "cc_library", ) load( "//tensorflow/security/fuzzing:tf_fuzzing.bzl", "tf_cc_fuzz_test", ) +load( + "//third_party/mkl:build_defs.bzl", + "if_mkl", + "if_mkl_ml", +) default_package_visibility = [ "//tensorflow:internal", @@ -299,6 +300,7 @@ filegroup( "renamed_device.h", "rendezvous_mgr.h", "rendezvous_util.h", + "replicate_constants_pass.h", "replicate_per_replica_nodes.h", "ring_alg.h", "ring_gatherer.h", @@ -1132,6 +1134,31 @@ cc_library( alwayslink = 1, ) +cc_library( + name = 
"replicate_constants_pass", + srcs = ["replicate_constants_pass.cc"], + hdrs = ["replicate_constants_pass.h"], + copts = tf_copts(), + deps = [ + ":optimization_registry", + "//tensorflow/core:core_cpu_base", + "//tensorflow/core:framework", + "//tensorflow/core:portable_gif_internal", + "//tensorflow/core/config:flag_defs", + "//tensorflow/core/config:flags", + "//tensorflow/core/framework:node_def_util", + "//tensorflow/core/framework:tensor_proto_cc", + "//tensorflow/core/framework:tensor_shape_proto_cc", + "@com_google_absl//absl/container:btree", + "@com_google_absl//absl/status", + "@com_google_absl//absl/strings", + "@local_tsl//tsl/platform:errors", + "@local_tsl//tsl/platform:status", + "@local_tsl//tsl/platform:statusor", + ], + alwayslink = 1, +) + cc_library( name = "local_device", srcs = ["local_device.cc"], @@ -1948,7 +1975,10 @@ tf_cuda_library( ":step_stats_collector", ":threadpool_device", ":threadpool_device_factory", - ] + if_zendnn([":zen_layout_pass"]), + ] + if_zendnn([":zen_layout_pass"]) + if_macos( + [], + [":replicate_constants_pass"], + ), ) tf_cuda_library( @@ -2312,6 +2342,7 @@ tf_cc_tests( "optimization_registry_test.cc", "pending_counts_test.cc", "placer_inspection_required_ops_utils_test.cc", + "replicate_constants_pass_test.cc", "session_test.cc", "threadpool_device_test.cc", ], @@ -2342,6 +2373,7 @@ tf_cc_tests( "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core:testlib", + "//tensorflow/core/config:flag_defs", "//tensorflow/core/kernels:ops_util", "//tensorflow/core/nccl:collective_communicator", "//tensorflow/core/platform:regexp", diff --git a/tensorflow/core/common_runtime/replicate_constants_pass.cc b/tensorflow/core/common_runtime/replicate_constants_pass.cc new file mode 100644 index 00000000000000..73f96d66f940bb --- /dev/null +++ b/tensorflow/core/common_runtime/replicate_constants_pass.cc @@ -0,0 +1,189 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/common_runtime/replicate_constants_pass.h" + +#include +#include +#include +#include +#include + +#include "absl/container/btree_map.h" +#include "absl/status/status.h" +#include "absl/strings/str_cat.h" +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/config/flag_defs.h" +#include "tensorflow/core/config/flags.h" +#include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor.pb.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/tensor_shape.pb.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/util/device_name_utils.h" +#include "tensorflow/core/util/dump_graph.h" +#include "tsl/platform/errors.h" +#include "tsl/platform/status.h" +#include "tsl/platform/statusor.h" + +namespace tensorflow { +namespace { + +// Maximum size constant to replicate. +constexpr int64_t kMaxSize = 16; + +// Set `node`'s name to <original-name>/replicate/_<unique-index> +void SetUniqueName(Graph* graph, Node* node) { + node->set_name(graph->NewName(absl::StrCat(node->name(), "/replicate"))); +} + +// `node` has an output control edge. 
+bool HasControlOut(Node* node) { + auto control_out_it = + std::find_if(node->out_edges().begin(), node->out_edges().end(), + [](const auto& e) { return e->IsControlEdge(); }); + return control_out_it != node->out_edges().end(); +} + +// `node`'s device is a CPU. +bool HasCpuDevice(const Node* node) { + DeviceNameUtils::ParsedName device; + if (!DeviceNameUtils::ParseFullName(node->assigned_device_name(), &device)) + return false; + return device.type == "CPU"; +} + +// Get the CPU device on the same host as dst. +Status GetDestinationCpuDevice(const Node* dst, std::string* device) { + if (!dst->has_assigned_device_name()) + return absl::AbortedError( + absl::StrCat("Node name: ", dst->name(), " has no assigned device.")); + return DeviceNameUtils::DeviceNameToCpuDeviceName(dst->assigned_device_name(), + device); +} + +// Collect the successor edges of the constant. Group them by the device of the +// successor. +Status GetSuccessorEdges( + Node* node, + absl::btree_map>& device_to_edges) { + for (const auto& edge : node->out_edges()) { + const Node* dst = edge->dst(); + std::string device; + TF_RETURN_IF_ERROR(GetDestinationCpuDevice(dst, &device)); + if (!device_to_edges.count(device)) device_to_edges.insert({device, {}}); + device_to_edges[device].push_back(edge); + } + return OkStatus(); +} + +// Replicate the constant to each successor device. +void ReplicateToEachDevice( + Graph* graph, Node* node, + absl::btree_map>& device_to_edges) { + for (const auto& pair : device_to_edges) { + Node* copy = graph->CopyNode(node); + SetUniqueName(graph, copy); + const std::string device = pair.first; + copy->set_assigned_device_name(device); + // Set the successor edges to ops on this device. + for (const Edge* edge : pair.second) { + graph->AddEdge(copy, edge->src_output(), edge->dst(), edge->dst_input()); + } + // Replicate in edges that are control. 
+ for (Node* src : node->in_nodes()) { + graph->AddControlEdge(src, copy, true); + } + } + graph->RemoveNode(node); +} + +} // namespace + +Status ReplicateConstantsPass::Run( + const GraphOptimizationPassOptions& options) { + if (!flags::Global().replicate_small_constants.value()) { + VLOG(1) << "replicate_constants_pass not enabled"; + return OkStatus(); + } + VLOG(1) << "replicate_constants_pass will replicate constants with " + "number-of-elements <= " + << kMaxSize; + + Graph* graph = options.graph->get(); + if (VLOG_IS_ON(1)) { + VLOG(1) << DumpGraphToFile("before_replicate_constants_pass", *graph, + options.flib_def); + } + int64_t min_skipped = std::numeric_limits::max(); + int64_t max_skipped = std::numeric_limits::min(); + for (Node* node : graph->nodes()) { + if (!node->IsConstant()) continue; + + // For performance, skip when there is at most one successor. + if (node->out_edges().size() <= 1) continue; + + // Skip if the constant has a control successor. Replicating constants with + // control successors would require replicating these control edges, which + // could result in even more message passing. + if (HasControlOut(node)) continue; + + // Skip if the constant is too large. + const TensorProto* value = nullptr; + TF_RETURN_IF_ERROR(GetNodeAttr(node->attrs(), "value", &value)); + TF_ASSIGN_OR_RETURN(TensorShape shape, + TensorShape::BuildTensorShape(value->tensor_shape())); + if (shape.num_elements() > kMaxSize) { + min_skipped = std::min(min_skipped, shape.num_elements()); + max_skipped = std::max(max_skipped, shape.num_elements()); + continue; + } + + // Skip if there is no assigned device. + if (!node->has_assigned_device_name()) continue; + + // Skip when the original constant is not on a CPU, because it is not clear + // whether replicating from non-CPU to CPU is valid. + if (!HasCpuDevice(node)) continue; + + // Collect successor edges, per device. 
+ absl::btree_map> device_to_edges; + TF_RETURN_IF_ERROR(GetSuccessorEdges(node, device_to_edges)); + + // Skip if all successors are on the same device. + if (device_to_edges.size() <= 1) continue; + + // Replicate the constant to each successor device. + ReplicateToEachDevice(graph, node, device_to_edges); + } + if (min_skipped != std::numeric_limits::max()) { + VLOG(1) << "replicate_constants_pass skipped replicating constants with " + "number of elements in the range " + << min_skipped << " to " << max_skipped << "."; + } + + if (VLOG_IS_ON(1)) { + VLOG(1) << DumpGraphToFile("after_replicate_constants_pass", *graph, + options.flib_def); + } + return OkStatus(); +} + +REGISTER_OPTIMIZATION(OptimizationPassRegistry::POST_PLACEMENT, 3, + ReplicateConstantsPass); + +} // namespace tensorflow diff --git a/tensorflow/core/common_runtime/replicate_constants_pass.h b/tensorflow/core/common_runtime/replicate_constants_pass.h new file mode 100644 index 00000000000000..b7b2f0fe98c0d2 --- /dev/null +++ b/tensorflow/core/common_runtime/replicate_constants_pass.h @@ -0,0 +1,50 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_REPLICATE_CONSTANTS_PASS_H_ +#define TENSORFLOW_CORE_COMMON_RUNTIME_REPLICATE_CONSTANTS_PASS_H_ + +#include "tensorflow/core/common_runtime/optimization_registry.h" + +// Small constants are replicated to the hosts of their successors. This pass +// only applies when there are multiple successors. +// +// For example, the graph: +// C -> {Op0, Op1, Op2, Op3} +// C's assigned_device is /job:tpu_host_worker/replica:0/task:0/device:CPU:0 +// Op0's assigned_device is /job:tpu_host_worker/replica:0/task:0/device:TPU:0 +// Op1's assigned_device is /job:tpu_host_worker/replica:0/task:0/device:TPU:1 +// Op2's assigned_device is /job:tpu_host_worker/replica:0/task:1/device:TPU:0 +// Op3's assigned_device is /job:tpu_host_worker/replica:0/task:1/device:TPU:1 +// is rewritten to: +// C0 -> {Op0, Op1} +// C1 -> {Op2, Op3} +// C0's assigned_device is /job:tpu_host_worker/replica:0/task:0/device:CPU:0 +// C1's assigned_device is /job:tpu_host_worker/replica:0/task:1/device:CPU:0 +// Op0's assigned_device is /job:tpu_host_worker/replica:0/task:0/device:TPU:0 +// Op1's assigned_device is /job:tpu_host_worker/replica:0/task:0/device:TPU:1 +// Op2's assigned_device is /job:tpu_host_worker/replica:0/task:1/device:TPU:0 +// Op3's assigned_device is /job:tpu_host_worker/replica:0/task:1/device:TPU:1 + +namespace tensorflow { + +class ReplicateConstantsPass : public GraphOptimizationPass { + public: + Status Run(const GraphOptimizationPassOptions& options) override; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_COMMON_RUNTIME_REPLICATE_CONSTANTS_PASS_H_ diff --git a/tensorflow/core/common_runtime/replicate_constants_pass_test.cc b/tensorflow/core/common_runtime/replicate_constants_pass_test.cc new file mode 100644 index 00000000000000..dae22012bfba8b --- /dev/null +++ b/tensorflow/core/common_runtime/replicate_constants_pass_test.cc @@ -0,0 +1,334 
@@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/common_runtime/replicate_constants_pass.h" + +#include +#include + +#include "tensorflow/cc/framework/ops.h" +#include "tensorflow/cc/framework/scope.h" +#include "tensorflow/cc/ops/const_op.h" +#include "tensorflow/cc/ops/math_ops.h" +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/config/flag_defs.h" +#include "tensorflow/core/config/flags.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/platform/test.h" +#include "tsl/lib/core/status_test_util.h" +#include "tsl/platform/status.h" +#include "tsl/platform/test.h" + +namespace tensorflow { + +const char kCpu0[] = "/job:tpu_host_worker/replica:0/task:0/device:CPU:0"; +const char kCpu1[] = "/job:tpu_host_worker/replica:0/task:1/device:CPU:0"; +const char kTpu00[] = "/job:tpu_host_worker/replica:0/task:0/device:TPU:0"; +const char kTpu01[] = "/job:tpu_host_worker/replica:0/task:0/device:TPU:1"; +const char kTpu10[] = "/job:tpu_host_worker/replica:0/task:1/device:TPU:0"; +const char kTpu11[] = "/job:tpu_host_worker/replica:0/task:1/device:TPU:1"; + +// Return the node with name `name`. 
+Node* GetNode(const Graph& graph, const std::string& name) { + for (Node* node : graph.nodes()) { + if (node->name() == name) return node; + } + CHECK(false) << "Unknown node name: " << name; + return nullptr; +} + +// Return the first predecessor of `node`. +Node* GetPredecessor(Node* node) { + auto it = node->in_nodes().begin(); + CHECK(it != node->in_nodes().end()) + << "No predecessor for " << node->name() << "\n"; + return *it; +} + +// There exists an edge from `src` to `dst`. +bool IsEdge(Node* src, Node* dst) { + for (Node* node : src->out_nodes()) { + if (node == dst) return true; + } + return false; +} + +// Test that a small constant is replicated to each successor's device. +TEST(ReplicateConstantsPassTest, TestSmallConstant) { + std::unique_ptr graph(new Graph(OpRegistry::Global())); + { + Scope scope = Scope::NewRootScope().ExitOnError(); + Output const0 = + ops::Const(scope.WithOpName("const"), 1.0f, TensorShape({})); + ops::Negate dst0(scope.WithOpName("dst0"), const0); + ops::Negate dst1(scope.WithOpName("dst1"), const0); + ops::Negate dst2(scope.WithOpName("dst2"), const0); + TF_CHECK_OK(scope.ToGraph(graph.get())); + } + GetNode(*graph, "const")->set_assigned_device_name(kCpu0); + GetNode(*graph, "dst0")->set_assigned_device_name(kCpu0); + GetNode(*graph, "dst1")->set_assigned_device_name(kCpu1); + GetNode(*graph, "dst2")->set_assigned_device_name(kCpu1); + + // Enable the pass. 
+ flags::Global().replicate_small_constants.reset(true); + + GraphDef before; + graph->ToGraphDef(&before); + GraphOptimizationPassOptions options; + options.graph = &graph; + ReplicateConstantsPass pass; + TF_ASSERT_OK(pass.Run(options)); + GraphDef actual; + graph->ToGraphDef(&actual); + + Node* dst0 = GetNode(*graph, "dst0"); + Node* dst1 = GetNode(*graph, "dst1"); + Node* dst2 = GetNode(*graph, "dst2"); + EXPECT_EQ(dst0->assigned_device_name(), + GetPredecessor(dst0)->assigned_device_name()); + EXPECT_EQ(dst1->assigned_device_name(), + GetPredecessor(dst1)->assigned_device_name()); + EXPECT_EQ(dst2->assigned_device_name(), + GetPredecessor(dst2)->assigned_device_name()); +} + +// Test that a large constant is ignored. +TEST(ReplicateConstantsPassTest, TestLargeConstant) { + std::unique_ptr graph(new Graph(OpRegistry::Global())); + { + Scope scope = Scope::NewRootScope().ExitOnError(); + Output const0 = + ops::Const(scope.WithOpName("const"), + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); + ops::Negate dst0(scope.WithOpName("dst0"), const0); + ops::Negate dst1(scope.WithOpName("dst1"), const0); + ops::Negate dst2(scope.WithOpName("dst2"), const0); + TF_CHECK_OK(scope.ToGraph(graph.get())); + } + GetNode(*graph, "const")->set_assigned_device_name(kCpu0); + GetNode(*graph, "dst0")->set_assigned_device_name(kCpu0); + GetNode(*graph, "dst1")->set_assigned_device_name(kCpu1); + GetNode(*graph, "dst2")->set_assigned_device_name(kCpu1); + + // Enable the pass. 
+ flags::Global().replicate_small_constants.reset(true); + + GraphDef before; + graph->ToGraphDef(&before); + GraphOptimizationPassOptions options; + options.graph = &graph; + ReplicateConstantsPass pass; + TF_ASSERT_OK(pass.Run(options)); + GraphDef actual; + graph->ToGraphDef(&actual); + + Node* dst0 = GetNode(*graph, "dst0"); + Node* dst1 = GetNode(*graph, "dst1"); + Node* dst2 = GetNode(*graph, "dst2"); + EXPECT_EQ(dst0->assigned_device_name(), + GetPredecessor(dst0)->assigned_device_name()); + EXPECT_NE(dst1->assigned_device_name(), + GetPredecessor(dst1)->assigned_device_name()); + EXPECT_NE(dst2->assigned_device_name(), + GetPredecessor(dst2)->assigned_device_name()); +} + +// Test that a constant with a control successor is ignored. +TEST(ReplicateConstantsPassTest, TestControlOut) { + std::unique_ptr graph(new Graph(OpRegistry::Global())); + { + Scope scope = Scope::NewRootScope().ExitOnError(); + Output const0 = + ops::Const(scope.WithOpName("const0"), 1.0f, TensorShape({})); + Output ctrl_succ = + ops::Const(scope.WithOpName("ctrl_succ"), 1.0f, TensorShape({})); + ops::Negate dst0(scope.WithOpName("dst0"), const0); + ops::Negate dst1(scope.WithOpName("dst1"), const0); + ops::Negate dst2(scope.WithOpName("dst2"), const0); + TF_CHECK_OK(scope.ToGraph(graph.get())); + } + GetNode(*graph, "const0")->set_assigned_device_name(kCpu0); + GetNode(*graph, "ctrl_succ")->set_assigned_device_name(kCpu0); + GetNode(*graph, "dst0")->set_assigned_device_name(kCpu0); + GetNode(*graph, "dst1")->set_assigned_device_name(kCpu1); + GetNode(*graph, "dst2")->set_assigned_device_name(kCpu1); + graph->AddControlEdge(GetNode(*graph, "const0"), + GetNode(*graph, "ctrl_succ")); + + // Enable the pass. 
+ flags::Global().replicate_small_constants.reset(true); + + GraphDef before; + graph->ToGraphDef(&before); + GraphOptimizationPassOptions options; + options.graph = &graph; + ReplicateConstantsPass pass; + TF_ASSERT_OK(pass.Run(options)); + GraphDef actual; + graph->ToGraphDef(&actual); + + Node* dst0 = GetNode(*graph, "dst0"); + Node* dst1 = GetNode(*graph, "dst1"); + Node* dst2 = GetNode(*graph, "dst2"); + EXPECT_EQ(dst0->assigned_device_name(), + GetPredecessor(dst0)->assigned_device_name()); + EXPECT_NE(dst1->assigned_device_name(), + GetPredecessor(dst1)->assigned_device_name()); + EXPECT_NE(dst2->assigned_device_name(), + GetPredecessor(dst2)->assigned_device_name()); +} + +// Test that a constant on a TPU is ignored. +TEST(ReplicateConstantsPassTest, TestTpuConst) { + std::unique_ptr graph(new Graph(OpRegistry::Global())); + { + Scope scope = Scope::NewRootScope().ExitOnError(); + Output const0 = + ops::Const(scope.WithOpName("const0"), 1.0f, TensorShape({})); + ops::Negate dst0(scope.WithOpName("dst0"), const0); + ops::Negate dst1(scope.WithOpName("dst1"), const0); + ops::Negate dst2(scope.WithOpName("dst2"), const0); + TF_CHECK_OK(scope.ToGraph(graph.get())); + } + GetNode(*graph, "const0")->set_assigned_device_name(kTpu00); + GetNode(*graph, "dst0")->set_assigned_device_name(kTpu00); + GetNode(*graph, "dst1")->set_assigned_device_name(kTpu10); + GetNode(*graph, "dst2")->set_assigned_device_name(kTpu10); + + // Enable the pass. 
+ flags::Global().replicate_small_constants.reset(true); + + GraphDef before; + graph->ToGraphDef(&before); + GraphOptimizationPassOptions options; + options.graph = &graph; + ReplicateConstantsPass pass; + TF_ASSERT_OK(pass.Run(options)); + GraphDef actual; + graph->ToGraphDef(&actual); + + Node* dst0 = GetNode(*graph, "dst0"); + Node* dst1 = GetNode(*graph, "dst1"); + Node* dst2 = GetNode(*graph, "dst2"); + EXPECT_EQ(dst0->assigned_device_name(), + GetPredecessor(dst0)->assigned_device_name()); + EXPECT_NE(dst1->assigned_device_name(), + GetPredecessor(dst1)->assigned_device_name()); + EXPECT_NE(dst2->assigned_device_name(), + GetPredecessor(dst2)->assigned_device_name()); +} + +TEST(ReplicateConstantsPassTest, TestSmallAndLargeConstants) { + std::unique_ptr graph(new Graph(OpRegistry::Global())); + { + Scope scope = Scope::NewRootScope().ExitOnError(); + Output small = ops::Const(scope.WithOpName("small"), 1.0f, TensorShape({})); + Output large = + ops::Const(scope.WithOpName("large"), + {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, + 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}); + ops::Add dst0(scope.WithOpName("dst0"), small, large); + ops::Add dst1(scope.WithOpName("dst1"), small, large); + ops::Add dst2(scope.WithOpName("dst2"), small, large); + TF_CHECK_OK(scope.ToGraph(graph.get())); + } + GetNode(*graph, "small")->set_assigned_device_name(kCpu0); + GetNode(*graph, "large")->set_assigned_device_name(kCpu0); + GetNode(*graph, "dst0")->set_assigned_device_name(kCpu0); + GetNode(*graph, "dst1")->set_assigned_device_name(kCpu1); + GetNode(*graph, "dst2")->set_assigned_device_name(kCpu1); + + // Enable the pass. 
+ flags::Global().replicate_small_constants.reset(true); + + GraphDef before; + graph->ToGraphDef(&before); + GraphOptimizationPassOptions options; + options.graph = &graph; + ReplicateConstantsPass pass; + TF_ASSERT_OK(pass.Run(options)); + GraphDef actual; + graph->ToGraphDef(&actual); + + Node* small0 = GetNode(*graph, "small/replicate/_0"); + Node* small1 = GetNode(*graph, "small/replicate/_1"); + Node* large = GetNode(*graph, "large"); + Node* dst0 = GetNode(*graph, "dst0"); + Node* dst1 = GetNode(*graph, "dst1"); + Node* dst2 = GetNode(*graph, "dst2"); + EXPECT_EQ(small0->assigned_device_name(), kCpu0); + EXPECT_EQ(small1->assigned_device_name(), kCpu1); + EXPECT_EQ(large->assigned_device_name(), kCpu0); + EXPECT_EQ(dst0->assigned_device_name(), kCpu0); + EXPECT_EQ(dst1->assigned_device_name(), kCpu1); + EXPECT_EQ(dst2->assigned_device_name(), kCpu1); + EXPECT_TRUE(IsEdge(small0, dst0)); + EXPECT_TRUE(IsEdge(large, dst0)); + EXPECT_TRUE(IsEdge(small1, dst1)); + EXPECT_TRUE(IsEdge(large, dst1)); + EXPECT_TRUE(IsEdge(small1, dst2)); + EXPECT_TRUE(IsEdge(large, dst2)); +} + +// Test that a constant at a CPU with TPU successors is replicated to the +// TPUs' host CPUs. 
+TEST(ReplicateConstantsPassTest, TestTpuDestinations) { + std::unique_ptr graph(new Graph(OpRegistry::Global())); + { + Scope scope = Scope::NewRootScope().ExitOnError(); + Output const0 = + ops::Const(scope.WithOpName("const"), 1.0f, TensorShape({})); + ops::Negate dst00(scope.WithOpName("dst00"), const0); + ops::Negate dst01(scope.WithOpName("dst01"), const0); + ops::Negate dst10(scope.WithOpName("dst10"), const0); + ops::Negate dst11(scope.WithOpName("dst11"), const0); + TF_CHECK_OK(scope.ToGraph(graph.get())); + } + GetNode(*graph, "const")->set_assigned_device_name(kCpu0); + GetNode(*graph, "dst00")->set_assigned_device_name(kTpu00); + GetNode(*graph, "dst01")->set_assigned_device_name(kTpu01); + GetNode(*graph, "dst10")->set_assigned_device_name(kTpu10); + GetNode(*graph, "dst11")->set_assigned_device_name(kTpu11); + + // Enable the pass. + flags::Global().replicate_small_constants.reset(true); + + GraphDef before; + graph->ToGraphDef(&before); + GraphOptimizationPassOptions options; + options.graph = &graph; + ReplicateConstantsPass pass; + TF_ASSERT_OK(pass.Run(options)); + GraphDef actual; + graph->ToGraphDef(&actual); + + Node* const0 = GetNode(*graph, "const/replicate/_0"); + Node* const1 = GetNode(*graph, "const/replicate/_1"); + Node* dst00 = GetNode(*graph, "dst00"); + Node* dst01 = GetNode(*graph, "dst01"); + Node* dst10 = GetNode(*graph, "dst10"); + Node* dst11 = GetNode(*graph, "dst11"); + EXPECT_EQ(const0->assigned_device_name(), kCpu0); + EXPECT_EQ(const1->assigned_device_name(), kCpu1); + EXPECT_TRUE(IsEdge(const0, dst00)); + EXPECT_TRUE(IsEdge(const0, dst01)); + EXPECT_TRUE(IsEdge(const1, dst10)); + EXPECT_TRUE(IsEdge(const1, dst11)); +} + +} // namespace tensorflow diff --git a/tensorflow/core/config/flag_defs.h b/tensorflow/core/config/flag_defs.h index 4ab5fb4750de46..89ea6a9b73bbf1 100644 --- a/tensorflow/core/config/flag_defs.h +++ b/tensorflow/core/config/flag_defs.h @@ -49,6 +49,10 @@ class Flags { TF_DECLARE_FLAG(more_stack_traces, 
false, "Enable experimental code that preserves and propagates " "graph node stack traces in C++."); + TF_DECLARE_FLAG(replicate_small_constants, false, + "Enable a graph optimization pass that replicate each small " + "constant to its successors' devices. This can decrease " + "message passing."); // LINT.ThenChange(//tensorflow/core/config/flags_api_wrapper.cc) }; diff --git a/tensorflow/core/config/flags_api_wrapper.cc b/tensorflow/core/config/flags_api_wrapper.cc index 58074fb06257d1..974581e931f7ec 100644 --- a/tensorflow/core/config/flags_api_wrapper.cc +++ b/tensorflow/core/config/flags_api_wrapper.cc @@ -51,5 +51,6 @@ PYBIND11_MODULE(flags_pybind, m) { TF_PY_DECLARE_FLAG(saved_model_fingerprinting); TF_PY_DECLARE_FLAG(tf_shape_default_int64); TF_PY_DECLARE_FLAG(more_stack_traces); + TF_PY_DECLARE_FLAG(replicate_small_constants); // LINT.ThenChange(//tensorflow/core/config/flag_defs.h) }; diff --git a/tensorflow/python/flags_pybind.pyi b/tensorflow/python/flags_pybind.pyi index 34b0a0c5666eb8..90aa0a7d76114b 100644 --- a/tensorflow/python/flags_pybind.pyi +++ b/tensorflow/python/flags_pybind.pyi @@ -24,6 +24,7 @@ class Flags: graph_building_optimization: Flag more_stack_traces: Flag op_building_optimization: Flag + replicate_small_constants: Flag saved_model_fingerprinting: Flag test_only_experiment_1: Flag test_only_experiment_2: Flag From 44f18cf198b4315e55e9dfd84e35ee748e629286 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 21 Sep 2023 14:27:34 -0700 Subject: [PATCH 105/567] [XLA] Add WithSharding in pattern matcher and modify tests to conform to the new pattern matching format -Add WithSharding implementation for HloInstructionPattern to match with the sharding. 
PiperOrigin-RevId: 567419758 --- third_party/xla/xla/service/gpu/BUILD | 7 +- .../gpu/all_reduce_blueconnect_test.cc | 3 - .../gpu/auto_sharding_gpu_compiler_test.cc | 10 ++- .../gpu/scatter_slice_simplifier_test.cc | 82 ++++++++++--------- third_party/xla/xla/service/pattern_matcher.h | 55 +++++++++++++ .../xla/xla/service/pattern_matcher_test.cc | 25 ++++++ 6 files changed, 137 insertions(+), 45 deletions(-) diff --git a/third_party/xla/xla/service/gpu/BUILD b/third_party/xla/xla/service/gpu/BUILD index abba4f41bd9c97..c6f2f7d1ba3a08 100644 --- a/third_party/xla/xla/service/gpu/BUILD +++ b/third_party/xla/xla/service/gpu/BUILD @@ -2781,6 +2781,8 @@ xla_cc_test( deps = [ "//xla/hlo/utils:hlo_matchers", "//xla/service:gpu_plugin", + "//xla/service:pattern_matcher", + "//xla/service:pattern_matcher_gmock", "//xla/tests:hlo_test_base", "//xla/tests:xla_internal_test_main", ], @@ -3013,7 +3015,6 @@ xla_cc_test( ":all_reduce_blueconnect", "//xla:shape_util", "//xla/hlo/ir:hlo", - "//xla/hlo/utils:hlo_matchers", "//xla/service:pattern_matcher", "//xla/service:pattern_matcher_gmock", "//xla/tests:hlo_test_base", @@ -4314,7 +4315,9 @@ xla_cc_test( srcs = ["scatter_slice_simplifier_test.cc"], deps = [ ":scatter_slice_simplifier", - "//xla/hlo/utils:hlo_matchers", + "//xla:shape_util", + "//xla/service:pattern_matcher", + "//xla/service:pattern_matcher_gmock", "//xla/tests:hlo_test_base", "//xla/tests:xla_internal_test_main", ], diff --git a/third_party/xla/xla/service/gpu/all_reduce_blueconnect_test.cc b/third_party/xla/xla/service/gpu/all_reduce_blueconnect_test.cc index 2a7f471bc5883a..755d3413d335d6 100644 --- a/third_party/xla/xla/service/gpu/all_reduce_blueconnect_test.cc +++ b/third_party/xla/xla/service/gpu/all_reduce_blueconnect_test.cc @@ -20,7 +20,6 @@ limitations under the License. 
#include "xla/hlo/ir/hlo_computation.h" #include "xla/hlo/ir/hlo_instruction.h" #include "xla/hlo/ir/hlo_module.h" -#include "xla/hlo/utils/hlo_matchers.h" #include "xla/service/pattern_matcher.h" #include "xla/service/pattern_matcher_gmock.h" #include "xla/shape.h" @@ -31,9 +30,7 @@ limitations under the License. namespace xla { namespace { -using ::testing::AllOf; using ::tsl::testing::IsOkAndHolds; -namespace op = xla::testing::opcode_matchers; namespace m = ::xla::match; using AllReduceBlueConnectTest = HloTestBase; diff --git a/third_party/xla/xla/service/gpu/auto_sharding_gpu_compiler_test.cc b/third_party/xla/xla/service/gpu/auto_sharding_gpu_compiler_test.cc index 2c3f2a5396f76c..c410370b9700cb 100644 --- a/third_party/xla/xla/service/gpu/auto_sharding_gpu_compiler_test.cc +++ b/third_party/xla/xla/service/gpu/auto_sharding_gpu_compiler_test.cc @@ -16,6 +16,8 @@ limitations under the License. #include #include "xla/hlo/utils/hlo_matchers.h" +#include "xla/service/pattern_matcher.h" +#include "xla/service/pattern_matcher_gmock.h" #include "xla/tests/hlo_test_base.h" namespace xla { @@ -23,6 +25,7 @@ namespace gpu { namespace { namespace op = xla::testing::opcode_matchers; +namespace m = ::xla::match; class AutoShardingTest : public HloTestBase { protected: @@ -34,7 +37,7 @@ ENTRY matmul { ROOT root = f32[32,128]{1,0} dot(parameter.1, parameter.2), lhs_contracting_dims={1}, rhs_contracting_dims={0} })"; std::unique_ptr CompileMatMul(bool use_autosharding, - int num_partitions) { + int num_partitions) { HloModuleConfig config; config.set_use_spmd_partitioning(true); config.set_use_auto_spmd_partitioning(use_autosharding); @@ -57,14 +60,15 @@ TEST_F(AutoShardingTest, MatMulWithAutosharding) { auto compiled_module = CompileMatMul(true, 4); auto* instruction = FindInstruction(compiled_module.get(), "param"); VLOG(2) << instruction->ToString(); - EXPECT_THAT(instruction, op::Sharding("{devices=[4,1]0,1,2,3}")); + EXPECT_THAT(instruction, + 
GmockMatch(m::Op().WithSharding("{devices=[4,1]0,1,2,3}"))); } TEST_F(AutoShardingTest, MatMulWithoutAutosharding) { auto compiled_module = CompileMatMul(false, 4); auto* instruction = FindInstruction(compiled_module.get(), "param"); VLOG(2) << instruction->ToString(); - EXPECT_THAT(instruction, op::Sharding("{replicated}")); + EXPECT_THAT(instruction, GmockMatch(m::Op().WithSharding("{replicated}"))); } } // namespace diff --git a/third_party/xla/xla/service/gpu/scatter_slice_simplifier_test.cc b/third_party/xla/xla/service/gpu/scatter_slice_simplifier_test.cc index 8762c78783fdbf..89b43e74acf643 100644 --- a/third_party/xla/xla/service/gpu/scatter_slice_simplifier_test.cc +++ b/third_party/xla/xla/service/gpu/scatter_slice_simplifier_test.cc @@ -15,14 +15,16 @@ limitations under the License. #include "xla/service/gpu/scatter_slice_simplifier.h" -#include "xla/hlo/utils/hlo_matchers.h" +#include "xla/service/pattern_matcher.h" +#include "xla/service/pattern_matcher_gmock.h" +#include "xla/shape.h" +#include "xla/shape_util.h" #include "xla/tests/hlo_test_base.h" namespace xla { namespace { -namespace op = xla::testing::opcode_matchers; -using ::testing::AllOf; +namespace m = ::xla::match; using ScatterSliceSimplifierTest = HloTestBase; @@ -48,9 +50,9 @@ ENTRY main { ScatterSliceSimplifier test_pass; ASSERT_TRUE(RunHloPass(&test_pass, module.get()).value()); EXPECT_THAT(module->entry_computation()->root_instruction(), - AllOf(op::Shape("f32[8]"), - op::Scatter(op::Slice(op::Constant()), op::Parameter(0), - op::Parameter(1)))); + GmockMatch(m::Scatter(m::Slice(m::Constant()), m::Parameter(0), + m::Parameter(1)) + .WithShape(F32, {8}))); } TEST_F(ScatterSliceSimplifierTest, Scatter3D) { @@ -75,9 +77,9 @@ ENTRY main { ScatterSliceSimplifier test_pass; ASSERT_TRUE(RunHloPass(&test_pass, module.get()).value()); EXPECT_THAT(module->entry_computation()->root_instruction(), - AllOf(op::Shape("f32[4, 4, 4]"), - op::Scatter(op::Slice(op::Constant()), op::Parameter(0), - 
op::Parameter(1)))); + GmockMatch(m::Scatter(m::Slice(m::Constant()), m::Parameter(0), + m::Parameter(1)) + .WithShape(F32, {4, 4, 4}))); } TEST_F(ScatterSliceSimplifierTest, ScatterMultiOutput) { @@ -112,12 +114,15 @@ ENTRY main { ScatterSliceSimplifier test_pass; ASSERT_TRUE(RunHloPass(&test_pass, module.get()).value()); auto expected_scatter = - op::Scatter(op::Slice(op::Constant()), op::Slice(op::Constant()), - op::Parameter(0), op::Parameter(1), op::Parameter(2)); + m::Scatter(m::Slice(m::Constant()), m::Slice(m::Constant()), + m::Parameter(0), m::Parameter(1), m::Parameter(2)); + + Shape expected_shape = ShapeUtil::MakeTupleShape( + {ShapeUtil::MakeShape(F32, {8}), ShapeUtil::MakeShape(F16, {8})}); EXPECT_THAT(module->entry_computation()->root_instruction(), - AllOf(op::Shape("(f32[8], f16[8])"), - op::Tuple(op::GetTupleElement(expected_scatter), - op::GetTupleElement(expected_scatter)))); + GmockMatch(m::Tuple(m::GetTupleElement(expected_scatter), + m::GetTupleElement(expected_scatter)) + .WithShapeEqualTo(&expected_shape))); } TEST_F(ScatterSliceSimplifierTest, NotMatching) { @@ -211,13 +216,16 @@ ENTRY main { .value(); ScatterSliceSimplifier test_pass; ASSERT_TRUE(RunHloPass(&test_pass, module.get()).value()); - auto expected_scatter = op::Scatter(op::Slice(op::Constant()), - op::Parameter(0), op::Parameter(1)); - EXPECT_THAT(module->entry_computation()->root_instruction(), - AllOf(op::Shape("(f32[8], f32[8])"), - op::Tuple(op::Abs(expected_scatter), - op::Maximum(expected_scatter, - op::Slice(op::Constant()))))); + auto expected_scatter = + m::Scatter(m::Slice(m::Constant()), m::Parameter(0), m::Parameter(1)); + + Shape expected_shape = ShapeUtil::MakeTupleShape( + {ShapeUtil::MakeShape(F32, {8}), ShapeUtil::MakeShape(F32, {8})}); + EXPECT_THAT( + module->entry_computation()->root_instruction(), + GmockMatch(m::Tuple(m::Abs(expected_scatter), + m::Maximum(expected_scatter, m::Slice(m::Constant()))) + .WithShapeEqualTo(&expected_shape))); } 
TEST_F(ScatterSliceSimplifierTest, IntermediaryChain) { @@ -244,12 +252,12 @@ ENTRY main { .value(); ScatterSliceSimplifier test_pass; ASSERT_TRUE(RunHloPass(&test_pass, module.get()).value()); - auto expected_scatter = op::Scatter(op::Slice(op::Constant()), - op::Parameter(0), op::Parameter(1)); - EXPECT_THAT( - module->entry_computation()->root_instruction(), - AllOf(op::Shape("f32[8]"), op::Add(op::Abs(expected_scatter), - op::Exp(op::Abs(expected_scatter))))); + auto expected_scatter = + m::Scatter(m::Slice(m::Constant()), m::Parameter(0), m::Parameter(1)); + EXPECT_THAT(module->entry_computation()->root_instruction(), + GmockMatch(m::Add(m::Abs(expected_scatter), + m::Exp(m::Abs(expected_scatter))) + .WithShape(F32, {8}))); } TEST_F(ScatterSliceSimplifierTest, DiamondShape) { @@ -283,12 +291,12 @@ ENTRY main { ScatterSliceSimplifier test_pass; ASSERT_TRUE(RunHloPass(&test_pass, module.get()).value()); auto expected_scatter = - op::Scatter(op::Slice(op::Constant()), op::Slice(op::Constant()), - op::Parameter(0), op::Parameter(1), op::Parameter(2)); + m::Scatter(m::Slice(m::Constant()), m::Slice(m::Constant()), + m::Parameter(0), m::Parameter(1), m::Parameter(2)); EXPECT_THAT(module->entry_computation()->root_instruction(), - AllOf(op::Shape("f32[8]"), - op::Add(op::GetTupleElement(expected_scatter), - op::GetTupleElement(expected_scatter)))); + GmockMatch(m::Add(m::GetTupleElement(expected_scatter), + m::GetTupleElement(expected_scatter)) + .WithShape(F32, {8}))); } TEST_F(ScatterSliceSimplifierTest, ElementwiseSelect) { @@ -314,12 +322,12 @@ ENTRY main { .value(); ScatterSliceSimplifier test_pass; ASSERT_TRUE(RunHloPass(&test_pass, module.get()).value()); - auto expected_scatter = op::Scatter(op::Slice(op::Constant()), - op::Parameter(0), op::Parameter(1)); + auto expected_scatter = + m::Scatter(m::Slice(m::Constant()), m::Parameter(0), m::Parameter(1)); EXPECT_THAT(module->entry_computation()->root_instruction(), - AllOf(op::Shape("f32[8]"), - 
op::Select(op::Slice(op::Parameter(2)), expected_scatter, - op::Slice(op::Constant())))); + GmockMatch(m::Select(m::Slice(m::Parameter(2)), expected_scatter, + m::Slice(m::Constant())) + .WithShape(F32, {8}))); } } // namespace diff --git a/third_party/xla/xla/service/pattern_matcher.h b/third_party/xla/xla/service/pattern_matcher.h index 6ff8faf666e346..041d500006ad53 100644 --- a/third_party/xla/xla/service/pattern_matcher.h +++ b/third_party/xla/xla/service/pattern_matcher.h @@ -38,6 +38,7 @@ limitations under the License. #include "xla/hlo/ir/hlo_instruction.h" #include "xla/hlo/ir/hlo_instructions.h" #include "xla/hlo/ir/hlo_opcode.h" +#include "xla/hlo/ir/hlo_sharding.h" #include "xla/layout_util.h" #include "xla/literal_util.h" #include "xla/service/hlo_parser.h" @@ -2086,6 +2087,55 @@ class HloInstructionReplicaGroupsImpl { std::vector> replica_groups_; }; +class HloInstructionShardingImpl { + public: + explicit HloInstructionShardingImpl( + const std::optional& sharding) + : sharding_(sharding) {} + + bool Match(const ::xla::HloInstruction* inst, MatchOption option) const { + return MatchImpl(inst, option); + } + + bool Match(::xla::HloInstruction* inst, MatchOption option) const { + return MatchImpl(inst, option); + } + + void DescribeTo(std::ostream* os, int64_t indent = 0) const { + if (sharding_.has_value()) { + *os << "with sharding " << sharding_->ToString(); + } else { + *os << "with no sharding"; + } + } + + private: + template + bool MatchImpl(HloInstructionType* inst, MatchOption option) const { + if (!sharding_.has_value()) { + if (!inst->has_sharding()) { + return true; + } + EXPLAIN << "HloInstruction is expected to have no sharding."; + return false; + } + if (inst->has_sharding()) { + if (inst->sharding() == sharding_.value()) { + return true; + } + EXPLAIN << "sharding " << inst->sharding().ToString() + << " don't match expected " << sharding_->ToString(); + return false; + } else { + EXPLAIN << "HloInstruction has no sharding. 
Expected: " + << sharding_->ToString(); + return false; + } + } + + std::optional sharding_; +}; + // Matches a constant scalar or effective scalar, optionally with a given value. template class HloConstantScalarImpl { @@ -2398,6 +2448,11 @@ class HloInstructionPattern { HloInstructionReplicaGroupsImpl(std::move(replica_groups))); } + auto WithSharding(absl::string_view sharding) const { + return AppendImpl( + HloInstructionShardingImpl(ParseSharding(sharding).value())); + } + void DescribeTo(std::ostream* os, int64_t indent = 0) const { impl_.DescribeTo(os, indent); } diff --git a/third_party/xla/xla/service/pattern_matcher_test.cc b/third_party/xla/xla/service/pattern_matcher_test.cc index 811ac0d720be5a..4511aa99cd08ea 100644 --- a/third_party/xla/xla/service/pattern_matcher_test.cc +++ b/third_party/xla/xla/service/pattern_matcher_test.cc @@ -1428,5 +1428,30 @@ TEST_F(PatternMatcherTest, TestWithReplicaGroups) { "replica_groups={{0,1},{2,3}}, to_apply=add"); } +TEST_F(PatternMatcherTest, TestWithSharding) { + constexpr char kModuleStr[] = R"( + HloModule test_module + ENTRY test { + p0 = f32[5,7,11,13]{3,2,1,0} parameter(0), + sharding={devices=[1,2,2,1]0,1,2,3}, + metadata={op_name="test"} + ROOT copy = f32[5,7,11,13]{3,2,1,0} copy(p0) + })"; + TF_ASSERT_OK_AND_ASSIGN(auto hlo_module, + ParseAndReturnVerifiedModule(kModuleStr)); + auto* instruction = FindInstruction(hlo_module.get(), "p0"); + EXPECT_TRUE( + Match(instruction, m::Op().WithSharding("{devices=[1,2,2,1]0,1,2,3}"))); + EXPECT_FALSE( + Match(instruction, m::Op().WithSharding("{devices=[2,2,1,1]0,1,2,3}"))); + EXPECT_DESC_AND_EXPLANATION( + instruction, m::Op().WithSharding("{devices=[2,2,1,1]0,1,2,3}"), + "an HloInstruction with sharding {devices=[2,2,1,1]0,1,2,3}", + "sharding {devices=[1,2,2,1]0,1,2,3} don't match expected " + "{devices=[2,2,1,1]0,1,2,3}\n" + "in p0 = f32[5,7,11,13]{3,2,1,0} parameter(0), " + "sharding={devices=[1,2,2,1]0,1,2,3}"); +} + } // namespace } // namespace xla From 
fb01efc24a76eb53e808737ee882888489bdf3f9 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Thu, 21 Sep 2023 14:34:31 -0700 Subject: [PATCH 106/567] [XLA] Hand-roll the evaluator's sort This allows us to not crash if the strict weak ordering requirement is not met. Also, try to detect when comparators which violate strict weak ordering are passed in. PiperOrigin-RevId: 567421978 --- third_party/xla/xla/BUILD | 2 +- third_party/xla/xla/comparison_util.h | 33 +- third_party/xla/xla/hlo/evaluator/BUILD | 14 +- .../xla/xla/hlo/evaluator/hlo_evaluator.cc | 389 +++++++++++------- 4 files changed, 264 insertions(+), 174 deletions(-) diff --git a/third_party/xla/xla/BUILD b/third_party/xla/xla/BUILD index b7ea0daba41c4e..184e312a72ede1 100644 --- a/third_party/xla/xla/BUILD +++ b/third_party/xla/xla/BUILD @@ -144,7 +144,7 @@ cc_library( ":xla_data_proto_cc", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/container:flat_hash_map", - "@com_google_absl//absl/meta:type_traits", + "@com_google_absl//absl/log:check", "@com_google_absl//absl/strings", "@local_tsl//tsl/platform:float8", "@local_tsl//tsl/platform:logging", diff --git a/third_party/xla/xla/comparison_util.h b/third_party/xla/xla/comparison_util.h index e0e5dec1c9eec7..364e503ff7c0c1 100644 --- a/third_party/xla/xla/comparison_util.h +++ b/third_party/xla/xla/comparison_util.h @@ -18,15 +18,15 @@ limitations under the License. #include #include -#include #include #include #include -#include "absl/meta/type_traits.h" +#include "absl/log/check.h" #include "absl/strings/string_view.h" #include "xla/primitive_util.h" #include "xla/statusor.h" +#include "xla/types.h" #include "xla/util.h" #include "xla/xla_data.pb.h" #include "tsl/platform/logging.h" // IWYU pragma: keep @@ -185,28 +185,19 @@ class Comparison { } } - // Applies the comparison from this Comparison's direction and ordering for - // integral types. 
- template ::is_integer, int> = 0> - inline bool Compare(const T a, const T b) const { - DCHECK(primitive_util::IsCanonicalRepresentation(primitive_type_)); - return GetComparator()(a, b); - } - - // Applies the comparison from this Comparison's direction and ordering - // for floating point types. - template ::is_integer, int> = 0> + template inline bool Compare(const T a, const T b) const { DCHECK(primitive_util::IsCanonicalRepresentation(primitive_type_)); - if (IsTotalOrder()) { - // -NaN < -Inf < -Finite < -0 < +0 < +Finite < +Inf < +NaN - // Reference: - // https://www.tensorflow.org/xla/operation_semantics#element-wise_comparison_operations - using R = SignedIntegerTypeForSizeType; - return GetComparator()(ToSignMagnitude(a), ToSignMagnitude(b)); + if constexpr (is_specialized_floating_point_v) { + if (IsTotalOrder()) { + // -NaN < -Inf < -Finite < -0 < +0 < +Finite < +Inf < +NaN + // Reference: + // https://www.tensorflow.org/xla/operation_semantics#element-wise_comparison_operations + using R = SignedIntegerTypeForSizeType; + return GetComparator()(ToSignMagnitude(a), ToSignMagnitude(b)); + } } + // Applies the comparison from this Comparison's direction and ordering. 
return GetComparator()(a, b); } diff --git a/third_party/xla/xla/hlo/evaluator/BUILD b/third_party/xla/xla/hlo/evaluator/BUILD index 93e99dc8b357ac..7b3af5b3239d54 100644 --- a/third_party/xla/xla/hlo/evaluator/BUILD +++ b/third_party/xla/xla/hlo/evaluator/BUILD @@ -43,20 +43,23 @@ cc_library( visibility = ["//visibility:public"], deps = [ "//xla:array2d", + "//xla:comparison_util", "//xla:literal", "//xla:literal_util", "//xla:shape_util", + "//xla:status", "//xla:status_macros", "//xla:statusor", "//xla:types", "//xla:util", - "//xla:window_util", "//xla:xla_data_proto_cc", "//xla/hlo/ir:hlo", "//xla/hlo/utils:hlo_query", "//xla/service:call_graph", "//xla/service:compilation_environments", "//xla/service:dynamic_dimension_inference", + "//xla/service:hlo_module_config", + "//xla/service:logical_buffer", "//xla/service:pattern_matcher", "//xla/service:shape_inference", "//xla/service:tuple_points_to_analysis", @@ -68,13 +71,20 @@ cc_library( "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:inlined_vector", "@com_google_absl//absl/container:node_hash_map", + "@com_google_absl//absl/functional:function_ref", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/numeric:bits", "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:cord", + "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/types:span", + "@eigen_archive//:eigen3", "@local_tsl//tsl/lib/core:bitmap", + "@local_tsl//tsl/platform:env", "@local_tsl//tsl/platform:errors", "@local_tsl//tsl/platform:float8", "@local_tsl//tsl/platform:logging", - "@local_tsl//tsl/platform:protobuf", + "@local_tsl//tsl/platform:platform_port", "@local_tsl//tsl/platform:status", "@local_tsl//tsl/platform:statusor", "@local_tsl//tsl/platform:types", diff --git a/third_party/xla/xla/hlo/evaluator/hlo_evaluator.cc b/third_party/xla/xla/hlo/evaluator/hlo_evaluator.cc index c0530ba523e58e..29117bcfcaa82e 100644 --- 
a/third_party/xla/xla/hlo/evaluator/hlo_evaluator.cc +++ b/third_party/xla/xla/hlo/evaluator/hlo_evaluator.cc @@ -18,15 +18,18 @@ limitations under the License. #include #include #include +#include #include #include #include #include #include +#include #include +#include #include +#include #include -#include #include #include #include @@ -34,40 +37,54 @@ limitations under the License. #include "absl/algorithm/container.h" #include "absl/base/internal/endian.h" #include "absl/cleanup/cleanup.h" +#include "absl/container/flat_hash_map.h" #include "absl/container/inlined_vector.h" -#include "absl/strings/match.h" +#include "absl/functional/function_ref.h" +#include "absl/memory/memory.h" +#include "absl/numeric/bits.h" +#include "absl/strings/cord.h" #include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" #include "absl/strings/string_view.h" #include "absl/types/span.h" +#include "Eigen/Core" // from @eigen_archive +#include "xla/array2d.h" +#include "xla/comparison_util.h" #include "xla/hlo/evaluator/hlo_evaluator_typed_visitor.h" +#include "xla/hlo/ir/dfs_hlo_visitor_with_default.h" #include "xla/hlo/ir/hlo_casting_utils.h" +#include "xla/hlo/ir/hlo_clone_context.h" #include "xla/hlo/ir/hlo_instruction.h" +#include "xla/hlo/ir/hlo_instructions.h" #include "xla/hlo/ir/hlo_opcode.h" #include "xla/hlo/utils/hlo_query.h" #include "xla/index_util.h" +#include "xla/layout.h" #include "xla/layout_util.h" #include "xla/literal.h" #include "xla/literal_util.h" -#include "xla/map_util.h" #include "xla/primitive_util.h" +#include "xla/service/call_graph.h" #include "xla/service/compilation_environments.h" #include "xla/service/cpu/runtime_single_threaded_matmul.h" +#include "xla/service/hlo_module_config.h" +#include "xla/service/logical_buffer.h" #include "xla/service/pattern_matcher.h" #include "xla/service/shape_inference.h" #include "xla/service/tuple_points_to_analysis.h" #include "xla/shape.h" #include "xla/shape_util.h" +#include "xla/status.h" #include 
"xla/status_macros.h" #include "xla/statusor.h" #include "xla/types.h" #include "xla/util.h" -#include "xla/window_util.h" #include "xla/xla_data.pb.h" -#include "tsl/lib/core/bitmap.h" +#include "tsl/platform/cpu_info.h" +#include "tsl/platform/env.h" #include "tsl/platform/errors.h" #include "tsl/platform/float8.h" #include "tsl/platform/logging.h" -#include "tsl/platform/protobuf.h" #include "tsl/platform/status.h" #include "tsl/platform/statusor.h" #include "tsl/platform/types.h" @@ -79,114 +96,52 @@ namespace { using primitive_util::NativeTypeOf; template -StatusOr Compare(const Shape& shape, ComparisonDirection direction, +StatusOr Compare(const Shape& shape, Comparison comparison, LiteralSlice lhs_literal, LiteralSlice rhs_literal) { - std::function compare_op; - switch (direction) { + auto populate = [&](auto compare_op) -> StatusOr { + Literal result(shape); + TF_RETURN_IF_ERROR(result.PopulateParallel( + [&](absl::Span multi_index, int /*thread_id*/) { + auto lhs = lhs_literal.Get(multi_index); + auto rhs = rhs_literal.Get(multi_index); + if constexpr (is_specialized_floating_point_v) { + if (comparison.IsTotalOrder()) { + return compare_op(ToSignMagnitude(lhs), ToSignMagnitude(rhs)); + } + } + return compare_op(lhs, rhs); + })); + return std::move(result); + }; + switch (comparison.GetDirection()) { case ComparisonDirection::kEq: - compare_op = [](OperandT lhs_el, OperandT rhs_el) { - return lhs_el == rhs_el; - }; - break; + return populate([](auto lhs, auto rhs) { return lhs == rhs; }); case ComparisonDirection::kNe: - compare_op = [](OperandT lhs_el, OperandT rhs_el) { - return lhs_el != rhs_el; - }; - break; + return populate([](auto lhs, auto rhs) { return lhs != rhs; }); case ComparisonDirection::kGe: - compare_op = [](OperandT lhs_el, OperandT rhs_el) { - return lhs_el >= rhs_el; - }; + if constexpr (!is_complex_v) { + return populate([](auto lhs, auto rhs) { return lhs >= rhs; }); + } break; case ComparisonDirection::kGt: - compare_op = 
[](OperandT lhs_el, OperandT rhs_el) { - return lhs_el > rhs_el; - }; + if constexpr (!is_complex_v) { + return populate([](auto lhs, auto rhs) { return lhs > rhs; }); + } break; case ComparisonDirection::kLe: - compare_op = [](OperandT lhs_el, OperandT rhs_el) { - return lhs_el <= rhs_el; - }; + if constexpr (!is_complex_v) { + return populate([](auto lhs, auto rhs) { return lhs <= rhs; }); + } break; case ComparisonDirection::kLt: - compare_op = [](OperandT lhs_el, OperandT rhs_el) { - return lhs_el < rhs_el; - }; - break; - } - - Literal result(shape); - TF_RETURN_IF_ERROR( - result.Populate([&](absl::Span multi_index) { - return compare_op(lhs_literal.Get(multi_index), - rhs_literal.Get(multi_index)); - })); - - return std::move(result); -} - -template <> -StatusOr Compare(const Shape& shape, - ComparisonDirection direction, - LiteralSlice lhs_literal, - LiteralSlice rhs_literal) { - std::function compare_op; - switch (direction) { - case ComparisonDirection::kEq: - compare_op = [](complex64 lhs_el, complex64 rhs_el) { - return lhs_el == rhs_el; - }; - break; - case ComparisonDirection::kNe: - compare_op = [](complex64 lhs_el, complex64 rhs_el) { - return lhs_el != rhs_el; - }; + if constexpr (!is_complex_v) { + return populate([](auto lhs, auto rhs) { return lhs < rhs; }); + } break; - default: - LOG(FATAL) << "unhandled direction for conversion to Comparison: " - << ComparisonDirectionToString(direction); } - Literal result(shape); - TF_RETURN_IF_ERROR( - result.Populate([&](absl::Span multi_index) { - return compare_op(lhs_literal.Get(multi_index), - rhs_literal.Get(multi_index)); - })); - - return std::move(result); -} - -template <> -StatusOr Compare(const Shape& shape, - ComparisonDirection direction, - LiteralSlice lhs_literal, - LiteralSlice rhs_literal) { - std::function compare_op; - switch (direction) { - case ComparisonDirection::kEq: - compare_op = [](complex128 lhs_el, complex128 rhs_el) { - return lhs_el == rhs_el; - }; - break; - case 
ComparisonDirection::kNe: - compare_op = [](complex128 lhs_el, complex128 rhs_el) { - return lhs_el != rhs_el; - }; - break; - default: - LOG(FATAL) << "unhandled direction for conversion to Comparison: " - << ComparisonDirectionToString(direction); - } - - Literal result(shape); - TF_RETURN_IF_ERROR( - result.Populate([&](absl::Span multi_index) { - return compare_op(lhs_literal.Get(multi_index), - rhs_literal.Get(multi_index)); - })); - - return std::move(result); + LOG(FATAL) << "unhandled direction for conversion to Comparison: " + << comparison.ToString(); } std::optional GetInstructionStaticValueAsBool( @@ -1572,30 +1527,32 @@ Status HloEvaluator::HandleComplex(const HloInstruction* complex) { Status HloEvaluator::HandleCompare(const HloInstruction* compare) { ComparisonDirection direction = compare->comparison_direction(); + ComparisonOrder order = compare->comparison_order(); auto lhs = compare->operand(0); auto rhs = compare->operand(1); DCHECK(ShapeUtil::SameDimensions(compare->shape(), rhs->shape()) && ShapeUtil::SameDimensions(lhs->shape(), rhs->shape())); TF_RET_CHECK(lhs->shape().element_type() == rhs->shape().element_type()); + auto element_type = lhs->shape().element_type(); + Comparison comparison(direction, element_type, order); const Literal& lhs_literal = GetEvaluatedLiteralFor(lhs); const Literal& rhs_literal = GetEvaluatedLiteralFor(rhs); - // Note here we switch on the operand's type. 
return primitive_util::PrimitiveTypeSwitch( [&](auto primitive_type_constant) -> Status { if constexpr (primitive_util::IsArrayType(primitive_type_constant)) { using NativeT = primitive_util::NativeTypeOf; TF_ASSIGN_OR_RETURN(evaluated_[compare], - Compare(compare->shape(), direction, + Compare(compare->shape(), comparison, lhs_literal, rhs_literal)); return OkStatus(); } LOG(FATAL) << "HandleCompare: unknown primitive type: " - << PrimitiveType_Name(lhs->shape().element_type()); + << PrimitiveType_Name(element_type); }, - lhs->shape().element_type()); + element_type); } Status HloEvaluator::HandleTuple(const HloInstruction* tuple) { @@ -3969,8 +3926,180 @@ Status HloEvaluator::HandleSort(const HloInstruction* sort) { << "Unexpected out-of-bound sort dimension " << sort_dim << " accessing increment of size " << increment.size(); increment[sort_dim] = sort_dim_elements; - std::unique_ptr embedded_evaluator = - CreateEmbedded(max_loop_iterations_); + + auto comparator = [sort](absl::Span literals_to_sort, + int64_t a, int64_t b, + HloEvaluator* embedded_evaluator) -> StatusOr { + absl::InlinedVector literals; + literals.reserve(2 * sort->operand_count()); + for (int64_t i = 0; i < sort->operand_count(); ++i) { + literals.push_back( + LiteralUtil::GetScalarLiteral(literals_to_sort[i], {a})); + literals.push_back( + LiteralUtil::GetScalarLiteral(literals_to_sort[i], {b})); + } + absl::InlinedVector literal_ptrs; + absl::c_transform(literals, std::back_inserter(literal_ptrs), + [](const Literal& literal) { return &literal; }); + + TF_ASSIGN_OR_RETURN( + auto computed_result, + embedded_evaluator->Evaluate(*sort->to_apply(), literal_ptrs)); + // Clear visit states so that we can use the evaluator again + // on the same computation. 
+ embedded_evaluator->ResetVisitStates(); + return computed_result.Get({}); + }; + auto less_than = [&comparator]( + absl::Span literals_to_sort, int64_t a, + int64_t b, + HloEvaluator* embedded_evaluator) -> StatusOr { + TF_ASSIGN_OR_RETURN(bool a_is_smaller, + comparator(literals_to_sort, a, b, embedded_evaluator)); +#ifndef NDEBUG + // Let's see if the comparator violates strict weak ordering. + // N.B. This does not test transitivity. + TF_ASSIGN_OR_RETURN(bool b_is_smaller, + comparator(literals_to_sort, b, a, embedded_evaluator)); + TF_RET_CHECK(!(b_is_smaller && a_is_smaller)); + TF_ASSIGN_OR_RETURN(bool b_is_reflexive, + comparator(literals_to_sort, b, b, embedded_evaluator)); + TF_RET_CHECK(!b_is_reflexive); + TF_ASSIGN_OR_RETURN(bool a_is_reflexive, + comparator(literals_to_sort, a, a, embedded_evaluator)); + TF_RET_CHECK(!a_is_reflexive); +#endif + return a_is_smaller; + }; + std::function, absl::Span, + absl::Span, absl::Span, + std::vector&, HloEvaluator*)> + merge = [&](absl::Span literals_to_sort, + absl::Span lhs, absl::Span rhs, + absl::Span output, std::vector& tmp, + HloEvaluator* embedded_evaluator) -> Status { + tmp.clear(); + tmp.reserve(output.size()); + // Keep picking between elements. + while (!lhs.empty() && !rhs.empty()) { + // If rhs < lhs, pick rhs. Otherwise, pick lhs. This should ensure + // stability as lhs comes first in the array. + TF_ASSIGN_OR_RETURN(bool rhs_is_smaller, + less_than(literals_to_sort, rhs.front(), lhs.front(), + embedded_evaluator)); + if (rhs_is_smaller) { + tmp.push_back(rhs.front()); + rhs.remove_prefix(1); + } else { + tmp.push_back(lhs.front()); + lhs.remove_prefix(1); + } + } + // At least one of the two input arrays are now empty, we need to copy + // the remaining elements. 
+ absl::c_copy(lhs, std::back_inserter(tmp)); + absl::c_copy(rhs, std::back_inserter(tmp)); + absl::c_copy(tmp, output.begin()); + return OkStatus(); + }; + auto* env = tsl::Env::Default(); + const int max_parallelism = tsl::port::MaxParallelism(); + constexpr size_t kMinElementsPerThread{1024}; + const size_t useful_parallelism = std::min( + sort_dim_elements / kMinElementsPerThread, max_parallelism); + const size_t work_per_thread = useful_parallelism > 1 + ? sort_dim_elements / useful_parallelism + : std::numeric_limits::max(); + std::function, absl::Span, + std::vector*, HloEvaluator*)> + mergesort = [&merge, &mergesort, &less_than, this, env, work_per_thread]( + absl::Span literals_to_sort, + absl::Span to_sort, + std::vector* scratch, + HloEvaluator* embedded_evaluator) -> Status { + // Base case: inputs with 0 or 1 elements are already sorted. + if (to_sort.size() < 2) { + return OkStatus(); + } + size_t halfway = to_sort.size() / 2; + auto lhs = to_sort.subspan(/*pos=*/0, halfway); + auto rhs = to_sort.subspan(/*pos=*/halfway); + + // Allocate an evaluator if we never got one, we will reuse an + // allocator so long as we are not moving it between threads. + std::unique_ptr thread_local_embedded_evaluator; + if (embedded_evaluator == nullptr) { + thread_local_embedded_evaluator = CreateEmbedded(max_loop_iterations_); + embedded_evaluator = thread_local_embedded_evaluator.get(); + } + + constexpr size_t kMinElementsForMergesort{9}; + if (to_sort.size() >= kMinElementsForMergesort) { + std::unique_ptr> thread_local_scratch; + if (!scratch) { + thread_local_scratch = std::make_unique>(); + scratch = thread_local_scratch.get(); + } + // Overlap sorting the LHS with the RHS if we have enough work to + // do. The recursive call for to `mergesort(rhs)` will potentially + // create more threads. 
+ Status lhs_status; + if (to_sort.size() >= work_per_thread) { + std::unique_ptr thread = absl::WrapUnique(env->StartThread( + tsl::ThreadOptions(), "XLA_mergesort", + [literals_to_sort, lhs, &mergesort, &lhs_status] { + lhs_status = mergesort(literals_to_sort, lhs, nullptr, nullptr); + })); + TF_RETURN_IF_ERROR( + mergesort(literals_to_sort, rhs, scratch, embedded_evaluator)); + // Here, `thread` will run its destructor ensuring that it is done + // sorting `lhs`. + thread.reset(); + } else { + TF_RETURN_IF_ERROR( + mergesort(literals_to_sort, rhs, scratch, embedded_evaluator)); + lhs_status = + mergesort(literals_to_sort, lhs, scratch, embedded_evaluator); + } + TF_RETURN_IF_ERROR(lhs_status); + TF_RETURN_IF_ERROR(merge(literals_to_sort, lhs, rhs, to_sort, *scratch, + embedded_evaluator)); + } else { + // Do an insertion sort. Values to the left of `i` are sorted. + // Any values larger than it in will be moved past `i`. Binary + // search in [0, i) looking for the smallest value larger than `i` + // which we will call `ub`. By induction, [ub, i) are all larger + // than `i`. + for (auto i = to_sort.begin(); i != to_sort.end(); ++i) { + auto len = i - to_sort.begin(); + auto ub = to_sort.begin(); + auto needle = *i; + while (len != 0) { + auto half_len = len / 2; + auto midpoint = ub + half_len; + TF_ASSIGN_OR_RETURN(bool is_smaller, + less_than(literals_to_sort, needle, *midpoint, + embedded_evaluator)); + if (is_smaller) { + // Our needle is smaller than the midpoint, we need to shrink + // the range by trimming the rightmost portion of it. We can't + // exclude the midpoint value yet. + len = half_len; + } else { + // Our needle is at least as big as the midpoint but we want + // something larger, we can exclude the midpoint. + ub = midpoint + 1; + len -= half_len + 1; + } + } + // Shift values larger than `i` to the right by 1 and insert `i` + // in the new gap. Now the sorted range is [0, i]. 
+ std::rotate(ub, i, i + 1); + } + } + return OkStatus(); + }; + // Iterate through each dimension except 'sort_dim'. TF_RETURN_IF_ERROR(ShapeUtil::ForEachIndexWithStatus( key_shape, zero_base, key_shape.dimensions(), increment, @@ -3991,49 +4120,9 @@ Status HloEvaluator::HandleSort(const HloInstruction* sort) { } std::vector indices_to_sort(sort_dim_elements); std::iota(indices_to_sort.begin(), indices_to_sort.end(), 0); - Status compare_status = OkStatus(); - auto comparator = [sort, &compare_status, - embedded_evaluator = embedded_evaluator.get(), - &literals_to_sort](int64_t a, int64_t b) { - std::vector literals; - literals.reserve(2 * sort->operand_count()); - for (int64_t i = 0; i < sort->operand_count(); ++i) { - literals.push_back( - LiteralUtil::GetScalarLiteral(literals_to_sort[i], {a})); - literals.push_back( - LiteralUtil::GetScalarLiteral(literals_to_sort[i], {b})); - } - std::vector literal_ptrs; - absl::c_transform(literals, std::back_inserter(literal_ptrs), - [](const Literal& literal) { return &literal; }); - - auto computed_result = - embedded_evaluator->Evaluate(*sort->to_apply(), literal_ptrs); - // Clear visit states so that we can use the evaluator again - // on the same computation. - embedded_evaluator->ResetVisitStates(); - if (!computed_result.ok()) { - compare_status = computed_result.status(); - return false; - } - return computed_result.value().Get({}); - }; - if (!indices_to_sort.empty()) { - // Smoke test of the comparator - it should not be reflexive. 
- const int64_t a = indices_to_sort[0]; - TF_RET_CHECK(!comparator(a, a)) - << "Invalid sort comparator - does not satisfy the strict weak " - "ordering requirement"; - } - if (Cast(sort)->is_stable()) { - std::stable_sort(indices_to_sort.begin(), indices_to_sort.end(), - comparator); - } else { - std::sort(indices_to_sort.begin(), indices_to_sort.end(), comparator); - } - if (!compare_status.ok()) { - return compare_status; - } + TF_RETURN_IF_ERROR(mergesort(literals_to_sort, + absl::MakeSpan(indices_to_sort), nullptr, + nullptr)); std::vector slice_dimensions(rank, 1); slice_dimensions[sort_dim] = sort_dim_elements; std::vector start_indices(rank, 0); From f65dd98d9b5820a19dc170f2b10ff35c463e588e Mon Sep 17 00:00:00 2001 From: Yu Feng Date: Thu, 21 Sep 2023 14:49:19 -0700 Subject: [PATCH 107/567] Allow additional coordinator arguments to the TPU test. PiperOrigin-RevId: 567425791 --- tensorflow/python/tpu/tpu_test_wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/tpu/tpu_test_wrapper.py b/tensorflow/python/tpu/tpu_test_wrapper.py index 3076f53de7a76a..534707655c7844 100644 --- a/tensorflow/python/tpu/tpu_test_wrapper.py +++ b/tensorflow/python/tpu/tpu_test_wrapper.py @@ -213,7 +213,7 @@ def run_user_main(wrapped_test_module): os.environ['TEST_TOTAL_SHARDS'] = saved_total_shards maybe_define_flags() # Parse remaining flags. - FLAGS(unparsed) + FLAGS(unparsed, known_only=True) set_random_test_dir() move_test_classes_into_scope(user_module) From 8bebcb801b6be745c39f6cbfee180cce5e4e9523 Mon Sep 17 00:00:00 2001 From: Kuangyuan Chen Date: Thu, 21 Sep 2023 14:49:28 -0700 Subject: [PATCH 108/567] Decouple traceme context id from step id in tfrt threadpool interface. 
PiperOrigin-RevId: 567425831 --- tensorflow/core/tfrt/graph_executor/BUILD | 1 + .../core/tfrt/graph_executor/graph_executor.cc | 9 +++++---- .../core/tfrt/runtime/work_queue_interface.h | 16 ++++++++-------- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/tensorflow/core/tfrt/graph_executor/BUILD b/tensorflow/core/tfrt/graph_executor/BUILD index 7c43ffe7ebb429..7cc355fa1b2e8b 100644 --- a/tensorflow/core/tfrt/graph_executor/BUILD +++ b/tensorflow/core/tfrt/graph_executor/BUILD @@ -122,6 +122,7 @@ cc_library( "@llvm-project//mlir:IR", "@local_tsl//tsl/platform:refcount", "@local_tsl//tsl/platform:status", + "@local_tsl//tsl/profiler/lib:traceme", "@tf_runtime//:basic_kernels_alwayslink", "@tf_runtime//:bef", "@tf_runtime//:befexecutor", diff --git a/tensorflow/core/tfrt/graph_executor/graph_executor.cc b/tensorflow/core/tfrt/graph_executor/graph_executor.cc index 9045e8078c5169..2b8430f7af2af8 100644 --- a/tensorflow/core/tfrt/graph_executor/graph_executor.cc +++ b/tensorflow/core/tfrt/graph_executor/graph_executor.cc @@ -84,6 +84,7 @@ limitations under the License. #include "tsl/platform/errors.h" #include "tsl/platform/refcount.h" #include "tsl/platform/statusor.h" +#include "tsl/profiler/lib/traceme.h" #include "tfrt/bef_converter/mlir_to_bef.h" // from @tf_runtime #include "tfrt/core_runtime/core_runtime.h" // from @tf_runtime #include "tfrt/host_context/async_dispatch.h" // from @tf_runtime @@ -292,8 +293,9 @@ tensorflow::Status GraphExecutionRunOnFunction( process_function_library_runtime, cost_recorder)); int64_t request_id = request_info->tfrt_request_context->id(); - tensorflow::profiler::TraceMeProducer traceme( - // To TraceMeConsumers in RunHandlerThreadPool::WorkerLoop. + // The top level traceme root for this request. The thread pool used later + // will add TraceMeProducer and TraceMeConsumer to connect async tasks. 
+ tsl::profiler::TraceMe traceme( [request_id, signature_name, &options, symbol_uids] { return tensorflow::profiler::TraceMeEncode( "TfrtModelRun", @@ -304,8 +306,7 @@ tensorflow::Status GraphExecutionRunOnFunction( options.model_metadata.version())}, {"tf_symbol_uid", symbol_uids.tf_symbol_uid}, {"tfrt_symbol_uid", symbol_uids.tfrt_symbol_uid}}); - }, - tensorflow::profiler::ContextType::kTfrtExecutor, request_id); + }); // Only configure timer when the deadline is set. if (run_options.deadline.has_value()) { diff --git a/tensorflow/core/tfrt/runtime/work_queue_interface.h b/tensorflow/core/tfrt/runtime/work_queue_interface.h index b5d3f27477a764..08c9f786496796 100644 --- a/tensorflow/core/tfrt/runtime/work_queue_interface.h +++ b/tensorflow/core/tfrt/runtime/work_queue_interface.h @@ -87,16 +87,16 @@ template tfrt::TaskFunction WrapWork(int64_t id, absl::string_view name, Callable&& work) { tensorflow::Context context(tensorflow::ContextKind::kThread); - return tfrt::TaskFunction([id, name = std::string(name), + tensorflow::profiler::TraceMeProducer producer( + [&]() { return absl::StrCat("producer_", name); }, + tensorflow::profiler::ContextType::kTfrtExecutor); + return tfrt::TaskFunction([traceme_id = producer.GetContextId(), + name = std::string(name), context = std::move(context), work = std::forward(work)]() mutable { - // From TraceMeProducer in the function that launches graph execution, eg. - // SavedModelImpl::Run(). 
- tensorflow::profiler::TraceMeConsumer activity( - [&]() { - return tensorflow::profiler::TraceMeEncode(name, {{"id", id}}); - }, - tensorflow::profiler::ContextType::kTfrtExecutor, id, + tensorflow::profiler::TraceMeConsumer consumer( + [&]() { return absl::StrCat("consumer_", name); }, + tensorflow::profiler::ContextType::kTfrtExecutor, traceme_id, tensorflow::profiler::TraceMeLevel::kInfo); tensorflow::WithContext wc(context); std::forward(work)(); From c9b1800ce365003aff34665ff6564a0e5554de35 Mon Sep 17 00:00:00 2001 From: Penporn Koanantakool Date: Thu, 21 Sep 2023 15:02:09 -0700 Subject: [PATCH 109/567] Temporarily disabling XLA CPU oneDNN Dot op rewriter because it causes performance regression in JAX. PiperOrigin-RevId: 567429452 --- third_party/xla/xla/service/cpu/cpu_compiler.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/third_party/xla/xla/service/cpu/cpu_compiler.cc b/third_party/xla/xla/service/cpu/cpu_compiler.cc index 5f8153fe13faea..c6c78592825e58 100644 --- a/third_party/xla/xla/service/cpu/cpu_compiler.cc +++ b/third_party/xla/xla/service/cpu/cpu_compiler.cc @@ -698,7 +698,8 @@ Status CpuCompiler::RunHloPassesThroughLayoutAssn( #if defined(INTEL_MKL) && defined(ENABLE_ONEDNN_V3) // AOT compiled code runs in single thread. if (!is_aot_compile) { - pipeline.AddPass(); + // Temporarily disabling oneDNN rewriter because it causes JAX regression. 
+ // pipeline.AddPass(); } #endif // INTEL_MKL && ENABLE_ONEDNN_V3 From 24171a1a4aa7823e1092176ecde449b65d9bc484 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Thu, 21 Sep 2023 15:06:56 -0700 Subject: [PATCH 110/567] [stream_executor] NFC: Prepare header filegroups for defining StreamExecutor API PiperOrigin-RevId: 567430767 --- third_party/xla/xla/stream_executor/BUILD | 57 +++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/third_party/xla/xla/stream_executor/BUILD b/third_party/xla/xla/stream_executor/BUILD index 15e3642d4afd89..3254a4ead38063 100644 --- a/third_party/xla/xla/stream_executor/BUILD +++ b/third_party/xla/xla/stream_executor/BUILD @@ -30,6 +30,63 @@ package_group( packages = stream_executor_internal(), ) +#===--------------------------------------------------------------------------------------------===# +# StreamExecutor public API +#===--------------------------------------------------------------------------------------------===# + +# We bundle headers into filegroups for internal use only (we re-export the same set of headers +# from multiple targets), and all external clients should depend on one of the public `cc_library` +# targets that have dependencies required for compiling headers (e.g. absl dependencies). These +# filegroup roughly correspond to "StreamExecutor components" that are available to the clients. + +# These are the headers that constitute StreamExecutor public API. Clients should not depend on +# this filegroup directly, but instead depend on a `stream_executor_headers` target if they need +# a header-only dependency (this is a very rare exception when we are building dynamic librarires +# for open source projects, e.g. Tensorflow, internally at Google we almost always link statically), +# or usually on a `sream_executor` target that will also link implementation. 
+ +filegroup( + name = "stream_executor_public_headers", + srcs = [ + "allocator_stats.h", + "command_buffer.h", + "device_description.h", + "device_memory.h", + "device_memory_allocator.h", + "device_options.h", + "event.h", + "executor_cache.h", + "kernel.h", + "kernel_cache_config.h", + "kernel_spec.h", + "launch_dim.h", + "module_spec.h", + "numeric_options.h", + "platform.h", + "plugin.h", + "plugin_registry.h", + "stream.h", + "stream_executor.h", + "stream_executor_internal.h", + "stream_executor_pimpl.h", + "temporary_device_memory.h", + "temporary_memory_manager.h", + "trace_listener.h", + ], + visibility = ["//visibility:public"], +) + +# These are the headers for default StreamExecutor plugins. +filegroup( + name = "stream_executor_plugin_headers", + srcs = [ + "blas.h", + "dnn.h", + "fft.h", + ], + visibility = ["//visibility:public"], +) + #===--------------------------------------------------------------------------------------------===# # StreamExecutor platform-dependent implementation details #===--------------------------------------------------------------------------------------------===# From 8369f9479d79be57fd87c3b1f00ea1eed00dcd29 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 21 Sep 2023 16:25:05 -0700 Subject: [PATCH 111/567] Integrate LLVM at llvm/llvm-project@ebefe83c092e Updates LLVM usage to match [ebefe83c092e](https://github.com/llvm/llvm-project/commit/ebefe83c092e) PiperOrigin-RevId: 567450893 --- .../compiler/mlir/tools/kernel_gen/BUILD | 20 ++++++++++--------- .../mlir/tools/kernel_gen/hlo_to_kernel.cc | 6 +++++- .../kernel_gen/tf_framework_c_interface.cc | 6 +++++- third_party/llvm/workspace.bzl | 4 ++-- 4 files changed, 23 insertions(+), 13 deletions(-) diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/BUILD b/tensorflow/compiler/mlir/tools/kernel_gen/BUILD index 5ac0ad29c6eeae..395b79491460e8 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/BUILD +++ b/tensorflow/compiler/mlir/tools/kernel_gen/BUILD @@ -1,24 
+1,24 @@ -load( - "@local_xla//xla/stream_executor:build_defs.bzl", - "if_gpu_is_configured", -) load( "//tensorflow:tensorflow.bzl", "check_deps", "tf_cc_binary", ) load( - "@local_tsl//tsl/platform/default:cuda_build_defs.bzl", - "if_cuda_is_configured", + "@local_xla//xla/stream_executor:build_defs.bzl", + "if_gpu_is_configured", +) +load( + "//tensorflow/core/platform:build_config.bzl", + "if_llvm_system_z_available", + "tf_proto_library", ) load( "@local_config_rocm//rocm:build_defs.bzl", "if_rocm_is_configured", ) load( - "//tensorflow/core/platform:build_config.bzl", - "if_llvm_system_z_available", - "tf_proto_library", + "@local_tsl//tsl/platform/default:cuda_build_defs.bzl", + "if_cuda_is_configured", ) package( @@ -122,6 +122,7 @@ tf_cc_binary( "@llvm-project//llvm:X86Disassembler", # fixdeps: keep "@llvm-project//mlir:ExecutionEngineUtils", "@llvm-project//mlir:LLVMToLLVMIRTranslation", + "@llvm-project//mlir:MemRefTransforms", "@llvm-project//mlir:Pass", "@llvm-project//mlir:ToLLVMIRTranslation", ] + if_llvm_system_z_available([ @@ -165,6 +166,7 @@ cc_library( "@llvm-project//llvm:Support", "@llvm-project//mlir:ExecutionEngine", "@llvm-project//mlir:ExecutionEngineUtils", + "@llvm-project//mlir:MemRefTransforms", "@llvm-project//mlir:Parser", "@llvm-project//mlir:mlir_runner_utils", "@local_xla//xla/stream_executor:stream_executor_headers", diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/hlo_to_kernel.cc b/tensorflow/compiler/mlir/tools/kernel_gen/hlo_to_kernel.cc index 31e77feaf41c4a..b5537741529d06 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/hlo_to_kernel.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/hlo_to_kernel.cc @@ -35,6 +35,7 @@ #include "llvm/Support/TargetSelect.h" #include "llvm/Target/TargetMachine.h" #include "llvm/TargetParser/Host.h" +#include "mlir/Dialect/MemRef/Transforms/AllocationOpInterfaceImpl.h" // from @llvm-project #include "mlir/ExecutionEngine/OptUtils.h" // from @llvm-project #include 
"mlir/Pass/PassManager.h" // from @llvm-project #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" // from @llvm-project @@ -127,7 +128,10 @@ Status Run(llvm::StringRef input_file, llvm::StringRef output_file, ReadFileToString(Env::Default(), input_file.str(), &hlo_code)); // Compile. - mlir::MLIRContext context; + mlir::DialectRegistry registry; + mlir::memref::registerAllocationOpInterfaceExternalModels(registry); + mlir::MLIRContext context(registry); + llvm::SourceMgr source_mgr; mlir::SourceMgrDiagnosticHandler source_mgr_handler(source_mgr, &context); diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tf_framework_c_interface.cc b/tensorflow/compiler/mlir/tools/kernel_gen/tf_framework_c_interface.cc index 021c1566c78b98..34cbb4069ffeac 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/tf_framework_c_interface.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/tf_framework_c_interface.cc @@ -21,6 +21,7 @@ limitations under the License. #include "llvm/ADT/SmallVector.h" #include "llvm/Support/TargetSelect.h" +#include "mlir/Dialect/MemRef/Transforms/AllocationOpInterfaceImpl.h" // from @llvm-project #include "mlir/ExecutionEngine/ExecutionEngine.h" // from @llvm-project #include "mlir/ExecutionEngine/OptUtils.h" // from @llvm-project #include "mlir/Parser/Parser.h" // from @llvm-project @@ -185,7 +186,10 @@ llvm::Expected> Compile( } // Create the kernel. 
- mlir::MLIRContext context; + mlir::DialectRegistry registry; + mlir::memref::registerAllocationOpInterfaceExternalModels(registry); + mlir::MLIRContext context(registry); + mlir::OwningOpRef module; if (item.result_module().empty()) { diff --git a/third_party/llvm/workspace.bzl b/third_party/llvm/workspace.bzl index 6b039a5a6e9efe..aaec90a65c08e6 100644 --- a/third_party/llvm/workspace.bzl +++ b/third_party/llvm/workspace.bzl @@ -4,8 +4,8 @@ load("//third_party:repo.bzl", "tf_http_archive") def repo(name): """Imports LLVM.""" - LLVM_COMMIT = "afd7db48c55cb87566758e961f1ebac8af16b8bc" - LLVM_SHA256 = "64f1436eb824ee7f6125ae06c7c337c8edfa8763767f38d7fd218ca02b0311c3" + LLVM_COMMIT = "ebefe83c092e41d243829ab812bb650674e2f3d2" + LLVM_SHA256 = "0cac9b05231cd3f0f4efb29fad98ef9b6eb9f01c9c99d016a93764033a603426" tf_http_archive( name = name, From 216a9cea0dff4ce4b12089eff24488babb5a3f4a Mon Sep 17 00:00:00 2001 From: Yang Chen Date: Thu, 21 Sep 2023 16:40:27 -0700 Subject: [PATCH 112/567] #tf-data-service Use native proto casters to simplify pybind code. This way we don't need to define the Python version of `DataServiceMetadata`. 
PiperOrigin-RevId: 567454478 --- tensorflow/python/data/experimental/service/BUILD | 1 + .../experimental/service/_pywrap_server_lib.pyi | 7 ------- .../experimental/service/server_lib_wrapper.cc | 15 ++++----------- 3 files changed, 5 insertions(+), 18 deletions(-) diff --git a/tensorflow/python/data/experimental/service/BUILD b/tensorflow/python/data/experimental/service/BUILD index 40394834021318..312d9607056906 100644 --- a/tensorflow/python/data/experimental/service/BUILD +++ b/tensorflow/python/data/experimental/service/BUILD @@ -22,6 +22,7 @@ tf_python_pybind_extension( "//third_party/python_runtime:headers", "@com_google_absl//absl/strings", "@pybind11", + "@pybind11_protobuf//pybind11_protobuf:native_proto_caster", ], ) diff --git a/tensorflow/python/data/experimental/service/_pywrap_server_lib.pyi b/tensorflow/python/data/experimental/service/_pywrap_server_lib.pyi index 63a57f2ed08b3c..d39c6ac8225da8 100644 --- a/tensorflow/python/data/experimental/service/_pywrap_server_lib.pyi +++ b/tensorflow/python/data/experimental/service/_pywrap_server_lib.pyi @@ -15,13 +15,6 @@ from typing import Any -class DataServiceMetadata: - def __init__(self) -> None: ... - @property - def compression(self) -> Any: ... - @property - def element_spec(self) -> bytes: ... - class DispatchGrpcDataServer: def __init__(self, *args, **kwargs) -> None: ... def bound_port(self) -> int: ... diff --git a/tensorflow/python/data/experimental/service/server_lib_wrapper.cc b/tensorflow/python/data/experimental/service/server_lib_wrapper.cc index 13315eaa137de4..a3157997249423 100644 --- a/tensorflow/python/data/experimental/service/server_lib_wrapper.cc +++ b/tensorflow/python/data/experimental/service/server_lib_wrapper.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ - +#include #include #include #include @@ -26,6 +26,7 @@ limitations under the License. #include "pybind11/pybind11.h" // from @pybind11 #include "pybind11/pytypes.h" // from @pybind11 #include "pybind11/stl.h" // from @pybind11 +#include "pybind11_protobuf/native_proto_caster.h" // from @pybind11_protobuf #include "tensorflow/core/data/service/common.pb.h" #include "tensorflow/core/data/service/dispatcher_client.h" #include "tensorflow/core/data/service/grpc_util.h" @@ -40,6 +41,8 @@ limitations under the License. namespace py = pybind11; PYBIND11_MODULE(_pywrap_server_lib, m) { + pybind11_protobuf::ImportNativeProtoCasters(); + py::class_(m, "DispatchGrpcDataServer") .def("start", &tensorflow::data::DispatchGrpcDataServer::Start) @@ -147,16 +150,6 @@ PYBIND11_MODULE(_pywrap_server_lib, m) { }, py::return_value_policy::reference); - py::class_ data_service_metadata( - m, "DataServiceMetadata"); - data_service_metadata.def(py::init<>()) - .def_property_readonly( - "element_spec", - [](const tensorflow::data::DataServiceMetadata& data_service_metadata) - -> py::bytes { return data_service_metadata.element_spec(); }) - .def_property_readonly( - "compression", &tensorflow::data::DataServiceMetadata::compression) - .def("__repr__", &tensorflow::data::DataServiceMetadata::DebugString); py::class_ snapshot_task_progress_wrapper(m, "SnapshotTaskProgressWrapper"); snapshot_task_progress_wrapper.def(py::init<>()) From 2247d487e8d276ca774a52d51930ed3857c433d4 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Thu, 21 Sep 2023 16:50:49 -0700 Subject: [PATCH 113/567] [stream_executor] NFC: Remove stream_executor_headers dependencies outside of StreamExecutor `stream_executor_headers` is an implementation detail of StreamExecutor, all external clients should depend on a regular `stream_executor` target (unless there is a reason to depend on header only target). 
And it makes absolutely no sense to depend on both targets. PiperOrigin-RevId: 567456924 --- .../compiler/mlir/tools/kernel_gen/BUILD | 4 +-- tensorflow/compiler/tf2xla/BUILD | 2 +- tensorflow/compiler/tf2xla/kernels/BUILD | 2 +- tensorflow/compiler/xrt/kernels/BUILD | 4 +-- tensorflow/core/common_runtime/gpu/BUILD | 10 +++--- tensorflow/core/kernels/BUILD | 2 +- tensorflow/core/tpu/kernels/BUILD | 4 +-- tensorflow/core/util/autotune_maps/BUILD | 12 +++---- third_party/xla/xla/service/BUILD | 2 +- third_party/xla/xla/service/gpu/BUILD | 32 ++++++++----------- third_party/xla/xla/service/gpu/runtime/BUILD | 16 +++++----- third_party/xla/xla/service/gpu/tests/BUILD | 3 +- third_party/xla/xla/stream_executor/BUILD | 1 + 13 files changed, 44 insertions(+), 50 deletions(-) diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/BUILD b/tensorflow/compiler/mlir/tools/kernel_gen/BUILD index 395b79491460e8..e6ce181074de7f 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/BUILD +++ b/tensorflow/compiler/mlir/tools/kernel_gen/BUILD @@ -169,7 +169,7 @@ cc_library( "@llvm-project//mlir:MemRefTransforms", "@llvm-project//mlir:Parser", "@llvm-project//mlir:mlir_runner_utils", - "@local_xla//xla/stream_executor:stream_executor_headers", + "@local_xla//xla/stream_executor", ], ) @@ -204,7 +204,7 @@ cc_library( "@com_google_absl//absl/strings", "@llvm-project//mlir:mlir_runner_utils", "@local_tsl//tsl/platform:hash", - "@local_xla//xla/stream_executor:stream_executor_headers", + "@local_xla//xla/stream_executor", ] + if_cuda_is_configured([ "@local_config_cuda//cuda:cuda_headers", "@local_xla//xla/stream_executor/cuda:stream_executor_cuda", diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD index c0d81ef159c287..44ecb3ee0e2b27 100644 --- a/tensorflow/compiler/tf2xla/BUILD +++ b/tensorflow/compiler/tf2xla/BUILD @@ -694,7 +694,7 @@ cc_library( "@local_xla//xla/hlo/ir:hlo", "@local_xla//xla/service:computation_placer_hdr", 
"@local_xla//xla/service/gpu:gpu_executable_run_options", - "@local_xla//xla/stream_executor:stream_executor_headers", + "@local_xla//xla/stream_executor", "@local_xla//xla/translate/mhlo_to_hlo:layout_util", ], alwayslink = 1, diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD index 4b2af30072bb2f..a3b082939c1dc8 100644 --- a/tensorflow/compiler/tf2xla/kernels/BUILD +++ b/tensorflow/compiler/tf2xla/kernels/BUILD @@ -216,7 +216,7 @@ tf_cuda_library( "@local_xla//xla/service:custom_call_status", "@local_xla//xla/service:custom_call_target_registry", "@local_xla//xla/service:hlo_proto_cc", - "@local_xla//xla/stream_executor:stream_executor_headers", + "@local_xla//xla/stream_executor", "@local_xla//xla/stream_executor/gpu:gpu_executor_header", "@local_xla//xla/stream_executor/gpu:gpu_stream_header", "@local_xla//xla/stream_executor/gpu:gpu_types_header", diff --git a/tensorflow/compiler/xrt/kernels/BUILD b/tensorflow/compiler/xrt/kernels/BUILD index fb9995240dd897..e4c4075a392c3a 100644 --- a/tensorflow/compiler/xrt/kernels/BUILD +++ b/tensorflow/compiler/xrt/kernels/BUILD @@ -97,7 +97,7 @@ cc_library( "@local_xla//xla/service:computation_placer", "@local_xla//xla/service:dump", "@local_xla//xla/service:hlo_proto_cc", - "@local_xla//xla/stream_executor:stream_executor_headers", + "@local_xla//xla/stream_executor", "@local_xla//xla/stream_executor/tpu:tpu_api", ], alwayslink = 1, @@ -140,7 +140,7 @@ cc_library( "@local_xla//xla/service:compiler", "@local_xla//xla/service:computation_placer", "@local_xla//xla/service/gpu:gpu_executable_run_options", - "@local_xla//xla/stream_executor:stream_executor_headers", + "@local_xla//xla/stream_executor", ], alwayslink = 1, ) diff --git a/tensorflow/core/common_runtime/gpu/BUILD b/tensorflow/core/common_runtime/gpu/BUILD index 7b91cc850489a9..4652e0051cd8fd 100644 --- a/tensorflow/core/common_runtime/gpu/BUILD +++ b/tensorflow/core/common_runtime/gpu/BUILD @@ -14,15 +14,15 @@ load( 
"filegroup", "tf_cuda_cc_test", ) -load( - "//tensorflow/core/platform:rules_cc.bzl", - "cc_library", -) load( "//tensorflow/core/platform:build_config_root.bzl", "if_static", "tf_cuda_tests_tags", ) +load( + "//tensorflow/core/platform:rules_cc.bzl", + "cc_library", +) package( # copybara:uncomment default_applicable_licenses = ["//tensorflow:license"], @@ -282,8 +282,8 @@ tf_cuda_library( "//tensorflow/core/platform:stream_executor", "//tensorflow/core/profiler/lib:traceme", "@com_google_absl//absl/strings:str_format", + "@local_xla//xla/stream_executor", "@local_xla//xla/stream_executor:platform", - "@local_xla//xla/stream_executor:stream_executor_headers", ], ) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 1612d7c1a707be..35957cc529c019 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -4078,7 +4078,7 @@ tf_kernel_library( "@local_xla//xla/stream_executor/cuda:cudnn_plugin", "@local_xla//xla/stream_executor/gpu:gpu_asm_opts", "@local_xla//xla/stream_executor:tf_allocator_adapter", - "@local_xla//xla/stream_executor:stream_executor_headers", + "@local_xla//xla/stream_executor", "//tensorflow/core/platform:stream_executor", ]) + if_cuda_or_rocm([ ":gpu_utils", diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index b7ac39d708bb39..4ac173f9c5b655 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -201,7 +201,7 @@ tf_kernel_library( "@local_tsl//tsl/platform:tstring", "@local_tsl//tsl/protobuf:error_codes_proto_impl_cc", "@local_xla//xla:util", - "@local_xla//xla/stream_executor:stream_executor_headers", + "@local_xla//xla/stream_executor", "@local_xla//xla/stream_executor/tpu:proto_helper", "@local_xla//xla/stream_executor/tpu:status_helper", "@local_xla//xla/stream_executor/tpu:tpu_api", @@ -238,7 +238,7 @@ tf_kernel_library( "//tensorflow/core/tpu:tpu_defs", "@com_google_absl//absl/cleanup", "@local_xla//xla:util", - 
"@local_xla//xla/stream_executor:stream_executor_headers", + "@local_xla//xla/stream_executor", "@local_xla//xla/stream_executor/tpu:proto_helper", "@local_xla//xla/stream_executor/tpu:status_helper", "@local_xla//xla/stream_executor/tpu:tpu_api", diff --git a/tensorflow/core/util/autotune_maps/BUILD b/tensorflow/core/util/autotune_maps/BUILD index cf0b8eda2abb06..4be1a30bf6d410 100644 --- a/tensorflow/core/util/autotune_maps/BUILD +++ b/tensorflow/core/util/autotune_maps/BUILD @@ -3,6 +3,11 @@ # and later restore them. # Placeholder: load py_proto_library +load( + "//tensorflow:tensorflow.bzl", + "tf_cuda_library", + "tf_cuda_only_cc_test", +) # TODO(ruochengw): Currently only supports contrib's fused_conv2d_bias_activation_op. # We plan to add more ops and move fused_conv2d_bias_activation_op back into core library. @@ -14,11 +19,6 @@ load( "//tensorflow/core/platform:rules_cc.bzl", "cc_library", ) -load( - "//tensorflow:tensorflow.bzl", - "tf_cuda_library", - "tf_cuda_only_cc_test", -) package( # copybara:uncomment default_applicable_licenses = ["//tensorflow:license"], @@ -136,9 +136,9 @@ tf_cuda_library( "//tensorflow/core/platform:status", "//tensorflow/core/platform:stream_executor", "@local_xla//xla:status_macros", + "@local_xla//xla/stream_executor", "@local_xla//xla/stream_executor:dnn_proto_cc", "@local_xla//xla/stream_executor:lazy_op_runner", - "@local_xla//xla/stream_executor:stream_executor_headers", "@local_xla//xla/stream_executor/gpu:gpu_init", ], ) diff --git a/third_party/xla/xla/service/BUILD b/third_party/xla/xla/service/BUILD index 65c42edbbfcaad..2aee4c3a9ec378 100644 --- a/third_party/xla/xla/service/BUILD +++ b/third_party/xla/xla/service/BUILD @@ -3846,7 +3846,7 @@ cc_library( "//xla:status", "//xla:statusor", "//xla:xla_data_proto_cc", - "//xla/stream_executor:stream_executor_headers", + "//xla/stream_executor", "@com_google_absl//absl/container:flat_hash_map", "@local_tsl//tsl/platform:status", ], diff --git 
a/third_party/xla/xla/service/gpu/BUILD b/third_party/xla/xla/service/gpu/BUILD index c6f2f7d1ba3a08..2427757038d855 100644 --- a/third_party/xla/xla/service/gpu/BUILD +++ b/third_party/xla/xla/service/gpu/BUILD @@ -255,8 +255,8 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":gpu_types", + "//xla/stream_executor", "//xla/stream_executor:device_description_proto_cc", - "//xla/stream_executor:stream_executor_headers", ], ) @@ -308,7 +308,7 @@ cc_library( ":gpu_executable", "//xla/service:buffer_assignment", "//xla/service:name_uniquer", - "//xla/stream_executor:stream_executor_headers", + "//xla/stream_executor", "@llvm-project//llvm:ir_headers", "@llvm-project//mlir:IR", ], @@ -1177,7 +1177,6 @@ cc_library( "@local_tsl//tsl/platform:logging", "//xla/stream_executor", "//xla/stream_executor:device_memory", - "//xla/stream_executor:stream_executor_headers", "//xla/stream_executor/gpu:gpu_asm_opts", ]) + ["@local_tsl//tsl/platform:status"], ) @@ -1205,7 +1204,6 @@ cc_library( "@local_tsl//tsl/platform:logging", "//xla/stream_executor", "//xla/stream_executor:device_memory", - "//xla/stream_executor:stream_executor_headers", "//xla/stream_executor/gpu:gpu_asm_opts", ]) + ["@local_tsl//tsl/platform:status"], ) @@ -1225,7 +1223,6 @@ cc_library( "//xla/service:buffer_assignment", "//xla/stream_executor", "//xla/stream_executor:scratch_allocator", - "//xla/stream_executor:stream_executor_headers", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", @@ -1376,8 +1373,8 @@ cc_library( ":thunk", "//xla:status", "//xla/service:buffer_assignment", + "//xla/stream_executor", "//xla/stream_executor:device_memory", - "//xla/stream_executor:stream_executor_headers", "@local_tsl//tsl/platform:logging", ], ) @@ -1400,7 +1397,7 @@ cc_library( "//xla/service:buffer_assignment", "//xla:status", "//xla/stream_executor:device_memory", - "//xla/stream_executor:stream_executor_headers", + 
"//xla/stream_executor", "@local_tsl//tsl/platform:logging", "@local_tsl//tsl/platform:statusor", ]) + if_cuda_is_configured([ @@ -1556,7 +1553,7 @@ cc_library( "//xla/hlo/ir:hlo", "//xla/mlir_hlo", "//xla/mlir_hlo:lhlo_gpu", - "//xla/stream_executor:stream_executor_headers", + "//xla/stream_executor", "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/types:span", "@local_tsl//tsl/platform:statusor", @@ -2181,7 +2178,7 @@ cc_library( "//xla:shape_util", "//xla:window_util", "//xla/hlo/ir:hlo", - "//xla/stream_executor:stream_executor_headers", + "//xla/stream_executor", "@local_tsl//tsl/platform:status", ], ) @@ -2198,8 +2195,8 @@ xla_cc_test( "//xla/service:hlo_parser", "//xla/service:pattern_matcher", "//xla/service:pattern_matcher_gmock", + "//xla/stream_executor", "//xla/stream_executor:device_description", - "//xla/stream_executor:stream_executor_headers", "//xla/tests:hlo_test_base", "//xla/tests:verified_hlo_module", "//xla/tests:xla_internal_test_main", @@ -2225,7 +2222,7 @@ cc_library( "//xla:window_util", "//xla/hlo/ir:hlo", "//xla/service:hlo_pass", - "//xla/stream_executor:stream_executor_headers", + "//xla/stream_executor", "@com_google_absl//absl/functional:bind_front", "@local_tsl//tsl/platform:status", ], @@ -2262,7 +2259,7 @@ cc_library( "//xla/client:xla_builder", "//xla/hlo/ir:hlo", "//xla/service:hlo_pass", - "//xla/stream_executor:stream_executor_headers", + "//xla/stream_executor", ], ) @@ -2405,7 +2402,6 @@ cc_library( "//xla/service:generic_transfer_manager", "//xla/service:transfer_manager", "//xla/stream_executor", - "//xla/stream_executor:stream_executor_headers", "//xla/stream_executor/cuda:cuda_platform_id", "//xla/stream_executor/host:host_platform_id", "//xla/stream_executor/rocm:rocm_platform_id", @@ -2729,7 +2725,6 @@ cc_library( "//xla/service/spmd:collective_permute_motion", "//xla/service/spmd:stateful_rng_spmd_partitioner", "//xla/stream_executor:device_description_proto_cc_impl", - 
"//xla/stream_executor:stream_executor_headers", "//xla/stream_executor", "//xla/stream_executor/cuda:cuda_platform_id", "//xla/translate/hlo_to_mhlo:hlo_utils", @@ -2869,7 +2864,7 @@ cc_library( "//xla/service:tuple_simplifier", "//xla/service/gpu/llvm_gpu_backend", "//xla/service/llvm_ir:llvm_util", - "//xla/stream_executor:stream_executor_headers", + "//xla/stream_executor", "//xla/stream_executor/cuda:cuda_diagnostics", "//xla/stream_executor/cuda:cuda_platform_id", "//xla/stream_executor/gpu:asm_compiler", @@ -3394,7 +3389,7 @@ xla_cc_test( "//xla/hlo/ir:hlo", "//xla/service:hlo_runner", "//xla/service:platform_util", - "//xla/stream_executor:stream_executor_headers", + "//xla/stream_executor", "@com_google_absl//absl/log", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", @@ -3435,7 +3430,6 @@ cc_library( "//xla:util", "//xla/service:hlo_module_config", "//xla/stream_executor", - "//xla/stream_executor:stream_executor_headers", "//xla/stream_executor/gpu:asm_compiler", "@local_tsl//tsl/platform:statusor", ]), @@ -3589,8 +3583,8 @@ cc_library( "//xla/service:hlo_creation_utils", "//xla/service:hlo_pass", "//xla/service:pattern_matcher", + "//xla/stream_executor", "//xla/stream_executor:dnn_proto_cc", - "//xla/stream_executor:stream_executor_headers", "@com_google_absl//absl/log", "@com_google_absl//absl/log:check", "@com_google_absl//absl/strings", @@ -4200,7 +4194,7 @@ cc_library( "//xla:types", "//xla:util", "//xla/stream_executor:device_memory", - "//xla/stream_executor:stream_executor_headers", + "//xla/stream_executor", "//xla/stream_executor/gpu:asm_compiler", "//xla/stream_executor/gpu:gpu_asm_opts", ]) + if_rocm_is_configured([ diff --git a/third_party/xla/xla/service/gpu/runtime/BUILD b/third_party/xla/xla/service/gpu/runtime/BUILD index 8eef320f5fa5b0..168a6e04b071d4 100644 --- a/third_party/xla/xla/service/gpu/runtime/BUILD +++ b/third_party/xla/xla/service/gpu/runtime/BUILD @@ -1,14 +1,14 @@ 
-load("@local_tsl//tsl/platform/default:cuda_build_defs.bzl", "if_cuda_is_configured") -load("//xla:xla.bzl", "xla_cc_test") load("@local_config_cuda//cuda:build_defs.bzl", "cuda_library") -load( - "@local_tsl//tsl/platform:build_config_root.bzl", - "tf_cuda_tests_tags", -) +load("//xla:xla.bzl", "xla_cc_test") load( "@local_config_rocm//rocm:build_defs.bzl", "if_rocm_is_configured", ) +load( + "@local_tsl//tsl/platform:build_config_root.bzl", + "tf_cuda_tests_tags", +) +load("@local_tsl//tsl/platform/default:cuda_build_defs.bzl", "if_cuda_is_configured") package( default_visibility = ["//visibility:public"], @@ -202,7 +202,7 @@ cc_library( "//xla/service/gpu:buffer_allocations", "//xla/service/gpu:non_atomically_upgradeable_rw_lock", "//xla/service/gpu:thunk", - "//xla/stream_executor:stream_executor_headers", + "//xla/stream_executor", "//xla/stream_executor/gpu:gpu_stream", "@com_google_absl//absl/container:inlined_vector", "@local_tsl//tsl/protobuf:dnn_proto_cc", @@ -241,8 +241,8 @@ cc_library( ":topk_kernel_cuda", "//xla:shape_util", "//xla:xla_data_proto_cc", + "//xla/stream_executor", # build_cleaner: keep "//xla/stream_executor:platform", - "//xla/stream_executor:stream_executor_headers", # build_cleaner: keep "//xla/stream_executor/gpu:gpu_stream_header", "//xla/stream_executor/gpu:gpu_types_header", "@com_google_absl//absl/numeric:bits", diff --git a/third_party/xla/xla/service/gpu/tests/BUILD b/third_party/xla/xla/service/gpu/tests/BUILD index 8fe908e22901c3..854766e620a00f 100644 --- a/third_party/xla/xla/service/gpu/tests/BUILD +++ b/third_party/xla/xla/service/gpu/tests/BUILD @@ -58,7 +58,7 @@ cc_library( "//xla:types", "//xla/service:gpu_plugin", "//xla/service/gpu:gpu_executable", - "//xla/stream_executor:stream_executor_headers", + "//xla/stream_executor", "//xla/tests:filecheck", "//xla/tests:llvm_irgen_test_base", "//xla/tests:verified_hlo_module", @@ -848,7 +848,6 @@ xla_cc_test( "//xla/stream_executor/gpu:asm_compiler", 
"//xla/service/gpu:gpu_asm_opts_util", "//xla/stream_executor", - "//xla/stream_executor:stream_executor_headers", "//xla/service/gpu:stream_executor_util", "//xla/stream_executor:device_memory", ]), diff --git a/third_party/xla/xla/stream_executor/BUILD b/third_party/xla/xla/stream_executor/BUILD index 3254a4ead38063..afbc0451827152 100644 --- a/third_party/xla/xla/stream_executor/BUILD +++ b/third_party/xla/xla/stream_executor/BUILD @@ -781,6 +781,7 @@ cc_library( "plugin.h", "plugin_registry.h", "stream.h", + "numeric_options.h", "stream_executor.h", "stream_executor_internal.h", "stream_executor_pimpl.h", From 34ad3d985a520364be3a3aa51d894dfe482964a0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 21 Sep 2023 17:02:21 -0700 Subject: [PATCH 114/567] Update TFRT dependency to use revision http://github.com/tensorflow/runtime/commit/73bfccd957234f8cc02c69dc50078288b6d4db8c. PiperOrigin-RevId: 567459578 --- third_party/tf_runtime/workspace.bzl | 4 ++-- third_party/xla/third_party/tf_runtime/workspace.bzl | 4 ++-- .../xla/third_party/tsl/third_party/tf_runtime/workspace.bzl | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/third_party/tf_runtime/workspace.bzl b/third_party/tf_runtime/workspace.bzl index a788df8889a6a7..5b50da65a5d4de 100644 --- a/third_party/tf_runtime/workspace.bzl +++ b/third_party/tf_runtime/workspace.bzl @@ -6,8 +6,8 @@ def repo(): """Imports TFRT.""" # Attention: tools parse and update these lines. 
- TFRT_COMMIT = "6a3e3ece9c01f3e5742a297c73357d463a2fe151" - TFRT_SHA256 = "24477cd3e9ac93a1010542de308341f1c112df974f9510c18e4d4efb2de78e59" + TFRT_COMMIT = "73bfccd957234f8cc02c69dc50078288b6d4db8c" + TFRT_SHA256 = "2ae79e0aa046864afc9b43a23ee077c52e2f91a192e62f6ca8e82a68a206f116" tf_http_archive( name = "tf_runtime", diff --git a/third_party/xla/third_party/tf_runtime/workspace.bzl b/third_party/xla/third_party/tf_runtime/workspace.bzl index a788df8889a6a7..5b50da65a5d4de 100644 --- a/third_party/xla/third_party/tf_runtime/workspace.bzl +++ b/third_party/xla/third_party/tf_runtime/workspace.bzl @@ -6,8 +6,8 @@ def repo(): """Imports TFRT.""" # Attention: tools parse and update these lines. - TFRT_COMMIT = "6a3e3ece9c01f3e5742a297c73357d463a2fe151" - TFRT_SHA256 = "24477cd3e9ac93a1010542de308341f1c112df974f9510c18e4d4efb2de78e59" + TFRT_COMMIT = "73bfccd957234f8cc02c69dc50078288b6d4db8c" + TFRT_SHA256 = "2ae79e0aa046864afc9b43a23ee077c52e2f91a192e62f6ca8e82a68a206f116" tf_http_archive( name = "tf_runtime", diff --git a/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl b/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl index a788df8889a6a7..5b50da65a5d4de 100644 --- a/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl +++ b/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl @@ -6,8 +6,8 @@ def repo(): """Imports TFRT.""" # Attention: tools parse and update these lines. 
- TFRT_COMMIT = "6a3e3ece9c01f3e5742a297c73357d463a2fe151" - TFRT_SHA256 = "24477cd3e9ac93a1010542de308341f1c112df974f9510c18e4d4efb2de78e59" + TFRT_COMMIT = "73bfccd957234f8cc02c69dc50078288b6d4db8c" + TFRT_SHA256 = "2ae79e0aa046864afc9b43a23ee077c52e2f91a192e62f6ca8e82a68a206f116" tf_http_archive( name = "tf_runtime", From 83b712c02c2fccff2667e706794c722edb0bce3b Mon Sep 17 00:00:00 2001 From: Praveen Narayanan Date: Thu, 21 Sep 2023 17:16:12 -0700 Subject: [PATCH 115/567] Fixes to avoid deadlocks with collectives in the pipelining while loop PiperOrigin-RevId: 567462558 --- .../tests/embedding_pipelining.mlir | 89 ------------- .../transforms/embedding_pipelining.cc | 119 ++---------------- 2 files changed, 11 insertions(+), 197 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/embedding_pipelining.mlir b/tensorflow/compiler/mlir/tensorflow/tests/embedding_pipelining.mlir index ead04ab9abd579..fdb64b900e7a8d 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/embedding_pipelining.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/embedding_pipelining.mlir @@ -555,95 +555,6 @@ module { } } -// ----- -// This test verifies the handling of CollectiveGatherV2 ops. -module { - func.func @main() { - %cst_main = "tf.Const"() {value = dense<1> : tensor} : () -> tensor - %0 = "tf.While"(%cst_main) {body = @while_body, cond = @while_cond, is_stateless = false} : (tensor) -> (tensor) - return - } - func.func private @while_body(%arg0: tensor) -> (tensor) { - // Verify the overall pipelining control flow and supporting functions. - // The order of these functions is also significant. 
- // CHECK: {{.*StatefulPartitionedCall.* f = @while_cond.*}} - // CHECK: {{.*StatefulPartitionedCall.* f = @non_tpu.*}} - // CHECK: {{.*StatefulPartitionedCall.* f = @start_step_0.*}} - // CHECK: {{.*StatefulPartitionedCall.* f = @while_cond.*}} - // CHECK: {{.*StatefulPartitionedCall.* f = @non_tpu.*}} - // CHECK: {{.*StatefulPartitionedCall.* f = @start_step_1.*}} - // CHECK: {{.*StatefulPartitionedCall.* f = @while_cond.*}} - // CHECK: {{.*tf.While.* body = @new_while_body.* cond = @new_while_cond.*}} - // CHECK: {{.*StatefulPartitionedCall.* f = @finish_step_nm2.*}} - // CHECK: {{.*StatefulPartitionedCall.* f = @finish_step_nm1.*}} - // CHECK: return - // metadata ops - "tf.TPUReplicateMetadata"() {_has_manual_control_dependencies = true, _replication_info = "repl_info", num_replicas = 2 : i64} : () -> () - %comp_res = "tf.TPUCompilationResult"() {_tpu_compilation_status = "repl_info"} : () -> tensor - - // forward_ops - %v_0 = "tf.StatefulPartitionedCall"(){_collective_manager_ids = [], _read_only_resource_inputs = [], config = "", config_proto = "blah", device = "/job:tpu_host_worker/replica:0/task:0/device:CPU:0", executor_type = "", f = @helper_task0}:() -> (tensor) - %v_1 = "tf.StatefulPartitionedCall"(){_collective_manager_ids = [], _read_only_resource_inputs = [], config = "", config_proto = "blah", device = "/job:tpu_host_worker/replica:0/task:2/device:CPU:0", executor_type = "", f = @helper_task1}:() -> (tensor) - %a_0 = "tf.Identity"(%v_0) {_embedding_pipelining = "forward", _replication_info = "repl_info"}: (tensor) -> tensor - %a_1 = "tf.Identity"(%v_1) {_embedding_pipelining = "forward", _replication_info = "repl_info"}: (tensor) -> tensor - %res_f = "tf.Const"() {_embedding_pipelining = "forward", _replication_info = "repl_info", value = dense<2> : tensor} : () -> tensor - - // core_tpu ops: - %res_t = "tf.Identity"(%res_f) {_replication_info = "repl_info"} : (tensor) -> tensor - - // backward_ops - %res_b = "tf.Identity"(%res_t) 
{_embedding_pipelining = "backward", _replication_info = "repl_info"} : (tensor) -> tensor - - // non_tpu_ops - %res_n = "tf.Identity"(%arg0) : (tensor) -> tensor - - return %res_n : tensor - } - func.func private @helper_task0() -> tensor { - %grpsz_0 = "tf.Const"() {device = "/job:tpu_host_worker/replica:0/task:0/device:CPU:0", value = dense<2> : tensor} : () -> tensor - %grpky_0 = "tf.Const"() {device = "/job:tpu_host_worker/replica:0/task:0/device:CPU:0", value = dense<0> : tensor} : () -> tensor - %cgi_0 = "tf.Const"() {device = "/job:tpu_host_worker/replica:0/task:0/device:CPU:0", value = dense<2> : tensor} : () -> tensor - %gid64_0 = "tf.GlobalIterId"() {device = "/job:tpu_host_worker/replica:0/task:0/device:CPU:0"} : () -> tensor<*xi64> - %gid_0 = "tf.Cast"(%gid64_0) {Truncate = false, device = "/job:tpu_host_worker/replica:0/task:0/device:CPU:0"} : (tensor<*xi64>) -> tensor<*xi32> - %cg_0 = "tf.CollectiveGatherV2"(%cgi_0, %grpsz_0, %grpky_0, %gid_0) {communication_hint = "auto", device = "/job:tpu_host_worker/replica:0/task:0/device:CPU:0", is_stateless = true, timeout_seconds = 0.000000e+00 : f32} : (tensor, tensor, tensor, tensor<*xi32>) -> tensor - return %cg_0 : tensor - } - func.func private @helper_task1() -> tensor { - %grpsz_1 = "tf.Const"() {device = "/job:tpu_host_worker/replica:0/task:1/device:CPU:0", value = dense<2> : tensor} : () -> tensor - %grpky_1 = "tf.Const"() {device = "/job:tpu_host_worker/replica:0/task:1/device:CPU:0", value = dense<0> : tensor} : () -> tensor - %cgi_1 = "tf.Const"() {device = "/job:tpu_host_worker/replica:0/task:1/device:CPU:0", value = dense<2> : tensor} : () -> tensor - %gid64_1 = "tf.GlobalIterId"() {device = "/job:tpu_host_worker/replica:0/task:1/device:CPU:0"} : () -> tensor<*xi64> - %gid_1 = "tf.Cast"(%gid64_1) {Truncate = false, device = "/job:tpu_host_worker/replica:0/task:1/device:CPU:0"} : (tensor<*xi64>) -> tensor<*xi32> - %cg_1 = "tf.CollectiveGatherV2"(%cgi_1, %grpsz_1, %grpky_1, %gid_1) 
{communication_hint = "auto", device = "/job:tpu_host_worker/replica:0/task:1/device:CPU:0", is_stateless = true, timeout_seconds = 0.000000e+00 : f32} : (tensor, tensor, tensor, tensor<*xi32>) -> tensor - return %cg_1 : tensor - } - func.func private @while_cond(%arg0: tensor) -> tensor { - %0 = "tf.Less"(%arg0, %arg0) : (tensor, tensor) -> tensor - return %0 : tensor - } - // Generated functions for control flow ops (if, while, switch) - - // - // CHECK: func.func private @start_step_0 - // CHECK: tf.GlobalIterId - // CHECK: tf.AddV2 - // CHECK: tf.CollectiveGatherV2 - // CHECK: return - - // - // CHECK: func.func private @start_step_1 - // CHECK: tf.GlobalIterId - // CHECK: tf.AddV2 - // CHECK: tf.CollectiveGatherV2 - // CHECK: return - - // - // CHECK: func.func private @new_while_body - // CHECK: {{.*StatefulPartitionedCall.* f = @helper_task0.*}} - // CHECK: {{.*StatefulPartitionedCall.* f = @helper_task1.*}} - // CHECK: return -} - // ----- // This test verifies that for ops with multiple TPU -> backward edges, we // create input/output ops for all of them. diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/embedding_pipelining.cc b/tensorflow/compiler/mlir/tensorflow/transforms/embedding_pipelining.cc index 3463e17beed1c0..3257cc2e6bbe43 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/embedding_pipelining.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/embedding_pipelining.cc @@ -375,106 +375,16 @@ struct Inliner : public InlinerInterface { return LogicalResult::success(); } - bool HasCollectiveGathers(func::FuncOp func) { - return !func.getRegion().getOps().empty(); - } - - LogicalResult PatchCollectiveGatherInstanceKey(func::FuncOp func) { - // We're expecting the original model to have a single CollectiveGatherV2Op - // with the instance_key set to the global_iter_id (see GlobalIterIdOp). - // That collective gets split into 3 copies in the start_step_0, - // start_step_1 and new_while_body functions. 
At this point, however, we're - // expecting one gather per device and we want them all to have the same - // instance key. To make sure the instance keys are unique among the three - // functions them we replace the original instance key (global_iter_id) as: - // new_instance_key = global_iter_id + c - // where c = 0, 1, or 2 depending on which function it's being replaced in. - // Note, also, that the following code assumes this inlining pass is call - // for start_step_0 and start_step_1 before new_while_body. - // - // Verify our assumption that we have just 3 collectives (per device). - const int32_t max_collectives = 3; - static int32_t offset_value = -2; - bool has_gathers = false; - for (auto gather_op : func.getRegion().getOps()) { - if (offset_value >= max_collectives) { - gather_op->emitError() - << "Expected to find only " << max_collectives - << " CollectiveGatherV2 ops but found " << offset_value; - return LogicalResult::failure(); - } - has_gathers = true; - Value orig_instance_key = gather_op->getOperand(3); - auto loc = gather_op->getLoc(); - builder.setInsertionPoint(gather_op); - auto offset = builder.create( - loc, builder.getI32IntegerAttr(offset_value)); - auto new_instance_key = builder.create( - loc, orig_instance_key, offset->getResult(0)); - gather_op->setOperand(3, new_instance_key->getResult(0)); - std::vector attr_names = { - TF::kReplicationInfoAttr.str(), "_xla_compile_device_type", - kEmbeddingPipelining, "_xla_outside_compilation", "device"}; - for (const auto& attr_name : attr_names) { - if (!gather_op->hasAttr(attr_name)) continue; - offset->setAttr(attr_name, gather_op->getAttr(attr_name)); - new_instance_key->setAttr(attr_name, gather_op->getAttr(attr_name)); - } - } - // Make the next function to get inlined use a different offset. 
- if (has_gathers) ++offset_value; - return LogicalResult::success(); - } - - LogicalResult PatchCollectiveGatherOps(func::FuncOp func) { - // We currently expect the gathers to be in nested functions. Check the - // functions called from this function to see if they have gather ops. If - // so, then inline that function so we can locally modify the instance keys - // for the gathers. - llvm::SetVector ops_to_erase; - for (auto caller : - func.getRegion().getOps()) { - auto callee_op = symbol_table.lookup(caller.getF()); - if (callee_op == nullptr) { - func.emitError() << "Symbol not found in SymbolTable: " - << caller.getF(); - return LogicalResult::failure(); - } - func::FuncOp callee = llvm::dyn_cast(callee_op); - // If the function called here doesn't have gathers then ignore it. - if (!HasCollectiveGathers(callee)) continue; - - // Do the inlining. - VLOG(1) << "Nested inlining " << caller.getF().str(); - auto& src_region = callee.getRegion(); - auto result = inlineCall(*this, caller, callee, &src_region, true); - if (failed(result)) { - func.emitError("CollectiveGather Inlining failed"); - return result; - } - ops_to_erase.insert(caller); - } - // If we didn't find nested gathers, we're done. - if (ops_to_erase.empty()) return LogicalResult::success(); - - for (auto op : ops_to_erase) op->erase(); - - // Ok, now we need to update the instance keys. We're expecting one gather - // per device and we should give them all the same instance key. - return PatchCollectiveGatherInstanceKey(func); - return LogicalResult::success(); - } - // Find any StatefulPartitionedCalls and inline their contents in this func. 
LogicalResult InlineCallsInFunc(func::FuncOp func, - bool patch_gathers = false) { + bool inline_all_funcs = false) { llvm::SetVector ops_to_erase; for (auto caller : func.getRegion().getOps()) { - if (!caller->hasAttr(kEmbeddingPipeliningInlineAttr)) { + if (!inline_all_funcs && + !caller->hasAttr(kEmbeddingPipeliningInlineAttr)) { continue; } - VLOG(1) << "Inlining " << caller.getF().str(); Operation* symbol = symbol_table.lookup(caller.getF()); if (symbol == nullptr) { func.emitError() << "Symbol not found in SymbolTable: " @@ -497,11 +407,6 @@ struct Inliner : public InlinerInterface { } for (auto op : ops_to_erase) op->erase(); - if (patch_gathers) { - auto result = PatchCollectiveGatherOps(func); - if (failed(result)) return result; - } - auto result = UnifyReplicationInfo(func); if (failed(result)) return result; @@ -633,7 +538,7 @@ void GatherOpsForExtraction(mlir::SetVector* operations, // Walk the input and output dependencies of the Ops in `operations` to form // the closer of Ops needed to evaluate 'operations'. Input dependencies are // walked if 'predecessors' is true and output dependencies are walked if - // 'successors' is true. In either case, if a discovered Op is in the + // 'successors' is true. In either case, if a discovered Op is in the // 'ops_to_avoid' set, then the dependency walking is terminated. llvm::SetVector ops_to_process(*operations); llvm::SetVector new_ops; @@ -1258,7 +1163,7 @@ LogicalResult ExtractOpsAsFunc( if (!ops.contains(defining_op)) inputs.insert(operand); } } - // Find the output edges to form the set of results of the new function call. + // Find the output edges to form the set of results of the new function call. llvm::SetVector results; for (Operation* op : ops) { for (auto result : op->getResults()) { @@ -1316,7 +1221,7 @@ int FindReturnIndex(Value val) { } // Skip the assertions because they currently create problematic dependencies. 
-constexpr bool kDoAssertions = false; +constexpr bool kDoAssertions = true; void AddAssertion(OpBuilder& builder, Location& loc, Value cond, const std::string& message) { @@ -1404,8 +1309,7 @@ LogicalResult StartStep0(OpBuilder& builder, Location& loc, func_builder.create(loc, results); // Inline any StatefulPartitionCall Ops. - auto result = Inliner(builder, symbol_table) - .InlineCallsInFunc(then_func, /*patch_gathers=*/true); + auto result = Inliner(builder, symbol_table).InlineCallsInFunc(then_func); if (failed(result)) return result; builder.restoreInsertionPoint(insertion_point); @@ -1463,8 +1367,7 @@ LogicalResult StartStep1(OpBuilder& builder, Location& loc, func_builder.create(loc, new_forward->getResults()); // Inline any StatefulPartitionCall Ops. - auto result = Inliner(builder, symbol_table) - .InlineCallsInFunc(then_func, /*patch_gathers=*/true); + auto result = Inliner(builder, symbol_table).InlineCallsInFunc(then_func); if (failed(result)) return result; builder.restoreInsertionPoint(insertion_point); @@ -2159,8 +2062,8 @@ void EmbeddingPipeliningPass::runOnOperation() { // // Finish step i-1 // - // Second, add all the inputs to core_tpu(). These all come from the while - // loop operands, sc_forward() or non_tpu() and need to be pulled from the + // Second, add all the inputs to core_tpu(). These all come from the while + // loop operands, sc_forward() or non_tpu() and need to be pulled from the // "i-1" (or "1") version of the inputs. std::vector t_operands; result = MakeCoreTPUOperands(core_tpu_caller, non_tpu_caller, forward_caller, @@ -2299,7 +2202,7 @@ void EmbeddingPipeliningPass::runOnOperation() { *orig_while_op->getParentRegion()); // Inline the new while body. - result = Inliner(builder, symbol_table).InlineCallsInFunc(body); + result = Inliner(builder, symbol_table).InlineCallsInFunc(body, false); if (failed(result)) return signalPassFailure(); // Erase original while op and temporary functions. 
Note, we use the non_tpu From e70006f20cb98ac17876fadf3ab9aa6d17149aeb Mon Sep 17 00:00:00 2001 From: Jake Harmon Date: Thu, 21 Sep 2023 19:14:36 -0700 Subject: [PATCH 116/567] Restore TSL/XLA headers in tensorflow/include (again) The last attempt at this had two problems: 1) Some files were duplicated, which caused permissions errors when copied over (fixed by #61937). I still need both copies to get the right files, so added -n (no-clobber) to prevent overwriting. 2) cp -r should be cp -R to not cause problems on Mac PiperOrigin-RevId: 567484051 --- tensorflow/tools/pip_package/build_pip_package.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/pip_package/build_pip_package.sh b/tensorflow/tools/pip_package/build_pip_package.sh index a77aa82a21c5aa..07f755ac6137cc 100755 --- a/tensorflow/tools/pip_package/build_pip_package.sh +++ b/tensorflow/tools/pip_package/build_pip_package.sh @@ -256,8 +256,11 @@ function prepare_src() { # Move vendored files into proper locations # This is required because TSL/XLA don't publish their own wheels - cp -rL bazel-bin/external/local_tsl/tsl/ ${TMPDIR}/tensorflow - cp -rL bazel-bin/external/local_xla/xla/ ${TMPDIR}/tensorflow/compiler + # We copy from bazel-bin/tensorflow instead of bazel-bin/external to copy + # headers from TSL/XLA into tensorflow so that InstallHeaders can move + # them back into tensorflow/include + cp -RLn bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/local_tsl/tsl/ ${TMPDIR}/tensorflow + cp -RLn bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/local_xla/xla/ ${TMPDIR}/tensorflow/compiler # Fix the proto stubs if is_macos; then find ${TMPDIR}/tensorflow/ -name "*.py" -type f -exec sed -i '' 's/from tsl\./from tensorflow.tsl./' {} \; From 7e47da962783934779c7611f4566407e26929d4c Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Thu, 21 Sep 2023 21:08:13 -0700 Subject: [PATCH 117/567] [stream_executor] NFC: Clean up kernel_spec 
headers and warnings Remove unused OpenCL kernel loaders. https://github.com/openxla/xla/issues/5761 PiperOrigin-RevId: 567504087 --- third_party/xla/xla/service/gpu/BUILD | 1 - third_party/xla/xla/stream_executor/BUILD | 48 ++++--- .../stream_executor/cuda/cuda_gpu_executor.cc | 14 +- .../xla/xla/stream_executor/kernel_spec.cc | 90 +++++-------- .../xla/xla/stream_executor/kernel_spec.h | 120 ++++-------------- 5 files changed, 99 insertions(+), 174 deletions(-) diff --git a/third_party/xla/xla/service/gpu/BUILD b/third_party/xla/xla/service/gpu/BUILD index 2427757038d855..9f2ce2858edc4d 100644 --- a/third_party/xla/xla/service/gpu/BUILD +++ b/third_party/xla/xla/service/gpu/BUILD @@ -3197,7 +3197,6 @@ cc_library( "//xla/hlo/ir:hlo", "//xla/service:hlo_module_config", "//xla/stream_executor", - "//xla/stream_executor:kernel_spec", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:span", "@local_tsl//tsl/platform:errors", diff --git a/third_party/xla/xla/stream_executor/BUILD b/third_party/xla/xla/stream_executor/BUILD index afbc0451827152..bae9ae09f248b3 100644 --- a/third_party/xla/xla/stream_executor/BUILD +++ b/third_party/xla/xla/stream_executor/BUILD @@ -88,12 +88,12 @@ filegroup( ) #===--------------------------------------------------------------------------------------------===# -# StreamExecutor platform-dependent implementation details +# StreamExecutor platform-dependent interfaces #===--------------------------------------------------------------------------------------------===# # Only platform-dependent StreamExecutor implementations (e.g. StreamExecutor for GPUs) and targets # defined by StreamExecutor itself (e.g. `event`, `kernel`, etc.) can depend on internal -# implementation details (interfaces that define platform-specific API). +# interfaces (interfaces that define platform-specific API). 
# # External clients of StreamExecutor should depend on `stream_executor` target (links StreamExecutor # implementation in static build configuration), or a header only `stream_executor_headers`. @@ -128,6 +128,30 @@ filegroup( visibility = ["//visibility:public"], ) +#===--------------------------------------------------------------------------------------------===# +# StreamExecutor implementation +#===--------------------------------------------------------------------------------------------===# + +# Targets that implement StreamExecutor APIs are private, and should not be used outside of +# `stream_executor` package. Clients should depend on `stream_executor` (headers and +# implementation) or `stream_executor_headers` (only headers, if there is a reason not to link +# implementation) if they want to use StreamExecutor. + +cc_library( + name = "kernel_spec", + srcs = ["kernel_spec.cc"], + hdrs = ["kernel_spec.h"], + visibility = ["//visibility:public"], + deps = [ + "//xla/stream_executor/platform", + "@com_google_absl//absl/log", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/synchronization", + "@local_tsl//tsl/platform:logging", + ], +) + #===--------------------------------------------------------------------------------------------===# # The stream_executor_headers target does not prescribe an implementation. 
@@ -173,6 +197,7 @@ cc_library( "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/functional:any_invocable", + "@com_google_absl//absl/log:check", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/synchronization", @@ -241,19 +266,6 @@ cc_library( ], ) -cc_library( - name = "kernel_spec", - srcs = ["kernel_spec.cc"], - hdrs = ["kernel_spec.h"], - visibility = ["//visibility:public"], - deps = [ - "//xla/stream_executor/platform", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/synchronization", - "@local_tsl//tsl/platform:logging", - ], -) - cc_library( name = "kernel_cache_config", hdrs = ["kernel_cache_config.h"], @@ -355,6 +367,7 @@ cc_library( ":plugin", ":stream_executor_headers", "//xla/stream_executor/platform", + "@com_google_absl//absl/log:check", "@com_google_absl//absl/strings", "@local_tsl//tsl/platform:errors", "@local_tsl//tsl/platform:logging", @@ -436,6 +449,7 @@ cc_library( "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/functional:any_invocable", + "@com_google_absl//absl/log:check", "@com_google_absl//absl/memory", "@com_google_absl//absl/status", "@com_google_absl//absl/strings", @@ -598,6 +612,7 @@ cc_library( "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/functional:any_invocable", + "@com_google_absl//absl/log:check", "@com_google_absl//absl/memory", "@com_google_absl//absl/status", "@com_google_absl//absl/strings", @@ -792,6 +807,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":stream_executor_headers", + "@com_google_absl//absl/log:check", "@local_tsl//tsl/platform:errors", "@local_tsl//tsl/platform:logging", "@local_tsl//tsl/platform:status", @@ -847,6 +863,7 @@ cc_library( ":stream_executor_headers", "//xla/stream_executor/platform", 
"@com_google_absl//absl/container:inlined_vector", + "@com_google_absl//absl/log:check", "@local_tsl//tsl/platform:statusor", ], ) @@ -884,6 +901,7 @@ cc_library( ":device_memory_allocator", ":platform", ":stream_executor_headers", + "@com_google_absl//absl/log:check", "@com_google_absl//absl/synchronization", "@local_tsl//tsl/framework:allocator", "@local_tsl//tsl/platform:errors", diff --git a/third_party/xla/xla/stream_executor/cuda/cuda_gpu_executor.cc b/third_party/xla/xla/stream_executor/cuda/cuda_gpu_executor.cc index 992b9a53990a68..0d882033ee56e5 100644 --- a/third_party/xla/xla/stream_executor/cuda/cuda_gpu_executor.cc +++ b/third_party/xla/xla/stream_executor/cuda/cuda_gpu_executor.cc @@ -183,18 +183,18 @@ tsl::Status GpuExecutor::GetKernel(const MultiKernelLoaderSpec& spec, KernelBase* kernel) { GpuKernel* cuda_kernel = AsGpuKernel(kernel); CUmodule module; - const std::string* kernelname; + const std::string* kernel_name; VLOG(3) << "GetKernel on kernel " << kernel << " : " << kernel->name(); if (spec.has_cuda_cubin_in_memory()) { absl::MutexLock lock{&in_memory_modules_mu_}; - kernelname = &spec.cuda_cubin_in_memory().kernelname(); + kernel_name = &spec.cuda_cubin_in_memory().kernel_name(); const char* cubin = spec.cuda_cubin_in_memory().bytes(); TF_RETURN_IF_ERROR(LoadModuleFromCuBin(cubin, &module)); kernel_to_gpu_binary_[kernel] = cubin; } else if (spec.has_cuda_ptx_in_memory()) { - kernelname = &spec.cuda_ptx_in_memory().kernelname(); + kernel_name = &spec.cuda_ptx_in_memory().kernel_name(); if (cc_major_ == 0 && cc_minor_ == 0) { return tsl::errors::Internal("Compute capability not set"); @@ -205,7 +205,7 @@ tsl::Status GpuExecutor::GetKernel(const MultiKernelLoaderSpec& spec, ptx = spec.cuda_ptx_in_memory().default_text(); } if (ptx == nullptr) { - LOG(FATAL) << "Loader spec has no ptx for kernel " << *kernelname; + LOG(FATAL) << "Loader spec has no ptx for kernel " << *kernel_name; } absl::MutexLock lock{&in_memory_modules_mu_}; @@ -214,9 
+214,9 @@ tsl::Status GpuExecutor::GetKernel(const MultiKernelLoaderSpec& spec, } else { return tsl::errors::Internal("No method of loading CUDA kernel provided"); } - VLOG(2) << "getting function " << *kernelname << " from module " << module; + VLOG(2) << "getting function " << *kernel_name << " from module " << module; TF_RETURN_IF_ERROR(GpuDriver::GetModuleFunction( - context_, module, kernelname->c_str(), cuda_kernel->gpu_function_ptr())); + context_, module, kernel_name->c_str(), cuda_kernel->gpu_function_ptr())); // We have to trust the kernel loader spec arity because there doesn't appear // to be a way to reflect on the number of expected arguments w/the CUDA API. @@ -225,7 +225,7 @@ tsl::Status GpuExecutor::GetKernel(const MultiKernelLoaderSpec& spec, KernelMetadata kernel_metadata; TF_RETURN_IF_ERROR(GetKernelMetadata(cuda_kernel, &kernel_metadata)); kernel->set_metadata(kernel_metadata); - kernel->set_name(*kernelname); + kernel->set_name(*kernel_name); return ::tsl::OkStatus(); } diff --git a/third_party/xla/xla/stream_executor/kernel_spec.cc b/third_party/xla/xla/stream_executor/kernel_spec.cc index a8efbb398fc485..f0fabf44d5399e 100644 --- a/third_party/xla/xla/stream_executor/kernel_spec.cc +++ b/third_party/xla/xla/stream_executor/kernel_spec.cc @@ -15,28 +15,37 @@ limitations under the License. 
#include "xla/stream_executor/kernel_spec.h" +#include +#include +#include +#include +#include + +#include "absl/log/check.h" +#include "absl/log/log.h" #include "absl/strings/string_view.h" +#include "absl/synchronization/mutex.h" namespace stream_executor { -KernelLoaderSpec::KernelLoaderSpec(absl::string_view kernelname) - : kernelname_(std::string(kernelname)) {} +KernelLoaderSpec::KernelLoaderSpec(absl::string_view kernel_name) + : kernel_name_(std::string(kernel_name)) {} OnDiskKernelLoaderSpec::OnDiskKernelLoaderSpec(absl::string_view filename, - absl::string_view kernelname) - : KernelLoaderSpec(kernelname), filename_(std::string(filename)) {} + absl::string_view kernel_name) + : KernelLoaderSpec(kernel_name), filename_(std::string(filename)) {} CudaPtxOnDisk::CudaPtxOnDisk(absl::string_view filename, - absl::string_view kernelname) - : OnDiskKernelLoaderSpec(filename, kernelname) {} + absl::string_view kernel_name) + : OnDiskKernelLoaderSpec(filename, kernel_name) {} CudaCubinOnDisk::CudaCubinOnDisk(absl::string_view filename, - absl::string_view kernelname) - : OnDiskKernelLoaderSpec(filename, kernelname) {} + absl::string_view kernel_name) + : OnDiskKernelLoaderSpec(filename, kernel_name) {} CudaCubinInMemory::CudaCubinInMemory(const char *bytes, - absl::string_view kernelname) - : KernelLoaderSpec(kernelname), bytes_(bytes) {} + absl::string_view kernel_name) + : KernelLoaderSpec(kernel_name), bytes_(bytes) {} bool CompareComputeCapability(const std::tuple &lhs, const std::tuple &rhs) { @@ -157,91 +166,58 @@ const char *CudaPtxInMemory::original_text(int compute_capability_major, return ptx_iter->second; } -OpenCLTextOnDisk::OpenCLTextOnDisk(absl::string_view filename, - absl::string_view kernelname) - : OnDiskKernelLoaderSpec(filename, kernelname) {} - -OpenCLTextInMemory::OpenCLTextInMemory(absl::string_view text, - absl::string_view kernelname) - : KernelLoaderSpec(kernelname), text_(text) {} - 
-OpenCLBinaryOnDisk::OpenCLBinaryOnDisk(absl::string_view filename, - absl::string_view kernelname) - : OnDiskKernelLoaderSpec(filename, kernelname) {} - -MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddOpenCLTextOnDisk( - absl::string_view filename, absl::string_view kernelname) { - CHECK(ocl_text_on_disk_ == nullptr); - ocl_text_on_disk_.reset(new OpenCLTextOnDisk{filename, kernelname}); - return this; -} - -MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddOpenCLBinaryOnDisk( - absl::string_view filename, absl::string_view kernelname) { - CHECK(ocl_binary_on_disk_ == nullptr); - ocl_binary_on_disk_.reset(new OpenCLBinaryOnDisk{filename, kernelname}); - return this; -} - -MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddOpenCLTextInMemory( - absl::string_view filename, absl::string_view kernelname) { - CHECK(ocl_text_in_memory_ == nullptr); - ocl_text_in_memory_.reset(new OpenCLTextInMemory{filename, kernelname}); - return this; -} - MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaPtxOnDisk( - absl::string_view filename, absl::string_view kernelname) { + absl::string_view filename, absl::string_view kernel_name) { CHECK(cuda_ptx_on_disk_ == nullptr); - cuda_ptx_on_disk_.reset(new CudaPtxOnDisk{filename, kernelname}); + cuda_ptx_on_disk_.reset(new CudaPtxOnDisk{filename, kernel_name}); return this; } MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCubinInMemory( - const char *bytes, absl::string_view kernelname) { + const char *bytes, absl::string_view kernel_name) { CHECK(cuda_cubin_in_memory_ == nullptr); - cuda_cubin_in_memory_.reset(new CudaCubinInMemory{bytes, kernelname}); + cuda_cubin_in_memory_.reset(new CudaCubinInMemory{bytes, kernel_name}); return this; } MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCubinOnDisk( - absl::string_view filename, absl::string_view kernelname) { + absl::string_view filename, absl::string_view kernel_name) { CHECK(cuda_cubin_on_disk_ == nullptr); - cuda_cubin_on_disk_.reset(new CudaCubinOnDisk{filename, 
kernelname}); + cuda_cubin_on_disk_.reset(new CudaCubinOnDisk{filename, kernel_name}); return this; } MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaPtxInMemory( - absl::string_view ptx, absl::string_view kernelname) { + absl::string_view ptx, absl::string_view kernel_name) { CHECK(cuda_ptx_in_memory_ == nullptr); cuda_ptx_in_memory_.reset( - new CudaPtxInMemory{ptx, kernelname, false /* ptx_compressed */}); + new CudaPtxInMemory{ptx, kernel_name, false /* ptx_compressed */}); return this; } MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCompressedPtxInMemory( - absl::string_view ptx, absl::string_view kernelname) { + absl::string_view ptx, absl::string_view kernel_name) { CHECK(cuda_ptx_in_memory_ == nullptr); cuda_ptx_in_memory_.reset( - new CudaPtxInMemory{ptx, kernelname, true /* ptx_compressed */}); + new CudaPtxInMemory{ptx, kernel_name, true /* ptx_compressed */}); return this; } MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaPtxInMemory( std::initializer_list spec_list, - absl::string_view kernelname) { + absl::string_view kernel_name) { CHECK(cuda_ptx_in_memory_ == nullptr); cuda_ptx_in_memory_.reset( - new CudaPtxInMemory{spec_list, kernelname, false /* ptx_compressed */}); + new CudaPtxInMemory{spec_list, kernel_name, false /* ptx_compressed */}); return this; } MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCompressedPtxInMemory( std::initializer_list spec_list, - absl::string_view kernelname) { + absl::string_view kernel_name) { CHECK(cuda_ptx_in_memory_ == nullptr); cuda_ptx_in_memory_.reset( - new CudaPtxInMemory{spec_list, kernelname, true /* ptx_compressed */}); + new CudaPtxInMemory{spec_list, kernel_name, true /* ptx_compressed */}); return this; } diff --git a/third_party/xla/xla/stream_executor/kernel_spec.h b/third_party/xla/xla/stream_executor/kernel_spec.h index 70ee8b607faa00..4d28b5b6c72dca 100644 --- a/third_party/xla/xla/stream_executor/kernel_spec.h +++ b/third_party/xla/xla/stream_executor/kernel_spec.h @@ 
-26,19 +26,16 @@ limitations under the License. // static const MultiKernelLoaderSpec &SaxpySpec() { // static auto *mkls = // (new MultiKernelLoaderSpec{4 /* = arity */}) -// ->AddCudaPtxOnDisk(ptx_file_path, ptx_kernelname) -// ->AddOpenCLTextOnDisk(opencl_text_file_path, ocl_kernelname); +// ->AddCudaPtxOnDisk(ptx_file_path, ptx_kernel_name); // }; // // return *mkls; // } // // This lazily instantiates an object that describes how to load CUDA PTX -// present on disk that implements saxpy for the for the CUDA platform, or -// OpenCL text present on disk that implements saxpy for an OpenCL-based -// platform. The CudaPtxOnDisk and OpenCLTextOnDisk objects are subtypes of -// KernelLoaderSpec -- KernelLoaderSpec describes how to load a kernel for -// subsequent launching on a single platform. +// present on disk that implements saxpy for the CUDA platform. The +// CudaPtxOnDisk object is a subtype of KernelLoaderSpec -- KernelLoaderSpec +// describes how to load a kernel for subsequent launching on a single platform. // // For the loader functionality that accepts these KernelLoaderSpecs in order // to grab the kernel appropriately, see StreamExecutor::GetKernel(). @@ -48,9 +45,13 @@ limitations under the License. #include +#include #include #include +#include +#include +#include "absl/log/check.h" #include "absl/strings/string_view.h" #include "absl/synchronization/mutex.h" #include "xla/stream_executor/platform/port.h" @@ -73,15 +74,15 @@ class KernelLoaderSpec { virtual ~KernelLoaderSpec() {} // Returns the kernel name to load out of the program. - const std::string &kernelname() const { return kernelname_; } + const std::string &kernel_name() const { return kernel_name_; } protected: - explicit KernelLoaderSpec(absl::string_view kernelname); + explicit KernelLoaderSpec(absl::string_view kernel_name); private: // The kernel name that should be loaded out of the program description given // above. 
- std::string kernelname_; + std::string kernel_name_; SE_DISALLOW_COPY_AND_ASSIGN(KernelLoaderSpec); }; @@ -102,7 +103,7 @@ class OnDiskKernelLoaderSpec : public KernelLoaderSpec { protected: OnDiskKernelLoaderSpec(absl::string_view filename, - absl::string_view kernelname); + absl::string_view kernel_name); std::string filename_; @@ -113,7 +114,7 @@ class OnDiskKernelLoaderSpec : public KernelLoaderSpec { // Kernel loader specification for PTX text that resides on disk. class CudaPtxOnDisk : public OnDiskKernelLoaderSpec { public: - CudaPtxOnDisk(absl::string_view filename, absl::string_view kernelname); + CudaPtxOnDisk(absl::string_view filename, absl::string_view kernel_name); ~CudaPtxOnDisk() override {} const char *CanonicalSuffix() const override { return ".ptx"; } @@ -125,7 +126,7 @@ class CudaPtxOnDisk : public OnDiskKernelLoaderSpec { // Kernel loader specification for CUBIN binary that resides on disk. class CudaCubinOnDisk : public OnDiskKernelLoaderSpec { public: - CudaCubinOnDisk(absl::string_view filename, absl::string_view kernelname); + CudaCubinOnDisk(absl::string_view filename, absl::string_view kernel_name); ~CudaCubinOnDisk() override {} const std::string &filename() const { return filename_; } @@ -153,7 +154,7 @@ class CudaPtxInMemory : public KernelLoaderSpec { // // Warning: the string backing the provided absl::string_view ptx must outlive // this instance. - CudaPtxInMemory(absl::string_view ptx, absl::string_view kernelname, + CudaPtxInMemory(absl::string_view ptx, absl::string_view kernel_name, bool ptx_compressed = false); // Multiple-PTX-version constructor. Adds each item in spec_list to this @@ -215,50 +216,10 @@ class CudaPtxInMemory : public KernelLoaderSpec { SE_DISALLOW_COPY_AND_ASSIGN(CudaPtxInMemory); }; -// Kernel loader specification for OpenCL text that resides on disk. 
-class OpenCLTextOnDisk : public OnDiskKernelLoaderSpec { - public: - OpenCLTextOnDisk(absl::string_view filename, absl::string_view kernelname); - ~OpenCLTextOnDisk() override {} - - const char *CanonicalSuffix() const override { return ".ocl"; } - - private: - SE_DISALLOW_COPY_AND_ASSIGN(OpenCLTextOnDisk); -}; - -// Kernel loader specification for OpenCL binary that resides on disk. -class OpenCLBinaryOnDisk : public OnDiskKernelLoaderSpec { - public: - OpenCLBinaryOnDisk(absl::string_view filename, absl::string_view kernelname); - ~OpenCLBinaryOnDisk() override {} - - const char *CanonicalSuffix() const override { return ".aocx"; } - - private: - SE_DISALLOW_COPY_AND_ASSIGN(OpenCLBinaryOnDisk); -}; - -// Kernel loader specification for OpenCL text that resides in memory. -class OpenCLTextInMemory : public KernelLoaderSpec { - public: - OpenCLTextInMemory(absl::string_view text, absl::string_view kernelname); - ~OpenCLTextInMemory() override {} - - // Returns the OpenCL text contents. - const std::string &text() const { return text_; } - - private: - // OpenCL translation unit text contents in memory. - std::string text_; - - SE_DISALLOW_COPY_AND_ASSIGN(OpenCLTextInMemory); -}; - // Kernel loader specification for a CUBIN blob that resides in memory. 
class CudaCubinInMemory : public KernelLoaderSpec { public: - CudaCubinInMemory(const char *bytes, absl::string_view kernelname); + CudaCubinInMemory(const char *bytes, absl::string_view kernel_name); ~CudaCubinInMemory() override {} const char *bytes() const { return bytes_; } @@ -285,9 +246,6 @@ class MultiKernelLoaderSpec { return cuda_cubin_in_memory_ != nullptr; } bool has_cuda_ptx_in_memory() const { return cuda_ptx_in_memory_ != nullptr; } - bool has_ocl_text_on_disk() const { return ocl_text_on_disk_ != nullptr; } - bool has_ocl_binary_on_disk() const { return ocl_binary_on_disk_ != nullptr; } - bool has_ocl_text_in_memory() const { return ocl_text_in_memory_ != nullptr; } // Accessors for platform variant kernel load specifications. // Precondition: corresponding has_* is true. @@ -307,49 +265,29 @@ class MultiKernelLoaderSpec { CHECK(has_cuda_ptx_in_memory()); return *cuda_ptx_in_memory_; } - const OpenCLTextOnDisk &ocl_text_on_disk() const { - CHECK(has_ocl_text_on_disk()); - return *ocl_text_on_disk_; - } - const OpenCLBinaryOnDisk &ocl_binary_on_disk() const { - CHECK(has_ocl_binary_on_disk()); - return *ocl_binary_on_disk_; - } - const OpenCLTextInMemory &ocl_text_in_memory() const { - CHECK(has_ocl_text_in_memory()); - return *ocl_text_in_memory_; - } - // Builder-pattern-like methods for use in initializing a // MultiKernelLoaderSpec. Each of these should be used at most once for a // single MultiKernelLoaderSpec object. See file comment for example usage. // - // Note that the kernelname parameter must be consistent with the kernel in - // the PTX or OpenCL being loaded. Also be aware that in CUDA C++ the kernel - // name may be mangled by the compiler if it is not declared in an - // extern "C" scope. 
- MultiKernelLoaderSpec *AddOpenCLTextOnDisk(absl::string_view filename, - absl::string_view kernelname); - MultiKernelLoaderSpec *AddOpenCLBinaryOnDisk(absl::string_view filename, - absl::string_view kernelname); - MultiKernelLoaderSpec *AddOpenCLTextInMemory(absl::string_view ocl_text, - absl::string_view kernelname); + // Note that the kernel_name parameter must be consistent with the kernel in + // the PTX being loaded. Also be aware that in CUDA C++ the kernel name may be + // mangled by the compiler if it is not declared in an extern "C" scope. MultiKernelLoaderSpec *AddCudaPtxOnDisk(absl::string_view filename, - absl::string_view kernelname); + absl::string_view kernel_name); MultiKernelLoaderSpec *AddCudaCubinOnDisk(absl::string_view filename, - absl::string_view kernelname); + absl::string_view kernel_name); MultiKernelLoaderSpec *AddCudaCubinInMemory(const char *cubin_bytes, - absl::string_view kernelname); + absl::string_view kernel_name); MultiKernelLoaderSpec *AddCudaPtxInMemory(absl::string_view ptx, - absl::string_view kernelname); + absl::string_view kernel_name); MultiKernelLoaderSpec *AddCudaCompressedPtxInMemory( - absl::string_view ptx, absl::string_view kernelname); + absl::string_view ptx, absl::string_view kernel_name); MultiKernelLoaderSpec *AddCudaPtxInMemory( std::initializer_list spec_list, - absl::string_view kernelname); + absl::string_view kernel_name); MultiKernelLoaderSpec *AddCudaCompressedPtxInMemory( std::initializer_list spec_list, - absl::string_view kernelname); + absl::string_view kernel_name); private: std::unique_ptr @@ -360,12 +298,6 @@ class MultiKernelLoaderSpec { cuda_cubin_in_memory_; // Binary CUDA program in memory. std::unique_ptr cuda_ptx_in_memory_; // PTX text that resides in memory. - std::unique_ptr - ocl_text_on_disk_; // OpenCL text that resides on disk. - std::unique_ptr - ocl_binary_on_disk_; // OpenCL binary that resides on disk. 
- std::unique_ptr - ocl_text_in_memory_; // OpenCL text that resides in memory. // Number of parameters that the kernel takes. (This is nicer to have in a // constexpr than having to determine it from the types via template From 1518a980e1e0af880a7de819106a522c72f76036 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 21 Sep 2023 21:36:56 -0700 Subject: [PATCH 118/567] Internal change only. PiperOrigin-RevId: 567508075 --- third_party/xla/third_party/tsl/tsl/platform/windows/env.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/xla/third_party/tsl/tsl/platform/windows/env.cc b/third_party/xla/third_party/tsl/tsl/platform/windows/env.cc index 4dd129aadabb91..13fb4515d5a9fd 100644 --- a/third_party/xla/third_party/tsl/tsl/platform/windows/env.cc +++ b/third_party/xla/third_party/tsl/tsl/platform/windows/env.cc @@ -35,7 +35,7 @@ limitations under the License. #include "tsl/platform/windows/windows_file_system.h" #include "tsl/protobuf/error_codes.pb.h" -#pragma comment(lib, "Shlwapi.lib") +#pragma comment(lib, "shlwapi.lib") namespace tsl { From 9adcd215fc5da2c8f2c2270c47bcfc373fb65e7a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 21 Sep 2023 21:57:33 -0700 Subject: [PATCH 119/567] Internal change only. 
PiperOrigin-RevId: 567511750 --- .../tsl/tsl/profiler/backends/cpu/annotation_stack.cc | 9 ++++++++- .../tsl/tsl/profiler/backends/cpu/traceme_recorder.cc | 10 +++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/third_party/xla/third_party/tsl/tsl/profiler/backends/cpu/annotation_stack.cc b/third_party/xla/third_party/tsl/tsl/profiler/backends/cpu/annotation_stack.cc index 529ef95387a6b6..97b4c5daeb373e 100644 --- a/third_party/xla/third_party/tsl/tsl/profiler/backends/cpu/annotation_stack.cc +++ b/third_party/xla/third_party/tsl/tsl/profiler/backends/cpu/annotation_stack.cc @@ -23,7 +23,14 @@ namespace tsl { namespace profiler { namespace internal { -std::atomic g_annotation_enabled(0); +#ifdef _WIN32 +#define DECL_DLL_EXPORT __declspec(dllexport) +#else +#define DECL_DLL_EXPORT +#endif +// DLL imported variables cannot be initialized on Windows. This file is +// included only on DLL exports. +DECL_DLL_EXPORT std::atomic g_annotation_enabled(0); // g_annotation_enabled implementation must be lock-free for faster execution of // the ScopedAnnotation API. This can be commented (if compilation is failing) diff --git a/third_party/xla/third_party/tsl/tsl/profiler/backends/cpu/traceme_recorder.cc b/third_party/xla/third_party/tsl/tsl/profiler/backends/cpu/traceme_recorder.cc index 7cd2d658bcdc49..3d4c7f6c289217 100644 --- a/third_party/xla/third_party/tsl/tsl/profiler/backends/cpu/traceme_recorder.cc +++ b/third_party/xla/third_party/tsl/tsl/profiler/backends/cpu/traceme_recorder.cc @@ -34,7 +34,15 @@ namespace tsl { namespace profiler { namespace internal { -std::atomic g_trace_level(TraceMeRecorder::kTracingDisabled); +#ifdef _WIN32 +#define DECL_DLL_EXPORT __declspec(dllexport) +#else +#define DECL_DLL_EXPORT +#endif +// DLL imported variables cannot be initialized on Windows. This file is +// included only on DLL exports. 
+DECL_DLL_EXPORT std::atomic g_trace_level( + TraceMeRecorder::kTracingDisabled); // g_trace_level implementation must be lock-free for faster execution of the // TraceMe API. This can be commented (if compilation is failing) but execution From e24946f338a92e7ee731d7fe8d765b544054821d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 21 Sep 2023 22:16:36 -0700 Subject: [PATCH 120/567] Internal Code Change PiperOrigin-RevId: 567515434 --- tensorflow/cc/saved_model/BUILD | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorflow/cc/saved_model/BUILD b/tensorflow/cc/saved_model/BUILD index 026ac1683fe971..d0f2fb7324eb6e 100644 --- a/tensorflow/cc/saved_model/BUILD +++ b/tensorflow/cc/saved_model/BUILD @@ -388,7 +388,6 @@ cc_library( visibility = [ "//tensorflow:__pkg__", "//tensorflow/python:__pkg__", - "//tensorflow/security/fuzzing/cc/ops:__pkg__", # TODO(b/261455394): Remove. ], deps = [ "//tensorflow/core:protos_all_cc", @@ -493,7 +492,6 @@ cc_library( visibility = [ "//tensorflow:__pkg__", "//tensorflow/python:__pkg__", - "//tensorflow/security/fuzzing/cc/ops:__pkg__", # TODO(b/261455394): Remove. ], deps = [ ":constants", From a851496e9c8b75448cb3052352d14674ee972237 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 21 Sep 2023 22:19:52 -0700 Subject: [PATCH 121/567] Internal Code Change PiperOrigin-RevId: 567516040 --- third_party/xla/xla/translate/hlo_to_mhlo/tests/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/third_party/xla/xla/translate/hlo_to_mhlo/tests/BUILD b/third_party/xla/xla/translate/hlo_to_mhlo/tests/BUILD index dc9d7e34e2390a..0a9bd841a9663c 100644 --- a/third_party/xla/xla/translate/hlo_to_mhlo/tests/BUILD +++ b/third_party/xla/xla/translate/hlo_to_mhlo/tests/BUILD @@ -3,6 +3,7 @@ load("//xla:glob_lit_test.bzl", "glob_lit_tests") package( default_visibility = ["//visibility:public"], + # copybara:uncomment default_applicable_licenses = ["//tensorflow:license"], licenses = ["notice"], ) From 19c8aba6902f54cab505a17f76a9214bf615ab3d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 21 Sep 2023 22:36:26 -0700 Subject: [PATCH 122/567] Internal change only. PiperOrigin-RevId: 567519140 --- .../tsl/tsl/platform/windows/subprocess.cc | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/third_party/xla/third_party/tsl/tsl/platform/windows/subprocess.cc b/third_party/xla/third_party/tsl/tsl/platform/windows/subprocess.cc index e31432a0047575..c0c948bbc1814a 100644 --- a/third_party/xla/third_party/tsl/tsl/platform/windows/subprocess.cc +++ b/third_party/xla/third_party/tsl/tsl/platform/windows/subprocess.cc @@ -110,9 +110,11 @@ SubProcess::~SubProcess() { mutex_lock procLock(proc_mu_); mutex_lock dataLock(data_mu_); if (win_pi_) { - CloseHandle(reinterpret_cast(win_pi_)->hProcess); - CloseHandle(reinterpret_cast(win_pi_)->hThread); - delete win_pi_; + auto* pi = reinterpret_cast(win_pi_); + CloseHandle(pi->hProcess); + CloseHandle(pi->hThread); + delete pi; + win_pi_ = nullptr; } running_ = false; FreeArgs(); @@ -364,9 +366,11 @@ int SubProcess::Communicate(const string* stdin_input, string* stdout_output, // Lock data_mu_ but not proc_mu_ while communicating with the child 
process // in order for Kill() to be able to terminate the child from another thread. data_mu_.lock(); - if (!IsProcessFinished( - reinterpret_cast(win_pi_)->hProcess) || - (parent_pipe_[CHAN_STDOUT] != nullptr) || + proc_mu_.lock(); + bool process_finished = IsProcessFinished( + reinterpret_cast(win_pi_)->hProcess); + proc_mu_.unlock(); + if (!process_finished || (parent_pipe_[CHAN_STDOUT] != nullptr) || (parent_pipe_[CHAN_STDERR] != nullptr)) { if (parent_pipe_[CHAN_STDIN] != nullptr) { if (stdin_input) { @@ -422,6 +426,7 @@ int SubProcess::Communicate(const string* stdin_input, string* stdout_output, if (wait_result != WAIT_OBJECT_0) { LOG(ERROR) << "Waiting on the io threads failed! result: " << wait_result << std::endl; + data_mu_.unlock(); return -1; } From acd860ca55cf3773ca2a1674dcfac5024a878bc0 Mon Sep 17 00:00:00 2001 From: Jake Harmon Date: Thu, 21 Sep 2023 22:41:24 -0700 Subject: [PATCH 123/567] Add ml_dtypes headers to tensorflow/include PiperOrigin-RevId: 567520259 --- tensorflow/tools/pip_package/setup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 755405fbf15eea..80b6e28ed1fc1c 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -265,6 +265,7 @@ def mkdir_and_copy_file(self, header): external_header_locations = { '/tensorflow/include/external/eigen_archive': '', '/tensorflow/include/external/com_google_absl': '', + '/tensorflow/include/external/ml_dtypes': '/ml_dtypes', '/tensorflow/include/tensorflow/compiler/xla': '/tensorflow/include/xla', '/tensorflow/include/tensorflow/tsl': '/tensorflow/include/tsl', } @@ -345,7 +346,8 @@ def find_files(pattern, root): list(find_files('*', 'third_party/gpus')) + list(find_files('*.h', 'tensorflow/include/external/com_google_absl')) + list(find_files('*.inc', 'tensorflow/include/external/com_google_absl')) + - list(find_files('*', 
'tensorflow/include/external/eigen_archive'))) + list(find_files('*', 'tensorflow/include/external/eigen_archive')) + + list(find_files('*.h', 'tensorflow/include/external/ml_dtypes'))) # Quite a lot of setup() options are different if this is a collaborator package # build. We explicitly list the differences here, then unpack the dict as From b73d62ac5c0585f3ddd34769bec51e12afb8389c Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Thu, 21 Sep 2023 22:42:11 -0700 Subject: [PATCH 124/567] [stream_executor] NFC: Clean up xla/stream_executor:executor_cache dependencies and fix warnings PiperOrigin-RevId: 567520429 --- .../c/experimental/stream_executor/BUILD | 5 +- .../stream_executor/stream_executor.cc | 3 +- .../stream_executor_internal.h | 1 + .../xla/service/gpu/runtime/collectives.cc | 1 - .../xla/xla/service/gpu/runtime/collectives.h | 1 - third_party/xla/xla/stream_executor/BUILD | 51 ++------------- .../xla/xla/stream_executor/executor_cache.cc | 65 ++++++++++--------- .../xla/xla/stream_executor/executor_cache.h | 27 +++++--- 8 files changed, 63 insertions(+), 91 deletions(-) diff --git a/tensorflow/c/experimental/stream_executor/BUILD b/tensorflow/c/experimental/stream_executor/BUILD index 39b81d93fb1723..5cf55e03aa778e 100644 --- a/tensorflow/c/experimental/stream_executor/BUILD +++ b/tensorflow/c/experimental/stream_executor/BUILD @@ -45,10 +45,9 @@ cc_library( "//tensorflow/core/common_runtime/device:device_utils", "//tensorflow/core/platform:strcat", "@com_google_absl//absl/functional:any_invocable", - "@local_xla//xla/stream_executor:executor_cache", + "@local_xla//xla/stream_executor", "@local_xla//xla/stream_executor:multi_platform_manager", "@local_xla//xla/stream_executor:platform", - "@local_xla//xla/stream_executor:stream_executor_pimpl", ], ) @@ -66,7 +65,7 @@ cc_library( "//tensorflow/c:c_api_macros", "//tensorflow/c:tf_status", "//tensorflow/c:tf_status_helper", - "@local_xla//xla/stream_executor:executor_cache", +
"@local_xla//xla/stream_executor", ], ) diff --git a/tensorflow/c/experimental/stream_executor/stream_executor.cc b/tensorflow/c/experimental/stream_executor/stream_executor.cc index 1e2ceee4656049..3fcd255a2248ab 100644 --- a/tensorflow/c/experimental/stream_executor/stream_executor.cc +++ b/tensorflow/c/experimental/stream_executor/stream_executor.cc @@ -33,8 +33,7 @@ limitations under the License. #include "xla/stream_executor/multi_platform_manager.h" #include "xla/stream_executor/platform.h" #include "xla/stream_executor/stream.h" -#include "xla/stream_executor/stream_executor_internal.h" -#include "xla/stream_executor/stream_executor_pimpl.h" +#include "xla/stream_executor/stream_executor.h" #include "tensorflow/core/common_runtime/device/device_utils.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/errors.h" diff --git a/tensorflow/c/experimental/stream_executor/stream_executor_internal.h b/tensorflow/c/experimental/stream_executor/stream_executor_internal.h index 14028fc7d6eec3..6792a61f7bb8cc 100644 --- a/tensorflow/c/experimental/stream_executor/stream_executor_internal.h +++ b/tensorflow/c/experimental/stream_executor/stream_executor_internal.h @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/c/tf_status_helper.h" #include "xla/stream_executor/executor_cache.h" #include "xla/stream_executor/platform.h" +#include "xla/stream_executor/stream_executor.h" namespace stream_executor { diff --git a/third_party/xla/xla/service/gpu/runtime/collectives.cc b/third_party/xla/xla/service/gpu/runtime/collectives.cc index 1c702fee5ce5c9..2b28efbe470b24 100644 --- a/third_party/xla/xla/service/gpu/runtime/collectives.cc +++ b/third_party/xla/xla/service/gpu/runtime/collectives.cc @@ -35,7 +35,6 @@ limitations under the License. 
#include "xla/service/gpu/runtime/support.h" #include "xla/service/gpu/thunk.h" #include "xla/service/service_executable_run_options.h" -#include "xla/stream_executor/stream.h" namespace xla { namespace gpu { diff --git a/third_party/xla/xla/service/gpu/runtime/collectives.h b/third_party/xla/xla/service/gpu/runtime/collectives.h index ca0e2b0203573d..23bd700cc151fc 100644 --- a/third_party/xla/xla/service/gpu/runtime/collectives.h +++ b/third_party/xla/xla/service/gpu/runtime/collectives.h @@ -23,7 +23,6 @@ limitations under the License. #include "xla/runtime/custom_call_registry.h" #include "xla/service/gpu/nccl_collective_thunk.h" #include "xla/stream_executor/event.h" -#include "xla/stream_executor/stream.h" namespace xla { namespace gpu { diff --git a/third_party/xla/xla/stream_executor/BUILD b/third_party/xla/xla/stream_executor/BUILD index bae9ae09f248b3..6783578a4bc150 100644 --- a/third_party/xla/xla/stream_executor/BUILD +++ b/third_party/xla/xla/stream_executor/BUILD @@ -196,6 +196,7 @@ cc_library( "//xla/stream_executor/platform", "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/container:node_hash_map", "@com_google_absl//absl/functional:any_invocable", "@com_google_absl//absl/log:check", "@com_google_absl//absl/memory", @@ -406,63 +407,25 @@ cc_library( cc_library( name = "executor_cache", - srcs = [ - "device_description.h", - "device_memory.h", - "device_options.h", - "event.h", - "executor_cache.cc", - "launch_dim.h", - "plugin.h", - "plugin_registry.h", - "stream_executor_pimpl.h", - "temporary_device_memory.h", - "temporary_memory_manager.h", - ], - hdrs = [ - "blas.h", - "executor_cache.h", - "fft.h", - "kernel.h", - "kernel_cache_config.h", - "kernel_spec.h", - "platform.h", - "stream.h", - "stream_executor_internal.h", - "trace_listener.h", - ], + srcs = ["executor_cache.cc"], + hdrs = ["executor_cache.h"], visibility = ["//visibility:public"], deps = [ - ":allocator_stats", - 
":data_type", - ":device_description", - ":device_description_proto_cc", - ":device_memory", - ":device_options", - ":fft", - ":kernel_cache_config", - ":kernel_spec", - ":launch_dim", - ":plugin", - ":stream_executor_headers", + ":platform", + ":stream_executor_pimpl_header", "//xla/stream_executor/platform", - "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/container:node_hash_map", "@com_google_absl//absl/functional:any_invocable", + "@com_google_absl//absl/log", "@com_google_absl//absl/log:check", "@com_google_absl//absl/memory", "@com_google_absl//absl/status", - "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/synchronization", "@com_google_absl//absl/types:optional", "@com_google_absl//absl/types:span", - "@local_tsl//tsl/platform:env", - "@local_tsl//tsl/platform:errors", - "@local_tsl//tsl/platform:logging", - "@local_tsl//tsl/platform:status", "@local_tsl//tsl/platform:statusor", - "@local_tsl//tsl/protobuf:dnn_proto_cc", ], ) diff --git a/third_party/xla/xla/stream_executor/executor_cache.cc b/third_party/xla/xla/stream_executor/executor_cache.cc index f12af19b72f6b2..10b3627f7aee3a 100644 --- a/third_party/xla/xla/stream_executor/executor_cache.cc +++ b/third_party/xla/xla/stream_executor/executor_cache.cc @@ -16,20 +16,27 @@ limitations under the License. 
#include "xla/stream_executor/executor_cache.h" #include +#include +#include "absl/log/log.h" +#include "absl/status/status.h" #include "absl/strings/str_format.h" #include "absl/synchronization/mutex.h" +#include "xla/stream_executor/platform.h" +#include "xla/stream_executor/stream_executor_pimpl.h" +#include "tsl/platform/statusor.h" namespace stream_executor { +ExecutorCache::ExecutorCache() = default; +ExecutorCache::~ExecutorCache() { DestroyAllExecutors(); } + tsl::StatusOr ExecutorCache::GetOrCreate( - const StreamExecutorConfig& config, - const std::function& factory) { + const StreamExecutorConfig& config, const ExecutorFactory& factory) { // In the fast path case, the cache already has an entry and we can just // return after Get() which only takes a shared lock and not a unique lock. // If we need to create, we take a unique lock on cache_. - auto fast_result = Get(config); - if (fast_result.ok()) { + if (auto fast_result = Get(config); fast_result.ok()) { return fast_result; } @@ -38,7 +45,7 @@ tsl::StatusOr ExecutorCache::GetOrCreate( absl::MutexLock lock{&mutex_}; entry = &cache_[config.ordinal]; // Release the map lock; the address of 'entry' is stable because - // std::map guarantees reference stability. + // absl::node_hash_map guarantees reference stability. } // Acquire the per-Entry mutex without holding the map mutex. Initializing @@ -70,47 +77,43 @@ tsl::StatusOr ExecutorCache::Get( { absl::ReaderMutexLock lock{&mutex_}; - { - if (config.gpu_stream) { - // Need to iterate through all stored executors. - for (auto& [ordinal, e] : cache_) { - absl::ReaderMutexLock l{&e.configurations_mutex}; - for (auto& [c, executor] : e.configurations) { - if (executor->FindAllocatedStream(config.gpu_stream)) { - return executor.get(); - } + // If gpu stream is not nullptr we have to find StreamExecutor that owns it, + // and return NOT_FOUND error if we can't find it. 
+ if (config.gpu_stream) { + for (auto& [ordinal, e] : cache_) { + absl::ReaderMutexLock l{&e.configurations_mutex}; + for (auto& [c, executor] : e.configurations) { + if (executor->FindAllocatedStream(config.gpu_stream)) { + return executor.get(); } } - return tsl::Status( - absl::StatusCode::kNotFound, - absl::StrFormat("No executors own stream %p", config.gpu_stream)); } + return absl::NotFoundError( + absl::StrFormat("No executors own stream %p", config.gpu_stream)); } - auto it = cache_.find(config.ordinal); - if (it != cache_.end()) { + if (auto it = cache_.find(config.ordinal); it != cache_.end()) { entry = &it->second; } else { - return tsl::Status( - absl::StatusCode::kNotFound, - absl::StrFormat("No executors registered for ordinal %d", - config.ordinal)); + return absl::NotFoundError(absl::StrFormat( + "No executors registered for ordinal %d", config.ordinal)); } } + absl::ReaderMutexLock lock{&entry->configurations_mutex}; if (entry->configurations.empty()) { - return tsl::Status(absl::StatusCode::kNotFound, - absl::StrFormat("No executors registered for ordinal %d", - config.ordinal)); + return absl::NotFoundError(absl::StrFormat( + "No executors registered for ordinal %d", config.ordinal)); } - for (const auto& iter : entry->configurations) { - if (iter.first.device_options == config.device_options) { + + for (auto& [entry_config, entry_executor] : entry->configurations) { + if (entry_config.device_options == config.device_options) { VLOG(2) << "hit in cache for device ordinal " << config.ordinal; - return iter.second.get(); + return entry_executor.get(); } } - return tsl::Status(absl::StatusCode::kNotFound, - "No executor found with a matching config."); + + return absl::NotFoundError("No executor found with a matching config."); } void ExecutorCache::DestroyAllExecutors() { diff --git a/third_party/xla/xla/stream_executor/executor_cache.h b/third_party/xla/xla/stream_executor/executor_cache.h index cef0d3d565614d..d845b8eabb5ce3 100644 --- 
a/third_party/xla/xla/stream_executor/executor_cache.h +++ b/third_party/xla/xla/stream_executor/executor_cache.h @@ -17,28 +17,37 @@ limitations under the License. #define XLA_STREAM_EXECUTOR_EXECUTOR_CACHE_H_ #include -#include +#include +#include +#include +#include "absl/base/thread_annotations.h" +#include "absl/container/node_hash_map.h" #include "absl/synchronization/mutex.h" -#include "xla/stream_executor/stream_executor_pimpl.h" -#include "tsl/platform/status.h" +#include "xla/stream_executor/platform.h" +#include "xla/stream_executor/platform/port.h" #include "tsl/platform/statusor.h" namespace stream_executor { +// Forward declare. +class StreamExecutor; + // Utility class to allow Platform objects to manage cached StreamExecutors. // Thread-safe. class ExecutorCache { public: - ExecutorCache() {} + using ExecutorFactory = + std::function>()>; + + ExecutorCache(); + ~ExecutorCache(); // Looks up 'config' in the cache. Returns a pointer to the existing executor, // if already present, or creates it using 'factory', if it does not. // Factories may be executed concurrently for different device ordinals. - typedef tsl::StatusOr> ExecutorFactory(); - tsl::StatusOr GetOrCreate( - const StreamExecutorConfig& config, - const std::function& factory); + tsl::StatusOr GetOrCreate(const StreamExecutorConfig& config, + const ExecutorFactory& factory); // Returns a pointer to the described executor (if one with a matching config // has been created), or a NOT_FOUND status. @@ -70,7 +79,7 @@ class ExecutorCache { // We key off of ordinal (instead of just looking up all fields in the // StreamExecutorConfig) for a slight improvement in lookup time. absl::Mutex mutex_; - std::map cache_ ABSL_GUARDED_BY(mutex_); + absl::node_hash_map cache_ ABSL_GUARDED_BY(mutex_); SE_DISALLOW_COPY_AND_ASSIGN(ExecutorCache); }; From 41154a69d36384c90d75043a8052c045e037b084 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 21 Sep 2023 23:09:51 -0700 Subject: [PATCH 125/567] Internal Code Change PiperOrigin-RevId: 567527024 --- tensorflow/core/common_runtime/device/BUILD | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tensorflow/core/common_runtime/device/BUILD b/tensorflow/core/common_runtime/device/BUILD index 3cddb136424737..12eb86fd0bd391 100644 --- a/tensorflow/core/common_runtime/device/BUILD +++ b/tensorflow/core/common_runtime/device/BUILD @@ -20,10 +20,7 @@ load( package( # copybara:uncomment default_applicable_licenses = ["//tensorflow:license"], - default_visibility = [ - "//tensorflow:internal", - "//tensorflow_models:__subpackages__", - ], + default_visibility = ["//tensorflow:internal"], # features = ["-parse_headers"], features = ["-layering_check"], licenses = ["notice"], From 2635d85c1cac2562197f5aedfeba4274c0454e9c Mon Sep 17 00:00:00 2001 From: Marcello Maggioni Date: Fri, 22 Sep 2023 00:36:41 -0700 Subject: [PATCH 126/567] [XLA] Fix big memory allocation and compile time slowdown for WhileLoopInvariantCodeMotion. The pass allocates in some cases the input for the while-cond computation, but it really only needs one of the tuple elements of the parameter, not the whole tuple. Allocating the whole tuple (because the inputs to the loop can be big) can result in very large memory allocations an significant time spent on memory allocation/initialization. 
PiperOrigin-RevId: 567543997 --- third_party/xla/xla/service/BUILD | 1 + .../xla/xla/service/while_loop_analysis.cc | 21 ++++++++++++++++--- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/third_party/xla/xla/service/BUILD b/third_party/xla/xla/service/BUILD index 2aee4c3a9ec378..b32c8fb37c070f 100644 --- a/third_party/xla/xla/service/BUILD +++ b/third_party/xla/xla/service/BUILD @@ -3209,6 +3209,7 @@ cc_library( "//xla/hlo/ir:hlo", "//xla/hlo/ir:hlo_reachability", "@com_google_absl//absl/base", + "@com_google_absl//absl/container:flat_hash_map", ], ) diff --git a/third_party/xla/xla/service/while_loop_analysis.cc b/third_party/xla/xla/service/while_loop_analysis.cc index 7a881be0474fe8..dae16d93928085 100644 --- a/third_party/xla/xla/service/while_loop_analysis.cc +++ b/third_party/xla/xla/service/while_loop_analysis.cc @@ -16,6 +16,7 @@ limitations under the License. #include "xla/service/while_loop_analysis.h" #include "absl/base/casts.h" +#include "absl/container/flat_hash_map.h" #include "xla/hlo/evaluator/hlo_evaluator.h" #include "xla/hlo/ir/hlo_instruction.h" #include "xla/hlo/ir/hlo_module.h" @@ -614,15 +615,29 @@ optional ComputeWhileLoopTripCountUpperBound( << while_body_indvar->ToString(); return nullopt; } + // Create a new while cond computation accessing only the single parameter + // extracted by the GTE above to avoid excessive memory allocation for the + // evaluator. + absl::flat_hash_map> + replacements; + auto new_param = HloInstruction::CreateParameter( + 0, ShapeUtil::MakeTupleShape({cond_gte->shape()}), "temp"); + replacements[cond_gte] = + HloInstruction::CreateGetTupleElement(new_param.get(), 0); + replacements[while_cond_param] = std::move(new_param); + auto new_module = std::make_unique("temp_mod", HloModuleConfig{}); + auto* new_computation = new_module->AddEmbeddedComputation( + while_cond->CloneWithReplacements(&replacements)); // We have a constant. Evaluate the condition on this constant. 
HloEvaluator evaluator(/*max_loop_iterations=*/0); - Literal fake_input = Literal::CreateFromShape(while_cond_param->shape()); + Literal fake_input = Literal::CreateFromShape( + new_computation->parameter_instruction(0)->shape()); TF_CHECK_OK(fake_input.CopyFrom(while_body_indvar->literal(), - /*dest_shape_index=*/{indvar_index}, + /*dest_shape_index=*/{0}, /*src_shape_index=*/{})); StatusOr eval_result = - evaluator.Evaluate(*while_cond, {std::move(fake_input)}); + evaluator.Evaluate(*new_computation, {std::move(fake_input)}); if (!eval_result.ok()) { VLOG(2) << "Couldn't evaluate while loop condition."; From aeb240d9029c006ef7c66f308bee82854202e1fb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 22 Sep 2023 02:01:48 -0700 Subject: [PATCH 127/567] compat: Update forward compatibility horizon to 2023-09-22 PiperOrigin-RevId: 567558968 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 6e494480e54ba7..4145a22481f391 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -29,7 +29,7 @@ # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2023, 9, 21) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2023, 9, 22) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From a5ff20d0a39f1f6638448076c4eb01613d127d73 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 22 Sep 2023 02:01:50 -0700 Subject: [PATCH 128/567] Update GraphDef version to 1627. 
PiperOrigin-RevId: 567558973 --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 7c76122b9da32f..470b11e06459b2 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 1626 // Updated: 2023/9/21 +#define TF_GRAPH_DEF_VERSION 1627 // Updated: 2023/9/22 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From bdbcdcea22474ea8278b61ecf688365634d9c9f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tam=C3=A1s=20Danyluk?= Date: Fri, 22 Sep 2023 02:59:02 -0700 Subject: [PATCH 129/567] [XLA:GPU] Fix upcasting of dot operands for triton GEMM fusions PiperOrigin-RevId: 567569309 --- third_party/xla/xla/service/gpu/BUILD | 8 +++ .../xla/xla/service/gpu/float_support_test.cc | 52 +++++++++++++++++-- .../xla/xla/service/gpu/gpu_float_support.cc | 24 +++++++++ .../xla/xla/service/gpu/gpu_float_support.h | 6 +++ 4 files changed, 85 insertions(+), 5 deletions(-) diff --git a/third_party/xla/xla/service/gpu/BUILD b/third_party/xla/xla/service/gpu/BUILD index 9f2ce2858edc4d..a18d39d3e56812 100644 --- a/third_party/xla/xla/service/gpu/BUILD +++ b/third_party/xla/xla/service/gpu/BUILD @@ -2432,7 +2432,10 @@ cc_library( hdrs = ["gpu_float_support.h"], visibility = ["//visibility:public"], deps = [ + "//xla:xla_data_proto_cc", + "//xla/hlo/ir:hlo", "//xla/service:float_support", + "@com_google_absl//absl/log:check", ], ) @@ -3666,8 +3669,13 @@ xla_test( "gpu", ], deps = [ + "//xla:error_spec", + "//xla:xla_proto_cc", + "//xla/stream_executor:device_description", "//xla/tests:hlo_test_base", "//xla/tests:xla_internal_test_main", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest", ], ) diff --git 
a/third_party/xla/xla/service/gpu/float_support_test.cc b/third_party/xla/xla/service/gpu/float_support_test.cc index 0f10626606e1ac..3f0bf03a2b1487 100644 --- a/third_party/xla/xla/service/gpu/float_support_test.cc +++ b/third_party/xla/xla/service/gpu/float_support_test.cc @@ -13,18 +13,59 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include - +#include +#include "absl/strings/string_view.h" +#include "xla/error_spec.h" +#include "xla/stream_executor/device_description.h" #include "xla/tests/hlo_test_base.h" +#include "xla/xla.pb.h" namespace xla { namespace gpu { namespace { -using FloatSupportTest = HloTestBase; +using FloatSupportTestWithCublas = HloTestBase; + +class FloatSupportTestWithTriton : public HloTestBase { + public: + se::CudaComputeCapability GetCudaComputeCapability() { + return backend() + .default_stream_executor() + ->GetDeviceDescription() + .cuda_compute_capability(); + } + + DebugOptions GetDebugOptionsForTest() override { + DebugOptions debug_options = HloTestBase::GetDebugOptionsForTest(); + debug_options.set_xla_gpu_triton_gemm_any(true); + return debug_options; + } +}; + +TEST_F(FloatSupportTestWithCublas, MixedTypeDotIsNotUpcasted) { + constexpr absl::string_view kHloText = R"( +ENTRY e { + p0 = bf16[32,32] parameter(0) + p1 = bf16[32,32] parameter(1) + ROOT d = f32[32,32] dot(p0, p1), + lhs_contracting_dims={1}, rhs_contracting_dims={0} +})"; + + MatchOptimizedHlo(kHloText, R"( +; CHECK-NOT: convert +; CHECK: __cublas +)"); + + EXPECT_TRUE(RunAndCompare(kHloText, ErrorSpec{1e-6, 1e-6})); +} + +TEST_F(FloatSupportTestWithTriton, MixedTypeDotWithBF16IsNotUpcasted) { + if (!GetCudaComputeCapability().IsAtLeast( + se::CudaComputeCapability::AMPERE)) { + GTEST_SKIP() << "No BF16 before Ampere."; + } -TEST_F(FloatSupportTest, MixedTypeDotIsNotUpcasted) { - const std::string kHloText = R"( + constexpr 
absl::string_view kHloText = R"( ENTRY e { p0 = bf16[32,32] parameter(0) p1 = bf16[32,32] parameter(1) @@ -34,6 +75,7 @@ ENTRY e { MatchOptimizedHlo(kHloText, R"( ; CHECK-NOT: convert +; CHECK: __triton )"); EXPECT_TRUE(RunAndCompare(kHloText, ErrorSpec{1e-6, 1e-6})); diff --git a/third_party/xla/xla/service/gpu/gpu_float_support.cc b/third_party/xla/xla/service/gpu/gpu_float_support.cc index f99e40f0d93d81..7652bed27c61fb 100644 --- a/third_party/xla/xla/service/gpu/gpu_float_support.cc +++ b/third_party/xla/xla/service/gpu/gpu_float_support.cc @@ -15,9 +15,33 @@ limitations under the License. #include "xla/service/gpu/gpu_float_support.h" +#include "absl/log/check.h" +#include "xla/hlo/ir/hlo_instruction.h" +#include "xla/hlo/ir/hlo_opcode.h" +#include "xla/service/float_support.h" +#include "xla/xla_data.pb.h" + namespace xla { namespace gpu { +bool GpuFloatSupport::SupportsMixedPrecisions(const HloInstruction& hlo) const { + if (FloatSupport::SupportsMixedPrecisions(hlo)) return true; + + switch (hlo.opcode()) { + // Handled by Triton GEMM or cuBLAS. + case HloOpcode::kDot: { + CHECK_EQ(hlo.operand_count(), 2); + const PrimitiveType lhs_type = hlo.operand(0)->shape().element_type(); + const PrimitiveType rhs_type = hlo.operand(1)->shape().element_type(); + const PrimitiveType result_type = hlo.shape().element_type(); + return (lhs_type == F16 && rhs_type == F16 && result_type == F32) || + (lhs_type == BF16 && rhs_type == BF16 && result_type == F32); + } + default: + return false; + } +} + bool GpuFloatSupport::IsSupported(const HloInstruction& hlo) const { switch (hlo.opcode()) { // Collective ops. diff --git a/third_party/xla/xla/service/gpu/gpu_float_support.h b/third_party/xla/xla/service/gpu/gpu_float_support.h index 51a54020d38420..c9e0e2ac0c48e7 100644 --- a/third_party/xla/xla/service/gpu/gpu_float_support.h +++ b/third_party/xla/xla/service/gpu/gpu_float_support.h @@ -16,7 +16,11 @@ limitations under the License. 
#ifndef XLA_SERVICE_GPU_GPU_FLOAT_SUPPORT_H_ #define XLA_SERVICE_GPU_GPU_FLOAT_SUPPORT_H_ +#include + +#include "xla/hlo/ir/hlo_instruction.h" #include "xla/service/float_support.h" +#include "xla/xla_data.pb.h" namespace xla { namespace gpu { @@ -36,6 +40,8 @@ class GpuFloatSupport : public FloatSupport { return FloatSupport::SupportsLowPrecisionOutput(hlo) || IsSupported(hlo); } + bool SupportsMixedPrecisions(const HloInstruction& hlo) const override; + private: bool IsSupported(const HloInstruction& hlo) const; }; From 8b0ec1ea93faa08eadbead5b96cc02720cb3569a Mon Sep 17 00:00:00 2001 From: sushreebarsa <84765720+sushreebarsa@users.noreply.github.com> Date: Fri, 22 Sep 2023 16:10:04 +0530 Subject: [PATCH 130/567] Fixed typos in TF doc Several typos fixed in the TF documentation. --- tensorflow/python/debug/lib/debug_events_reader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/debug/lib/debug_events_reader.py b/tensorflow/python/debug/lib/debug_events_reader.py index c6234afef04da8..706823b799b14a 100644 --- a/tensorflow/python/debug/lib/debug_events_reader.py +++ b/tensorflow/python/debug/lib/debug_events_reader.py @@ -324,7 +324,7 @@ class BaseDigest: for the case of all digests of the same kind coming from the same file. 2. A tuple of a file index and a byte offset. This applies to case - in which the same type of debugger data may come from multple files, + in which the same type of debugger data may come from multiple files, e.g., graph execution traces. """ @@ -1306,7 +1306,7 @@ def execution_to_tensor_values(self, execution): """Read the full tensor values from an Execution or ExecutionDigest. Args: - execution: An `ExecutionDigest` or `ExeuctionDigest` object. + execution: An `ExecutionDigest` or `ExecutionDigest` object. 
Returns: A list of numpy arrays representing the output tensor values of the From 9cdc613d71a61e5b93f2690f7d522dd341c49948 Mon Sep 17 00:00:00 2001 From: sushreebarsa <84765720+sushreebarsa@users.noreply.github.com> Date: Fri, 22 Sep 2023 16:15:49 +0530 Subject: [PATCH 131/567] Fixing typos --- tensorflow/python/debug/lib/dumping_callback.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/debug/lib/dumping_callback.py b/tensorflow/python/debug/lib/dumping_callback.py index 9f9afad5b599d4..8abfc242c91f3e 100644 --- a/tensorflow/python/debug/lib/dumping_callback.py +++ b/tensorflow/python/debug/lib/dumping_callback.py @@ -594,7 +594,7 @@ def _lookup_tensor_name(self, tensor): tensor: The graph tensor to look up the name for. Returns: - Name of the orignal instrumented tensor as known to the debugger. + Name of the original instrumented tensor as known to the debugger. """ return self._tensor_aliases.get(tensor.name, tensor.name) From 91de7cef66b4bb75f33a3e79b7e9e39805216953 Mon Sep 17 00:00:00 2001 From: sushreebarsa <84765720+sushreebarsa@users.noreply.github.com> Date: Fri, 22 Sep 2023 16:18:51 +0530 Subject: [PATCH 132/567] Update debug_events_writer_test.py --- tensorflow/python/debug/lib/debug_events_writer_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/debug/lib/debug_events_writer_test.py b/tensorflow/python/debug/lib/debug_events_writer_test.py index 1c2f11dff72f83..e7e6a71bf650b1 100644 --- a/tensorflow/python/debug/lib/debug_events_writer_test.py +++ b/tensorflow/python/debug/lib/debug_events_writer_test.py @@ -809,7 +809,7 @@ def testDebuggedGraphToJonsWithNameAndInnerOuterGraphIds(self): ("EmptyList", []), ("None", None), ) - def testGraphOpDigestWithNoOutpusReturnsNumOutputsZero( + def testGraphOpDigestWithNoOutputsReturnsNumOutputsZero( self, output_tensor_ids): op_creation_digest = debug_events_reader.GraphOpCreationDigest( 1234, From 
fb36fd261aeb2c7a11ebe3fe1f533685b4c030dc Mon Sep 17 00:00:00 2001 From: sushreebarsa <84765720+sushreebarsa@users.noreply.github.com> Date: Fri, 22 Sep 2023 16:20:33 +0530 Subject: [PATCH 133/567] Update debugger_cli_common.py --- tensorflow/python/debug/cli/debugger_cli_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/debug/cli/debugger_cli_common.py b/tensorflow/python/debug/cli/debugger_cli_common.py index 48696e5fdb7a01..a4a184fa3d2628 100644 --- a/tensorflow/python/debug/cli/debugger_cli_common.py +++ b/tensorflow/python/debug/cli/debugger_cli_common.py @@ -813,7 +813,7 @@ def _get_help_for_command_prefix(self, cmd_prefix): aliases. Returns: - A list of str as the help information fo cmd_prefix. If the cmd_prefix + A list of str as the help information for cmd_prefix. If the cmd_prefix does not exist, the returned list of str will indicate that. """ lines = [] From 899323ee1aadbf5d2cb4cfa598eb25224862a86d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tam=C3=A1s=20Danyluk?= Date: Fri, 22 Sep 2023 03:45:50 -0700 Subject: [PATCH 134/567] [XLA:GPU][NFC] Move out some constants in OptimizeHloPostLayoutAssignment This is a preparation for a later CL. 
PiperOrigin-RevId: 567577255 --- third_party/xla/xla/service/gpu/BUILD | 1 + .../xla/xla/service/gpu/gpu_compiler.cc | 98 +++++++++---------- 2 files changed, 48 insertions(+), 51 deletions(-) diff --git a/third_party/xla/xla/service/gpu/BUILD b/third_party/xla/xla/service/gpu/BUILD index a18d39d3e56812..f6ba7b004e3988 100644 --- a/third_party/xla/xla/service/gpu/BUILD +++ b/third_party/xla/xla/service/gpu/BUILD @@ -2677,6 +2677,7 @@ cc_library( "//xla/service:executable", "//xla/service:flatten_call_graph", "//xla/service:float_normalization", + "//xla/service:float_support", "//xla/service:gather_expander", "//xla/service:gather_simplifier", "//xla/service:hlo_computation_deduplicator", diff --git a/third_party/xla/xla/service/gpu/gpu_compiler.cc b/third_party/xla/xla/service/gpu/gpu_compiler.cc index 1d5c7c106906be..66dd3fd2ac57bc 100644 --- a/third_party/xla/xla/service/gpu/gpu_compiler.cc +++ b/third_party/xla/xla/service/gpu/gpu_compiler.cc @@ -84,10 +84,12 @@ limitations under the License. 
#include "xla/service/executable.h" #include "xla/service/flatten_call_graph.h" #include "xla/service/float_normalization.h" +#include "xla/service/float_support.h" #include "xla/service/gather_expander.h" #include "xla/service/gather_simplifier.h" #include "xla/service/gpu/alias_passthrough_params.h" #include "xla/service/gpu/all_reduce_blueconnect.h" +#include "xla/service/gpu/autotuner_util.h" #include "xla/service/gpu/compile_module_to_llvm_ir.h" #include "xla/service/gpu/conv_layout_normalization.h" #include "xla/service/gpu/copy_fusion.h" @@ -897,22 +899,53 @@ Status GpuCompiler::OptimizeHloPostLayoutAssignment( const CompileOptions& options, const GpuTargetConfig& gpu_target_config, const AutotuneResults* autotune_results, tsl::thread::ThreadPool* thread_pool) { + // Constants: const DebugOptions& debug_options = hlo_module->config().debug_options(); + const GpuVersion gpu_version = + gpu_target_config.gpu_device_info.compute_capability; + const se::CudaComputeCapability* const cuda_cc = + std::get_if(&gpu_version); + const AlgebraicSimplifierOptions simplifier_options = [&] { + AlgebraicSimplifierOptions opts; + opts.set_supports_non_canonical_dots(false); + opts.set_is_layout_sensitive(true); + opts.set_enable_conv_operand_swap(false); + // "slow" minmax means we propagate nan. 
+ opts.set_minmax_propagate_nan(!debug_options.xla_gpu_enable_fast_min_max()); + opts.set_enable_unconditional_reduce_of_concat_replacement(false); + return opts; + }(); + TF_ASSIGN_OR_RETURN(AutotuneConfig autotune_config, + GetAutotuneConfig(stream_exec, debug_options, options, + gpu_target_config, autotune_results)); + // Lambdas and related constants: + const GpuFloatSupport bf16_support(BF16); + const GpuFloatSupport f8e5m2_support(F8E5M2); + const GpuFloatSupport f8e4m3fn_support(F8E4M3FN); + const FloatSupport f8e4m3b11fnuz_support(F8E4M3B11FNUZ); + const FloatSupport f8e5m2fnuz_support(F8E5M2FNUZ); + const FloatSupport f8e4m3fnuz_support(F8E4M3FNUZ); + auto add_float_normalization = [&](HloPassPipeline& pipeline) { + auto& sub_pipeline = + pipeline.AddPass("float_normalization"); + sub_pipeline.AddPass(&bf16_support); + sub_pipeline.AddPass(&f8e5m2_support); + sub_pipeline.AddPass(&f8e4m3fn_support); + sub_pipeline.AddPass(&f8e4m3b11fnuz_support); + sub_pipeline.AddPass(&f8e5m2fnuz_support); + sub_pipeline.AddPass(&f8e4m3fnuz_support); + // Remove `f32 -> bf16 -> f32` casts inserted by bf16 normalization. + if (debug_options.xla_gpu_simplify_all_fp_conversions()) { + sub_pipeline.AddPass(); + } + }; { HloPassPipeline pipeline("hlo normalization"); // The LayoutAssignment pass may leave behind kCopy instructions which are // duplicate or NOPs, so remove them with algebraic simplification and CSE. - AlgebraicSimplifierOptions options; - options.set_supports_non_canonical_dots(false); - options.set_is_layout_sensitive(true); - options.set_enable_conv_operand_swap(false); - // "slow" minmax means we propagate nan. 
- options.set_minmax_propagate_nan( - !debug_options.xla_gpu_enable_fast_min_max()); - options.set_enable_unconditional_reduce_of_concat_replacement(false); - pipeline.AddPass>(options); + pipeline.AddPass>(simplifier_options); // GemmRewriter assumes that all transposes are folded into gemms, but, // since commit 7d529df, this is not always true at this point. @@ -927,9 +960,6 @@ Status GpuCompiler::OptimizeHloPostLayoutAssignment( pipeline.AddPass>(); // Rewrite GEMMs into custom calls. - GpuVersion gpu_version = - gpu_target_config.gpu_device_info.compute_capability; - const auto* cuda_cc = std::get_if(&gpu_version); if (debug_options.xla_gpu_enable_triton_gemm() && cuda_cc != nullptr && cuda_cc->IsAtLeast(se::CudaComputeCapability::VOLTA)) { pipeline.AddPass(gpu_version); @@ -941,7 +971,7 @@ Status GpuCompiler::OptimizeHloPostLayoutAssignment( if (debug_options.xla_gpu_normalize_layouts()) { pipeline.AddPass(&NormalizeLayoutForGpuCustomCalls); - pipeline.AddPass>(options); + pipeline.AddPass>(simplifier_options); } pipeline.AddPass(); @@ -954,7 +984,7 @@ Status GpuCompiler::OptimizeHloPostLayoutAssignment( if (debug_options.xla_gpu_enable_triton_softmax_fusion() && cuda_cc != nullptr && cuda_cc->IsAtLeast(se::CudaComputeCapability::VOLTA)) { - pipeline.AddPass>(options); + pipeline.AddPass>(simplifier_options); pipeline.AddPass(gpu_version); } @@ -981,34 +1011,10 @@ Status GpuCompiler::OptimizeHloPostLayoutAssignment( return RequiresCollectiveScheduleLinearizer(module, stream_exec); }); - GpuFloatSupport bf16_support(BF16); - GpuFloatSupport f8e5m2_support(F8E5M2); - GpuFloatSupport f8e4m3fn_support(F8E4M3FN); - FloatSupport f8e4m3b11fnuz_support(F8E4M3B11FNUZ); - FloatSupport f8e5m2fnuz_support(F8E5M2FNUZ); - FloatSupport f8e4m3fnuz_support(F8E4M3FNUZ); - - auto add_float_normalization = [&](HloPassPipeline& pipeline) { - auto& sub_pipeline = - pipeline.AddPass("float_normalization"); - sub_pipeline.AddPass(&bf16_support); - 
sub_pipeline.AddPass(&f8e5m2_support); - sub_pipeline.AddPass(&f8e4m3fn_support); - sub_pipeline.AddPass(&f8e4m3b11fnuz_support); - sub_pipeline.AddPass(&f8e5m2fnuz_support); - sub_pipeline.AddPass(&f8e4m3fnuz_support); - // Remove `f32 -> bf16 -> f32` casts inserted by bf16 normalization. - if (debug_options.xla_gpu_simplify_all_fp_conversions()) { - sub_pipeline.AddPass(); - } - }; // Triton compilation needs normalized operations on bf16 (i.e. converted to // f32). add_float_normalization(pipeline); - TF_ASSIGN_OR_RETURN(AutotuneConfig autotune_config, - GetAutotuneConfig(stream_exec, debug_options, options, - gpu_target_config, autotune_results)); TF_RETURN_IF_ERROR( AddAutotuningPasses(&pipeline, hlo_module, autotune_config, thread_pool)); @@ -1019,19 +1025,9 @@ Status GpuCompiler::OptimizeHloPostLayoutAssignment( // Clean up new_tuple described above. pipeline.AddPass(); - { - // The LayoutAssignment pass may leave behind kCopy instructions which are - // duplicate or NOPs, so remove them with algebraic simplification and CSE. - AlgebraicSimplifierOptions options; - options.set_supports_non_canonical_dots(false); - options.set_is_layout_sensitive(true); - options.set_enable_conv_operand_swap(false); - // "slow" minmax means we propagate nan. - options.set_minmax_propagate_nan( - !hlo_module->config().debug_options().xla_gpu_enable_fast_min_max()); - options.set_enable_unconditional_reduce_of_concat_replacement(false); - pipeline.AddPass>(options); - } + // The LayoutAssignment pass may leave behind kCopy instructions which are + // duplicate or NOPs, so remove them with algebraic simplification and CSE. + pipeline.AddPass>(simplifier_options); // Since this CSE runs after collective schedule linearizer which inserts // control dependencies, ignore these control deps when replacing instructions From c51351f28b39b3c74946127cb96d501d6d4a8169 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 22 Sep 2023 03:48:13 -0700 Subject: [PATCH 135/567] Integrate LLVM at llvm/llvm-project@3b0f812b9af4 Updates LLVM usage to match [3b0f812b9af4](https://github.com/llvm/llvm-project/commit/3b0f812b9af4) PiperOrigin-RevId: 567577565 --- third_party/llvm/generated.patch | 1717 +----------------------------- third_party/llvm/workspace.bzl | 4 +- 2 files changed, 53 insertions(+), 1668 deletions(-) diff --git a/third_party/llvm/generated.patch b/third_party/llvm/generated.patch index 450540fcebd5c7..409e0541024c8e 100644 --- a/third_party/llvm/generated.patch +++ b/third_party/llvm/generated.patch @@ -1,1669 +1,54 @@ Auto generated patch. Do not edit or delete it, even if empty. -diff -ruN --strip-trailing-cr a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp ---- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp -+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp -@@ -271,10 +271,7 @@ - bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, - IRBuilder<> &Builder); - -- bool hoistCommonCodeFromSuccessors(BasicBlock *BB, bool EqTermsOnly); -- bool hoistSuccIdenticalTerminatorToSwitchOrIf( -- Instruction *TI, Instruction *I1, -- SmallVectorImpl &OtherSuccTIs); -+ bool HoistThenElseCodeToIf(BranchInst *BI, bool EqTermsOnly); - bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB); - bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond, - BasicBlock *TrueBB, BasicBlock *FalseBB, -@@ -1411,9 +1408,8 @@ - } - - // If we would need to insert a select that uses the value of this invoke --// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would --// need to do this), we can't hoist the invoke, as there is nowhere to put the --// select in this case. -+// (comments in HoistThenElseCodeToIf explain why we would need to do this), we -+// can't hoist the invoke, as there is nowhere to put the select in this case. 
- static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, - Instruction *I1, Instruction *I2) { - for (BasicBlock *Succ : successors(BB1)) { -@@ -1428,9 +1424,9 @@ - return true; - } - --// Get interesting characteristics of instructions that --// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of --// instructions can be reordered across. -+// Get interesting characteristics of instructions that `HoistThenElseCodeToIf` -+// didn't hoist. They restrict what kind of instructions can be reordered -+// across. - enum SkipFlags { - SkipReadMem = 1, - SkipSideEffect = 2, -@@ -1488,7 +1484,7 @@ - - static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false); - --/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical -+/// Helper function for HoistThenElseCodeToIf. Return true if identical - /// instructions \p I1 and \p I2 can and should be hoisted. - static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, - const TargetTransformInfo &TTI) { -@@ -1519,51 +1515,62 @@ - return true; - } - --/// Hoist any common code in the successor blocks up into the block. This --/// function guarantees that BB dominates all successors. If EqTermsOnly is --/// given, only perform hoisting in case both blocks only contain a terminator. --/// In that case, only the original BI will be replaced and selects for PHIs are --/// added. --bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB, -- bool EqTermsOnly) { -+/// Given a conditional branch that goes to BB1 and BB2, hoist any common code -+/// in the two blocks up into the branch block. The caller of this function -+/// guarantees that BI's block dominates BB1 and BB2. If EqTermsOnly is given, -+/// only perform hoisting in case both blocks only contain a terminator. In that -+/// case, only the original BI will be replaced and selects for PHIs are added. 
-+bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, bool EqTermsOnly) { - // This does very trivial matching, with limited scanning, to find identical -- // instructions in the two blocks. In particular, we don't want to get into -- // O(N1*N2*...) situations here where Ni are the sizes of these successors. As -+ // instructions in the two blocks. In particular, we don't want to get into -+ // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As - // such, we currently just scan for obviously identical instructions in an - // identical order, possibly separated by the same number of non-identical - // instructions. -- unsigned int SuccSize = succ_size(BB); -- if (SuccSize < 2) -- return false; -+ BasicBlock *BB1 = BI->getSuccessor(0); // The true destination. -+ BasicBlock *BB2 = BI->getSuccessor(1); // The false destination - - // If either of the blocks has it's address taken, then we can't do this fold, - // because the code we'd hoist would no longer run when we jump into the block - // by it's address. -- for (auto *Succ : successors(BB)) -- if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor()) -- return false; -+ if (BB1->hasAddressTaken() || BB2->hasAddressTaken()) -+ return false; - -- auto *TI = BB->getTerminator(); -+ BasicBlock::iterator BB1_Itr = BB1->begin(); -+ BasicBlock::iterator BB2_Itr = BB2->begin(); - -- // The second of pair is a SkipFlags bitmask. -- using SuccIterPair = std::pair; -- SmallVector SuccIterPairs; -- for (auto *Succ : successors(BB)) { -- BasicBlock::iterator SuccItr = Succ->begin(); -- if (isa(*SuccItr)) -- return false; -- SuccIterPairs.push_back(SuccIterPair(SuccItr, 0)); -+ Instruction *I1 = &*BB1_Itr++, *I2 = &*BB2_Itr++; -+ // Skip debug info if it is not identical. 
-+ DbgInfoIntrinsic *DBI1 = dyn_cast(I1); -+ DbgInfoIntrinsic *DBI2 = dyn_cast(I2); -+ if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { -+ while (isa(I1)) -+ I1 = &*BB1_Itr++; -+ while (isa(I2)) -+ I2 = &*BB2_Itr++; - } -+ if (isa(I1)) -+ return false; -+ -+ BasicBlock *BIParent = BI->getParent(); -+ -+ bool Changed = false; -+ -+ auto _ = make_scope_exit([&]() { -+ if (Changed) -+ ++NumHoistCommonCode; -+ }); - - // Check if only hoisting terminators is allowed. This does not add new - // instructions to the hoist location. - if (EqTermsOnly) { - // Skip any debug intrinsics, as they are free to hoist. -- for (auto &SuccIter : make_first_range(SuccIterPairs)) { -- auto *INonDbg = &*skipDebugIntrinsics(SuccIter); -- if (!INonDbg->isTerminator()) -- return false; -- } -+ auto *I1NonDbg = &*skipDebugIntrinsics(I1->getIterator()); -+ auto *I2NonDbg = &*skipDebugIntrinsics(I2->getIterator()); -+ if (!I1NonDbg->isIdenticalToWhenDefined(I2NonDbg)) -+ return false; -+ if (!I1NonDbg->isTerminator()) -+ return false; - // Now we know that we only need to hoist debug intrinsics and the - // terminator. Let the loop below handle those 2 cases. - } -@@ -1572,234 +1579,154 @@ - // many instructions we skip, serving as a compilation time control as well as - // preventing excessive increase of life ranges. - unsigned NumSkipped = 0; -- // If we find an unreachable instruction at the beginning of a basic block, we -- // can still hoist instructions from the rest of the basic blocks. -- if (SuccIterPairs.size() > 2) { -- erase_if(SuccIterPairs, -- [](const auto &Pair) { return isa(Pair.first); }); -- if (SuccIterPairs.size() < 2) -- return false; -- } - -- bool Changed = false; -+ // Record any skipped instuctions that may read memory, write memory or have -+ // side effects, or have implicit control flow. 
-+ unsigned SkipFlagsBB1 = 0; -+ unsigned SkipFlagsBB2 = 0; - +diff -ruN --strip-trailing-cr a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp ++++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +@@ -5700,7 +5700,7 @@ + if (OpOpcode == ISD::TRUNCATE) { + SDValue OpOp = N1.getOperand(0); + if (OpOp.getValueType() == VT) { +- if (OpOp.getOpcode() == ISD::AssertZext) { ++ if (OpOp.getOpcode() == ISD::AssertZext && N1->hasOneUse()) { + APInt HiBits = APInt::getBitsSetFrom(VT.getScalarSizeInBits(), + N1.getScalarValueSizeInBits()); + if (MaskedValueIsZero(OpOp, HiBits)) { +diff -ruN --strip-trailing-cr a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp +--- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp ++++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp +@@ -2645,8 +2645,7 @@ for (;;) { -- auto *SuccIterPairBegin = SuccIterPairs.begin(); -- auto &BB1ItrPair = *SuccIterPairBegin++; -- auto OtherSuccIterPairRange = -- iterator_range(SuccIterPairBegin, SuccIterPairs.end()); -- auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange); -- -- Instruction *I1 = &*BB1ItrPair.first; -- auto *BB1 = I1->getParent(); -- -- // Skip debug info if it is not identical. 
-- bool AllDbgInstsAreIdentical = all_of(OtherSuccIterRange, [I1](auto &Iter) { -- Instruction *I2 = &*Iter; -- return I1->isIdenticalToWhenDefined(I2); -- }); -- if (!AllDbgInstsAreIdentical) { -- while (isa(I1)) -- I1 = &*++BB1ItrPair.first; -- for (auto &SuccIter : OtherSuccIterRange) { -- Instruction *I2 = &*SuccIter; -- while (isa(I2)) -- I2 = &*++SuccIter; -- } -- } -- -- bool AllInstsAreIdentical = true; -- bool HasTerminator = I1->isTerminator(); -- for (auto &SuccIter : OtherSuccIterRange) { -- Instruction *I2 = &*SuccIter; -- HasTerminator |= I2->isTerminator(); -- if (AllInstsAreIdentical && !I1->isIdenticalToWhenDefined(I2)) -- AllInstsAreIdentical = false; -- } -- - // If we are hoisting the terminator instruction, don't move one (making a - // broken BB), instead clone it, and remove BI. -- if (HasTerminator) { -+ if (I1->isTerminator() || I2->isTerminator()) { - // If any instructions remain in the block, we cannot hoist terminators. -- if (NumSkipped || SuccSize != SuccIterPairs.size() || -- !AllInstsAreIdentical) -+ if (NumSkipped || !I1->isIdenticalToWhenDefined(I2)) - return Changed; -- SmallVector Insts; -- for (auto &SuccIter : OtherSuccIterRange) -- Insts.push_back(&*SuccIter); -- return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, Insts) || Changed; -- } -- -- if (AllInstsAreIdentical) { -- unsigned SkipFlagsBB1 = BB1ItrPair.second; -- AllInstsAreIdentical = -- isSafeToHoistInstr(I1, SkipFlagsBB1) && -- all_of(OtherSuccIterPairRange, [=](const auto &Pair) { -- Instruction *I2 = &*Pair.first; -- unsigned SkipFlagsBB2 = Pair.second; -- // Even if the instructions are identical, it may not -- // be safe to hoist them if we have skipped over -- // instructions with side effects or their operands -- // weren't hoisted. 
-- return isSafeToHoistInstr(I2, SkipFlagsBB2) && -- shouldHoistCommonInstructions(I1, I2, TTI); -- }); -- } -- -- if (AllInstsAreIdentical) { -- BB1ItrPair.first++; -- if (isa(I1)) { -+ goto HoistTerminator; -+ } -+ -+ if (I1->isIdenticalToWhenDefined(I2) && -+ // Even if the instructions are identical, it may not be safe to hoist -+ // them if we have skipped over instructions with side effects or their -+ // operands weren't hoisted. -+ isSafeToHoistInstr(I1, SkipFlagsBB1) && -+ isSafeToHoistInstr(I2, SkipFlagsBB2) && -+ shouldHoistCommonInstructions(I1, I2, TTI)) { -+ if (isa(I1) || isa(I2)) { -+ assert(isa(I1) && isa(I2)); - // The debug location is an integral part of a debug info intrinsic - // and can't be separated from it or replaced. Instead of attempting - // to merge locations, simply hoist both copies of the intrinsic. -- I1->moveBeforePreserving(TI); -- for (auto &SuccIter : OtherSuccIterRange) { -- auto *I2 = &*SuccIter++; -- assert(isa(I2)); -- I2->moveBeforePreserving(TI); -- } -+ I1->moveBeforePreserving(BI); -+ I2->moveBeforePreserving(BI); -+ Changed = true; - } else { - // For a normal instruction, we just move one to right before the - // branch, then replace all uses of the other with the first. Finally, - // we remove the now redundant second instruction. -- I1->moveBeforePreserving(TI); -- BB->splice(TI->getIterator(), BB1, I1->getIterator()); -- for (auto &SuccIter : OtherSuccIterRange) { -- Instruction *I2 = &*SuccIter++; -- assert(I2 != I1); -- if (!I2->use_empty()) -- I2->replaceAllUsesWith(I1); -- I1->andIRFlags(I2); -- combineMetadataForCSE(I1, I2, true); -- // I1 and I2 are being combined into a single instruction. Its debug -- // location is the merged locations of the original instructions. 
-- I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()); -- I2->eraseFromParent(); -- } -+ I1->moveBeforePreserving(BI); -+ if (!I2->use_empty()) -+ I2->replaceAllUsesWith(I1); -+ I1->andIRFlags(I2); -+ combineMetadataForCSE(I1, I2, true); -+ -+ // I1 and I2 are being combined into a single instruction. Its debug -+ // location is the merged locations of the original instructions. -+ I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()); -+ -+ I2->eraseFromParent(); - } -- if (!Changed) -- NumHoistCommonCode += SuccIterPairs.size(); - Changed = true; -- NumHoistCommonInstrs += SuccIterPairs.size(); -+ ++NumHoistCommonInstrs; - } else { - if (NumSkipped >= HoistCommonSkipLimit) - return Changed; - // We are about to skip over a pair of non-identical instructions. Record - // if any have characteristics that would prevent reordering instructions - // across them. -- for (auto &SuccIterPair : SuccIterPairs) { -- Instruction *I = &*SuccIterPair.first++; -- SuccIterPair.second |= skippedInstrFlags(I); -- } -+ SkipFlagsBB1 |= skippedInstrFlags(I1); -+ SkipFlagsBB2 |= skippedInstrFlags(I2); - ++NumSkipped; - } -- } --} -- --bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf( -- Instruction *TI, Instruction *I1, -- SmallVectorImpl &OtherSuccTIs) { -- -- auto *BI = dyn_cast(TI); -- -- bool Changed = false; -- BasicBlock *TIParent = TI->getParent(); -- BasicBlock *BB1 = I1->getParent(); - -- // Use only for an if statement. -- auto *I2 = *OtherSuccTIs.begin(); -- auto *BB2 = I2->getParent(); -- if (BI) { -- assert(OtherSuccTIs.size() == 1); -- assert(BI->getSuccessor(0) == I1->getParent()); -- assert(BI->getSuccessor(1) == I2->getParent()); -+ I1 = &*BB1_Itr++; -+ I2 = &*BB2_Itr++; -+ // Skip debug info if it is not identical. 
-+ DbgInfoIntrinsic *DBI1 = dyn_cast(I1); -+ DbgInfoIntrinsic *DBI2 = dyn_cast(I2); -+ if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { -+ while (isa(I1)) -+ I1 = &*BB1_Itr++; -+ while (isa(I2)) -+ I2 = &*BB2_Itr++; -+ } - } - -- // In the case of an if statement, we try to hoist an invoke. -+ return Changed; -+ -+HoistTerminator: -+ // It may not be possible to hoist an invoke. - // FIXME: Can we define a safety predicate for CallBr? -- // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll -- // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit? -- if (isa(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2))) -- return false; -+ if (isa(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)) -+ return Changed; - - // TODO: callbr hoisting currently disabled pending further study. - if (isa(I1)) -- return false; -+ return Changed; - - for (BasicBlock *Succ : successors(BB1)) { - for (PHINode &PN : Succ->phis()) { - Value *BB1V = PN.getIncomingValueForBlock(BB1); -- for (Instruction *OtherSuccTI : OtherSuccTIs) { -- Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent()); -- if (BB1V == BB2V) -- continue; -+ Value *BB2V = PN.getIncomingValueForBlock(BB2); -+ if (BB1V == BB2V) -+ continue; - -- // In the case of an if statement, check for -- // passingValueIsAlwaysUndefined here because we would rather eliminate -- // undefined control flow then converting it to a select. -- if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) || -- passingValueIsAlwaysUndefined(BB2V, &PN)) -- return false; -- } -+ // Check for passingValueIsAlwaysUndefined here because we would rather -+ // eliminate undefined control flow then converting it to a select. -+ if (passingValueIsAlwaysUndefined(BB1V, &PN) || -+ passingValueIsAlwaysUndefined(BB2V, &PN)) -+ return Changed; - } - } - - // Okay, it is safe to hoist the terminator. 
- Instruction *NT = I1->clone(); -- NT->insertInto(TIParent, TI->getIterator()); -+ NT->insertInto(BIParent, BI->getIterator()); - if (!NT->getType()->isVoidTy()) { - I1->replaceAllUsesWith(NT); -- for (Instruction *OtherSuccTI : OtherSuccTIs) -- OtherSuccTI->replaceAllUsesWith(NT); -+ I2->replaceAllUsesWith(NT); - NT->takeName(I1); - } - Changed = true; -- NumHoistCommonInstrs += OtherSuccTIs.size() + 1; -+ ++NumHoistCommonInstrs; - - // Ensure terminator gets a debug location, even an unknown one, in case - // it involves inlinable calls. -- SmallVector Locs; -- Locs.push_back(I1->getDebugLoc()); -- for (auto *OtherSuccTI : OtherSuccTIs) -- Locs.push_back(OtherSuccTI->getDebugLoc()); -- NT->setDebugLoc(DILocation::getMergedLocations(Locs)); -+ NT->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()); - - // PHIs created below will adopt NT's merged DebugLoc. - IRBuilder Builder(NT); - -- // In the case of an if statement, hoisting one of the terminators from our -- // successor is a great thing. Unfortunately, the successors of the if/else -- // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2 -- // must agree for all PHI nodes, so we insert select instruction to compute -- // the final result. -- if (BI) { -- std::map, SelectInst *> InsertedSelects; -- for (BasicBlock *Succ : successors(BB1)) { -- for (PHINode &PN : Succ->phis()) { -- Value *BB1V = PN.getIncomingValueForBlock(BB1); -- Value *BB2V = PN.getIncomingValueForBlock(BB2); -- if (BB1V == BB2V) -- continue; -- -- // These values do not agree. Insert a select instruction before NT -- // that determines the right value. -- SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)]; -- if (!SI) { -- // Propagate fast-math-flags from phi node to its replacement select. 
-- IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); -- if (isa(PN)) -- Builder.setFastMathFlags(PN.getFastMathFlags()); -- -- SI = cast(Builder.CreateSelect( -- BI->getCondition(), BB1V, BB2V, -- BB1V->getName() + "." + BB2V->getName(), BI)); -- } -+ // Hoisting one of the terminators from our successor is a great thing. -+ // Unfortunately, the successors of the if/else blocks may have PHI nodes in -+ // them. If they do, all PHI entries for BB1/BB2 must agree for all PHI -+ // nodes, so we insert select instruction to compute the final result. -+ std::map, SelectInst *> InsertedSelects; -+ for (BasicBlock *Succ : successors(BB1)) { -+ for (PHINode &PN : Succ->phis()) { -+ Value *BB1V = PN.getIncomingValueForBlock(BB1); -+ Value *BB2V = PN.getIncomingValueForBlock(BB2); -+ if (BB1V == BB2V) -+ continue; - -- // Make the PHI node use the select for all incoming values for BB1/BB2 -- for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) -- if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2) -- PN.setIncomingValue(i, SI); -- } -+ // These values do not agree. Insert a select instruction before NT -+ // that determines the right value. -+ SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)]; -+ if (!SI) { -+ // Propagate fast-math-flags from phi node to its replacement select. -+ IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); -+ if (isa(PN)) -+ Builder.setFastMathFlags(PN.getFastMathFlags()); -+ -+ SI = cast( -+ Builder.CreateSelect(BI->getCondition(), BB1V, BB2V, -+ BB1V->getName() + "." + BB2V->getName(), BI)); -+ } -+ -+ // Make the PHI node use the select for all incoming values for BB1/BB2 -+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) -+ if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2) -+ PN.setIncomingValue(i, SI); - } - } - -@@ -1807,16 +1734,16 @@ - - // Update any PHI nodes in our new successors. 
- for (BasicBlock *Succ : successors(BB1)) { -- AddPredecessorToBlock(Succ, TIParent, BB1); -+ AddPredecessorToBlock(Succ, BIParent, BB1); - if (DTU) -- Updates.push_back({DominatorTree::Insert, TIParent, Succ}); -+ Updates.push_back({DominatorTree::Insert, BIParent, Succ}); - } - - if (DTU) -- for (BasicBlock *Succ : successors(TI)) -- Updates.push_back({DominatorTree::Delete, TIParent, Succ}); -+ for (BasicBlock *Succ : successors(BI)) -+ Updates.push_back({DominatorTree::Delete, BIParent, Succ}); - -- EraseTerminatorAndDCECond(TI); -+ EraseTerminatorAndDCECond(BI); - if (DTU) - DTU->applyUpdates(Updates); - return Changed; -@@ -2850,8 +2777,8 @@ - Value *OrigV = PN.getIncomingValueForBlock(BB); - Value *ThenV = PN.getIncomingValueForBlock(ThenBB); - -- // FIXME: Try to remove some of the duplication with -- // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial. -+ // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf. -+ // Skip PHIs which are trivial. - if (ThenV == OrigV) + // Look through nodes that don't alter the bits of the incoming value. + unsigned Op = Arg.getOpcode(); +- if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST || +- Op == ISD::AssertZext) { ++ if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) { + Arg = Arg.getOperand(0); continue; - -@@ -6888,10 +6815,6 @@ - if (ReduceSwitchRange(SI, Builder, DL, TTI)) - return requestResimplify(); - -- if (HoistCommon && -- hoistCommonCodeFromSuccessors(SI->getParent(), !Options.HoistCommonInsts)) -- return requestResimplify(); -- - return false; - } - -@@ -7158,8 +7081,7 @@ - // can hoist it up to the branching block. 
- if (BI->getSuccessor(0)->getSinglePredecessor()) { - if (BI->getSuccessor(1)->getSinglePredecessor()) { -- if (HoistCommon && hoistCommonCodeFromSuccessors( -- BI->getParent(), !Options.HoistCommonInsts)) -+ if (HoistCommon && HoistThenElseCodeToIf(BI, !Options.HoistCommonInsts)) - return requestResimplify(); - } else { - // If Successor #1 has multiple preds, we may be able to conditionally -diff -ruN --strip-trailing-cr a/llvm/test/CodeGen/AArch64/patchable-function-entry-bti.ll b/llvm/test/CodeGen/AArch64/patchable-function-entry-bti.ll ---- a/llvm/test/CodeGen/AArch64/patchable-function-entry-bti.ll -+++ b/llvm/test/CodeGen/AArch64/patchable-function-entry-bti.ll -@@ -70,7 +70,7 @@ - i64 4, label %sw.bb4 - ] - sw.bb0: -- call void asm sideeffect "nop", ""() -+ call void asm sideeffect "", ""() - ret void - sw.bb1: - call void asm sideeffect "", ""() -diff -ruN --strip-trailing-cr a/llvm/test/Transforms/SimplifyCFG/HoistCode.ll b/llvm/test/Transforms/SimplifyCFG/HoistCode.ll ---- a/llvm/test/Transforms/SimplifyCFG/HoistCode.ll -+++ b/llvm/test/Transforms/SimplifyCFG/HoistCode.ll -@@ -19,9 +19,21 @@ - - define void @foo_switch(i64 %C, ptr %P) { - ; CHECK-LABEL: @foo_switch( --; CHECK-NEXT: common.ret: --; CHECK-NEXT: store i32 7, ptr [[P:%.*]], align 4 -+; CHECK-NEXT: switch i64 [[C:%.*]], label [[BB0:%.*]] [ -+; CHECK-NEXT: i64 1, label [[BB1:%.*]] -+; CHECK-NEXT: i64 2, label [[BB2:%.*]] -+; CHECK-NEXT: ] -+; CHECK: common.ret: - ; CHECK-NEXT: ret void -+; CHECK: bb0: -+; CHECK-NEXT: store i32 7, ptr [[P:%.*]], align 4 -+; CHECK-NEXT: br label [[COMMON_RET:%.*]] -+; CHECK: bb1: -+; CHECK-NEXT: store i32 7, ptr [[P]], align 4 -+; CHECK-NEXT: br label [[COMMON_RET]] -+; CHECK: bb2: -+; CHECK-NEXT: store i32 7, ptr [[P]], align 4 -+; CHECK-NEXT: br label [[COMMON_RET]] - ; - switch i64 %C, label %bb0 [ - i64 1, label %bb1 -diff -ruN --strip-trailing-cr a/llvm/test/Transforms/SimplifyCFG/hoist-common-code.ll b/llvm/test/Transforms/SimplifyCFG/hoist-common-code.ll 
---- a/llvm/test/Transforms/SimplifyCFG/hoist-common-code.ll -+++ b/llvm/test/Transforms/SimplifyCFG/hoist-common-code.ll -@@ -26,11 +26,27 @@ - - define void @test_switch(i64 %i, ptr %Q) { - ; CHECK-LABEL: @test_switch( --; CHECK-NEXT: common.ret: -+; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ -+; CHECK-NEXT: i64 1, label [[BB1:%.*]] -+; CHECK-NEXT: i64 2, label [[BB2:%.*]] -+; CHECK-NEXT: ] -+; CHECK: common.ret: -+; CHECK-NEXT: ret void -+; CHECK: bb0: - ; CHECK-NEXT: store i32 1, ptr [[Q:%.*]], align 4 - ; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[Q]], align 4 - ; CHECK-NEXT: call void @bar(i32 [[A]]) --; CHECK-NEXT: ret void -+; CHECK-NEXT: br label [[COMMON_RET:%.*]] -+; CHECK: bb1: -+; CHECK-NEXT: store i32 1, ptr [[Q]], align 4 -+; CHECK-NEXT: [[B:%.*]] = load i32, ptr [[Q]], align 4 -+; CHECK-NEXT: call void @bar(i32 [[B]]) -+; CHECK-NEXT: br label [[COMMON_RET]] -+; CHECK: bb2: -+; CHECK-NEXT: store i32 1, ptr [[Q]], align 4 -+; CHECK-NEXT: [[C:%.*]] = load i32, ptr [[Q]], align 4 -+; CHECK-NEXT: call void @bar(i32 [[C]]) -+; CHECK-NEXT: br label [[COMMON_RET]] - ; - switch i64 %i, label %bb0 [ - i64 1, label %bb1 -@@ -53,41 +69,25 @@ - ret void - } - --; We ensure that we examine all instructions during each iteration to confirm the presence of a terminating one. 
--define void @test_switch_reach_terminator(i64 %i, ptr %p) { --; CHECK-LABEL: @test_switch_reach_terminator( --; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ --; CHECK-NEXT: i64 1, label [[BB1:%.*]] --; CHECK-NEXT: i64 2, label [[COMMON_RET:%.*]] --; CHECK-NEXT: ] --; CHECK: common.ret: --; CHECK-NEXT: ret void --; CHECK: bb0: --; CHECK-NEXT: store i32 1, ptr [[P:%.*]], align 4 --; CHECK-NEXT: br label [[COMMON_RET]] --; CHECK: bb1: --; CHECK-NEXT: store i32 2, ptr [[P]], align 4 --; CHECK-NEXT: br label [[COMMON_RET]] --; -- switch i64 %i, label %bb0 [ -- i64 1, label %bb1 -- i64 2, label %bb2 -- ] --bb0: ; preds = %0 -- store i32 1, ptr %p -- ret void --bb1: ; preds = %0 -- store i32 2, ptr %p -- ret void --bb2: ; preds = %0 -- ret void --} -- - define i1 @common_instr_on_switch(i64 %a, i64 %b, i64 %c) unnamed_addr { - ; CHECK-LABEL: @common_instr_on_switch( - ; CHECK-NEXT: start: -+; CHECK-NEXT: switch i64 [[A:%.*]], label [[BB0:%.*]] [ -+; CHECK-NEXT: i64 1, label [[BB1:%.*]] -+; CHECK-NEXT: i64 2, label [[BB2:%.*]] -+; CHECK-NEXT: ] -+; CHECK: bb0: - ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[B:%.*]], [[C:%.*]] --; CHECK-NEXT: ret i1 [[TMP0]] -+; CHECK-NEXT: br label [[EXIT:%.*]] -+; CHECK: bb1: -+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[B]], [[C]] -+; CHECK-NEXT: br label [[EXIT]] -+; CHECK: bb2: -+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[B]], [[C]] -+; CHECK-NEXT: br label [[EXIT]] -+; CHECK: exit: -+; CHECK-NEXT: [[RESULT:%.*]] = phi i1 [ [[TMP0]], [[BB0]] ], [ [[TMP1]], [[BB1]] ], [ [[TMP2]], [[BB2]] ] -+; CHECK-NEXT: ret i1 [[RESULT]] - ; - start: - switch i64 %a, label %bb0 [ -diff -ruN --strip-trailing-cr a/llvm/test/Transforms/SimplifyCFG/hoist-common-code-with-unreachable.ll b/llvm/test/Transforms/SimplifyCFG/hoist-common-code-with-unreachable.ll ---- a/llvm/test/Transforms/SimplifyCFG/hoist-common-code-with-unreachable.ll -+++ b/llvm/test/Transforms/SimplifyCFG/hoist-common-code-with-unreachable.ll -@@ -4,8 +4,25 @@ - define i1 
@common_instr_with_unreachable(i64 %a, i64 %b, i64 %c) { - ; CHECK-LABEL: @common_instr_with_unreachable( - ; CHECK-NEXT: start: -+; CHECK-NEXT: switch i64 [[A:%.*]], label [[UNREACHABLE:%.*]] [ -+; CHECK-NEXT: i64 0, label [[BB0:%.*]] -+; CHECK-NEXT: i64 1, label [[BB1:%.*]] -+; CHECK-NEXT: i64 2, label [[BB2:%.*]] -+; CHECK-NEXT: ] -+; CHECK: unreachable: -+; CHECK-NEXT: unreachable -+; CHECK: bb0: - ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[B:%.*]], [[C:%.*]] --; CHECK-NEXT: ret i1 [[TMP0]] -+; CHECK-NEXT: br label [[EXIT:%.*]] -+; CHECK: bb1: -+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[B]], [[C]] -+; CHECK-NEXT: br label [[EXIT]] -+; CHECK: bb2: -+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[B]], [[C]] -+; CHECK-NEXT: br label [[EXIT]] -+; CHECK: exit: -+; CHECK-NEXT: [[RESULT:%.*]] = phi i1 [ [[TMP0]], [[BB0]] ], [ [[TMP1]], [[BB1]] ], [ [[TMP2]], [[BB2]] ] -+; CHECK-NEXT: ret i1 [[RESULT]] - ; - start: - switch i64 %a, label %unreachable [ -@@ -37,90 +54,43 @@ - define i1 @common_instr_with_unreachable_2(i64 %a, i64 %b, i64 %c) { - ; CHECK-LABEL: @common_instr_with_unreachable_2( - ; CHECK-NEXT: start: --; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[B:%.*]], [[C:%.*]] --; CHECK-NEXT: ret i1 [[TMP0]] --; --start: -- switch i64 %a, label %bb1 [ -- i64 0, label %bb0 -- i64 1, label %unreachable -- i64 2, label %bb2 -- ] -- --unreachable: -- unreachable -- --bb0: ; preds = %start -- %0 = icmp eq i64 %b, %c -- br label %exit -- --bb1: ; preds = %start -- %1 = icmp eq i64 %b, %c -- br label %exit -- --bb2: ; preds = %start -- %2 = icmp eq i64 %b, %c -- br label %exit -- --exit: ; preds = %bb2, %bb1, %bb0 -- %result = phi i1 [ %0, %bb0 ], [ %1, %bb1 ], [ %2, %bb2 ] -- ret i1 %result --} -- --declare void @no_return() --declare void @foo() -- --define i1 @not_only_unreachable(i64 %a, i64 %b, i64 %c) { --; CHECK-LABEL: @not_only_unreachable( --; CHECK-NEXT: start: --; CHECK-NEXT: switch i64 [[A:%.*]], label [[UNREACHABLE:%.*]] [ -+; CHECK-NEXT: switch i64 [[A:%.*]], 
label [[BB1:%.*]] [ - ; CHECK-NEXT: i64 0, label [[BB0:%.*]] --; CHECK-NEXT: i64 1, label [[BB1:%.*]] - ; CHECK-NEXT: i64 2, label [[BB2:%.*]] - ; CHECK-NEXT: ] --; CHECK: unreachable: --; CHECK-NEXT: call void @no_return() --; CHECK-NEXT: unreachable - ; CHECK: bb0: - ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[B:%.*]], [[C:%.*]] --; CHECK-NEXT: call void @foo() - ; CHECK-NEXT: br label [[EXIT:%.*]] - ; CHECK: bb1: - ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[B]], [[C]] --; CHECK-NEXT: call void @foo() - ; CHECK-NEXT: br label [[EXIT]] - ; CHECK: bb2: - ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[B]], [[C]] --; CHECK-NEXT: call void @foo() - ; CHECK-NEXT: br label [[EXIT]] - ; CHECK: exit: - ; CHECK-NEXT: [[RESULT:%.*]] = phi i1 [ [[TMP0]], [[BB0]] ], [ [[TMP1]], [[BB1]] ], [ [[TMP2]], [[BB2]] ] - ; CHECK-NEXT: ret i1 [[RESULT]] - ; - start: -- switch i64 %a, label %unreachable [ -+ switch i64 %a, label %bb1 [ - i64 0, label %bb0 -- i64 1, label %bb1 -+ i64 1, label %unreachable - i64 2, label %bb2 - ] - - unreachable: -- call void @no_return() - unreachable - - bb0: ; preds = %start - %0 = icmp eq i64 %b, %c -- call void @foo() - br label %exit - - bb1: ; preds = %start - %1 = icmp eq i64 %b, %c -- call void @foo() - br label %exit - - bb2: ; preds = %start - %2 = icmp eq i64 %b, %c -- call void @foo() - br label %exit - - exit: ; preds = %bb2, %bb1, %bb0 -diff -ruN --strip-trailing-cr a/llvm/test/Transforms/SimplifyCFG/hoist-common-skip.ll b/llvm/test/Transforms/SimplifyCFG/hoist-common-skip.ll ---- a/llvm/test/Transforms/SimplifyCFG/hoist-common-skip.ll -+++ b/llvm/test/Transforms/SimplifyCFG/hoist-common-skip.ll -@@ -48,68 +48,6 @@ - ret void - } - --define void @f0_switch(i64 %i, ptr nocapture noundef %d, ptr nocapture noundef readonly %m, ptr nocapture noundef readonly %b) { --; CHECK-LABEL: @f0_switch( --; CHECK-NEXT: entry: --; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[B:%.*]], align 2 --; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[M:%.*]], align 2 --; 
CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ --; CHECK-NEXT: i64 1, label [[BB1:%.*]] --; CHECK-NEXT: i64 2, label [[BB2:%.*]] --; CHECK-NEXT: ] --; CHECK: bb0: --; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[TMP0]], 1 --; CHECK-NEXT: [[U:%.*]] = add i16 [[ADD]], [[TMP1]] --; CHECK-NEXT: br label [[END:%.*]] --; CHECK: bb1: --; CHECK-NEXT: [[SUB:%.*]] = sub nsw i16 [[TMP0]], 1 --; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[SUB]], 3 --; CHECK-NEXT: [[V:%.*]] = add i16 [[SUB]], [[TMP2]] --; CHECK-NEXT: br label [[END]] --; CHECK: bb2: --; CHECK-NEXT: [[SUB2:%.*]] = sub nsw i16 [[TMP0]], 1 --; CHECK-NEXT: [[TMP3:%.*]] = add i16 [[SUB2]], 3 --; CHECK-NEXT: [[W:%.*]] = add i16 [[SUB2]], [[TMP3]] --; CHECK-NEXT: br label [[END]] --; CHECK: end: --; CHECK-NEXT: [[UV:%.*]] = phi i16 [ [[U]], [[BB0]] ], [ [[V]], [[BB1]] ], [ [[W]], [[BB2]] ] --; CHECK-NEXT: store i16 [[UV]], ptr [[D:%.*]], align 2 --; CHECK-NEXT: ret void --; --entry: -- switch i64 %i, label %bb0 [ -- i64 1, label %bb1 -- i64 2, label %bb2 -- ] -- --bb0: -- %0 = load i16, ptr %b, align 2 -- %add = add nsw i16 %0, 1 -- %1 = load i16, ptr %m, align 2 -- %u = add i16 %add, %1 -- br label %end -- --bb1: -- %2 = load i16, ptr %b, align 2 -- %sub = sub nsw i16 %2, 1 -- %3 = load i16, ptr %m, align 2 -- %4 = add i16 %sub, 3 -- %v = add i16 %sub, %4 -- br label %end -- --bb2: -- %5 = load i16, ptr %b, align 2 -- %sub2 = sub nsw i16 %5, 1 -- %6 = load i16, ptr %m, align 2 -- %7 = add i16 %sub2, 3 -- %w = add i16 %sub2, %7 -- br label %end -- --end: -- %uv = phi i16 [ %u, %bb0 ], [ %v, %bb1 ], [ %w, %bb2 ] -- store i16 %uv, ptr %d, align 2 -- ret void --} - - ;; Check some instructions (e.g. add) can be reordered across instructions with side - ;; effects, while others (e.g. load) can't. 
-@@ -159,70 +97,6 @@ - ret void - } - --define void @f2_switch(i64 %i, ptr nocapture noundef %d, ptr nocapture noundef readonly %m, ptr nocapture noundef readonly %b) { --; CHECK-LABEL: @f2_switch( --; CHECK-NEXT: entry: --; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[B:%.*]], align 2 --; CHECK-NEXT: [[ADD_0:%.*]] = add nsw i16 [[TMP0]], 1 --; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ --; CHECK-NEXT: i64 1, label [[BB1:%.*]] --; CHECK-NEXT: i64 2, label [[BB2:%.*]] --; CHECK-NEXT: ] --; CHECK: bb0: --; CHECK-NEXT: call void @side_effects0() --; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[M:%.*]], align 2 --; CHECK-NEXT: [[U:%.*]] = add i16 [[ADD_0]], [[TMP1]] --; CHECK-NEXT: br label [[END:%.*]] --; CHECK: bb1: --; CHECK-NEXT: call void @no_side_effects0() --; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[M]], align 2 --; CHECK-NEXT: [[V:%.*]] = add i16 [[ADD_0]], [[TMP2]] --; CHECK-NEXT: br label [[END]] --; CHECK: bb2: --; CHECK-NEXT: call void @no_side_effects0() --; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[M]], align 2 --; CHECK-NEXT: [[W:%.*]] = add i16 [[ADD_0]], [[TMP3]] --; CHECK-NEXT: br label [[END]] --; CHECK: end: --; CHECK-NEXT: [[UV:%.*]] = phi i16 [ [[U]], [[BB0]] ], [ [[V]], [[BB1]] ], [ [[W]], [[BB2]] ] --; CHECK-NEXT: store i16 [[UV]], ptr [[D:%.*]], align 2 --; CHECK-NEXT: ret void --; --entry: -- switch i64 %i, label %bb0 [ -- i64 1, label %bb1 -- i64 2, label %bb2 -- ] -- --bb0: -- %0 = load i16, ptr %b, align 2 -- call void @side_effects0() -- %add.0 = add nsw i16 %0, 1 -- %1 = load i16, ptr %m, align 2 -- %u = add i16 %add.0, %1 -- br label %end -- --bb1: -- %2 = load i16, ptr %b, align 2 -- call void @no_side_effects0() -- %add.1 = add nsw i16 %2, 1 -- %3 = load i16, ptr %m, align 2 -- %v = add i16 %add.1, %3 -- br label %end -- --bb2: -- %4 = load i16, ptr %b, align 2 -- call void @no_side_effects0() -- %add.2 = add nsw i16 %4, 1 -- %5 = load i16, ptr %m, align 2 -- %w = add i16 %add.2, %5 -- br label %end -- --end: -- %uv = phi 
i16 [ %u, %bb0 ], [ %v, %bb1 ], [ %w, %bb2 ] -- store i16 %uv, ptr %d, align 2 -- ret void --} - - ;; Check indeed it was the side effects that prevented hoisting the load - ;; in the previous test. -@@ -269,67 +143,6 @@ - ret void - } - --define void @f3_switch(i64 %i, ptr nocapture noundef %d, ptr nocapture noundef readonly %m, ptr nocapture noundef readonly %b) { --; CHECK-LABEL: @f3_switch( --; CHECK-NEXT: entry: --; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[B:%.*]], align 2 --; CHECK-NEXT: [[ADD_0:%.*]] = add nsw i16 [[TMP0]], 1 --; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[M:%.*]], align 2 --; CHECK-NEXT: [[U:%.*]] = add i16 [[ADD_0]], [[TMP1]] --; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ --; CHECK-NEXT: i64 1, label [[BB1:%.*]] --; CHECK-NEXT: i64 2, label [[BB2:%.*]] --; CHECK-NEXT: ] --; CHECK: bb0: --; CHECK-NEXT: call void @no_side_effects0() --; CHECK-NEXT: br label [[END:%.*]] --; CHECK: bb1: --; CHECK-NEXT: call void @no_side_effects1() --; CHECK-NEXT: br label [[END]] --; CHECK: bb2: --; CHECK-NEXT: call void @no_side_effects1() --; CHECK-NEXT: br label [[END]] --; CHECK: end: --; CHECK-NEXT: [[UV:%.*]] = phi i16 [ [[U]], [[BB0]] ], [ [[U]], [[BB1]] ], [ [[U]], [[BB2]] ] --; CHECK-NEXT: store i16 [[UV]], ptr [[D:%.*]], align 2 --; CHECK-NEXT: ret void --; --entry: -- switch i64 %i, label %bb0 [ -- i64 1, label %bb1 -- i64 2, label %bb2 -- ] -- --bb0: -- %0 = load i16, ptr %b, align 2 -- call void @no_side_effects0() -- %add.0 = add nsw i16 %0, 1 -- %1 = load i16, ptr %m, align 2 -- %u = add i16 %add.0, %1 -- br label %end -- --bb1: -- %2 = load i16, ptr %b, align 2 -- call void @no_side_effects1() -- %add.1 = add nsw i16 %2, 1 -- %3 = load i16, ptr %m, align 2 -- %v = add i16 %add.1, %3 -- br label %end -- --bb2: -- %4 = load i16, ptr %b, align 2 -- call void @no_side_effects1() -- %add.2 = add nsw i16 %4, 1 -- %5 = load i16, ptr %m, align 2 -- %w = add i16 %add.2, %5 -- br label %end -- --end: -- %uv = phi i16 [ %u, %bb0 ], [ %v, %bb1 
], [ %w, %bb2 ] -- store i16 %uv, ptr %d, align 2 -- ret void --} -- - ;; Check some instructions (e.g. sdiv) are not speculatively executed. - - ;; Division by non-zero constant OK to speculate ... -@@ -373,63 +186,6 @@ - ret void - } - --define void @f4_switch(i64 %i, ptr nocapture noundef %d, ptr nocapture noundef readonly %m, ptr nocapture noundef readonly %b) { --; CHECK-LABEL: @f4_switch( --; CHECK-NEXT: entry: --; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[B:%.*]], align 2 --; CHECK-NEXT: [[DIV_0:%.*]] = sdiv i16 [[TMP0]], 2 --; CHECK-NEXT: [[U:%.*]] = add i16 [[DIV_0]], [[TMP0]] --; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ --; CHECK-NEXT: i64 1, label [[BB1:%.*]] --; CHECK-NEXT: i64 2, label [[BB2:%.*]] --; CHECK-NEXT: ] --; CHECK: bb0: --; CHECK-NEXT: call void @side_effects0() --; CHECK-NEXT: br label [[IF_END:%.*]] --; CHECK: bb1: --; CHECK-NEXT: call void @side_effects1() --; CHECK-NEXT: br label [[IF_END]] --; CHECK: bb2: --; CHECK-NEXT: call void @side_effects1() --; CHECK-NEXT: br label [[IF_END]] --; CHECK: if.end: --; CHECK-NEXT: [[UV:%.*]] = phi i16 [ [[U]], [[BB0]] ], [ [[U]], [[BB1]] ], [ [[U]], [[BB2]] ] --; CHECK-NEXT: store i16 [[UV]], ptr [[D:%.*]], align 2 --; CHECK-NEXT: ret void --; --entry: -- switch i64 %i, label %bb0 [ -- i64 1, label %bb1 -- i64 2, label %bb2 -- ] -- --bb0: -- %0 = load i16, ptr %b, align 2 -- call void @side_effects0() -- %div.0 = sdiv i16 %0, 2 -- %u = add i16 %div.0, %0 -- br label %if.end -- --bb1: -- %1 = load i16, ptr %b, align 2 -- call void @side_effects1() -- %div.1 = sdiv i16 %1, 2 -- %v = add i16 %div.1, %1 -- br label %if.end -- --bb2: -- %2 = load i16, ptr %b, align 2 -- call void @side_effects1() -- %div.2 = sdiv i16 %2, 2 -- %w = add i16 %div.2, %2 -- br label %if.end -- --if.end: -- %uv = phi i16 [ %u, %bb0 ], [ %v, %bb1 ], [ %w, %bb2 ] -- store i16 %uv, ptr %d, align 2 -- ret void --} -- - ;; ... but not a general division ... 
- define void @f5(i1 %c, ptr nocapture noundef %d, ptr nocapture noundef readonly %m, ptr nocapture noundef readonly %b) { - ; CHECK-LABEL: @f5( -@@ -474,67 +230,6 @@ - ret void - } - --define void @f5_switch(i64 %i, ptr nocapture noundef %d, ptr nocapture noundef readonly %m, ptr nocapture noundef readonly %b) { --; CHECK-LABEL: @f5_switch( --; CHECK-NEXT: entry: --; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[B:%.*]], align 2 --; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ --; CHECK-NEXT: i64 1, label [[BB1:%.*]] --; CHECK-NEXT: i64 2, label [[BB2:%.*]] --; CHECK-NEXT: ] --; CHECK: bb0: --; CHECK-NEXT: call void @side_effects0() --; CHECK-NEXT: [[DIV_0:%.*]] = sdiv i16 211, [[TMP0]] --; CHECK-NEXT: [[U:%.*]] = add i16 [[DIV_0]], [[TMP0]] --; CHECK-NEXT: br label [[END:%.*]] --; CHECK: bb1: --; CHECK-NEXT: call void @side_effects1() --; CHECK-NEXT: [[DIV_1:%.*]] = sdiv i16 211, [[TMP0]] --; CHECK-NEXT: [[V:%.*]] = add i16 [[DIV_1]], [[TMP0]] --; CHECK-NEXT: br label [[END]] --; CHECK: bb2: --; CHECK-NEXT: call void @side_effects1() --; CHECK-NEXT: [[DIV_2:%.*]] = sdiv i16 211, [[TMP0]] --; CHECK-NEXT: [[W:%.*]] = add i16 [[DIV_2]], [[TMP0]] --; CHECK-NEXT: br label [[END]] --; CHECK: end: --; CHECK-NEXT: [[UV:%.*]] = phi i16 [ [[U]], [[BB0]] ], [ [[V]], [[BB1]] ], [ [[W]], [[BB2]] ] --; CHECK-NEXT: store i16 [[UV]], ptr [[D:%.*]], align 2 --; CHECK-NEXT: ret void --; --entry: -- switch i64 %i, label %bb0 [ -- i64 1, label %bb1 -- i64 2, label %bb2 -- ] -- --bb0: -- %0 = load i16, ptr %b, align 2 -- call void @side_effects0() -- %div.0 = sdiv i16 211, %0 -- %u = add i16 %div.0, %0 -- br label %end -- --bb1: -- %1 = load i16, ptr %b, align 2 -- call void @side_effects1() -- %div.1 = sdiv i16 211, %1 -- %v = add i16 %div.1, %1 -- br label %end -- --bb2: -- %2 = load i16, ptr %b, align 2 -- call void @side_effects1() -- %div.2 = sdiv i16 211, %2 -- %w = add i16 %div.2, %2 -- br label %end -- --end: -- %uv = phi i16 [ %u, %bb0 ], [ %v, %bb1 ], [ %w, %bb2 ] -- 
store i16 %uv, ptr %d, align 2 -- ret void --} -- - ;; ... and it's also OK to hoist the division when there's no speculation happening. - define void @f6(i1 %c, ptr nocapture noundef %d, ptr nocapture noundef readonly %m, ptr nocapture noundef readonly %b) { - ; CHECK-LABEL: @f6( -@@ -576,63 +271,6 @@ - ret void - } - --define void @f6_switch(i64 %i, ptr nocapture noundef %d, ptr nocapture noundef readonly %m, ptr nocapture noundef readonly %b) { --; CHECK-LABEL: @f6_switch( --; CHECK-NEXT: entry: --; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[B:%.*]], align 2 --; CHECK-NEXT: [[DIV_0:%.*]] = sdiv i16 211, [[TMP0]] --; CHECK-NEXT: [[U:%.*]] = add i16 [[DIV_0]], [[TMP0]] --; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ --; CHECK-NEXT: i64 1, label [[BB1:%.*]] --; CHECK-NEXT: i64 2, label [[BB2:%.*]] --; CHECK-NEXT: ] --; CHECK: bb0: --; CHECK-NEXT: call void @no_side_effects0() --; CHECK-NEXT: br label [[END:%.*]] --; CHECK: bb1: --; CHECK-NEXT: call void @no_side_effects1() --; CHECK-NEXT: br label [[END]] --; CHECK: bb2: --; CHECK-NEXT: call void @no_side_effects1() --; CHECK-NEXT: br label [[END]] --; CHECK: end: --; CHECK-NEXT: [[UV:%.*]] = phi i16 [ [[U]], [[BB0]] ], [ [[U]], [[BB1]] ], [ [[U]], [[BB2]] ] --; CHECK-NEXT: store i16 [[UV]], ptr [[D:%.*]], align 2 --; CHECK-NEXT: ret void --; --entry: -- switch i64 %i, label %bb0 [ -- i64 1, label %bb1 -- i64 2, label %bb2 -- ] -- --bb0: -- %0 = load i16, ptr %b, align 2 -- call void @no_side_effects0() -- %div.0 = sdiv i16 211, %0 -- %u = add i16 %div.0, %0 -- br label %end -- --bb1: -- %1 = load i16, ptr %b, align 2 -- call void @no_side_effects1() -- %div.1 = sdiv i16 211, %1 -- %v = add i16 %div.1, %1 -- br label %end -- --bb2: -- %2 = load i16, ptr %b, align 2 -- call void @no_side_effects1() -- %div.2 = sdiv i16 211, %2 -- %w = add i16 %div.2, %2 -- br label %end -- --end: -- %uv = phi i16 [ %u, %bb0 ], [ %v, %bb1 ], [ %w, %bb2 ] -- store i16 %uv, ptr %d, align 2 -- ret void --} -- - ;; No reorder 
of store over a load. - define i16 @f7(i1 %c, ptr %a, ptr %b) { - ; CHECK-LABEL: @f7( -@@ -668,55 +306,6 @@ - ret i16 %v - } - --define i16 @f7_switch(i64 %i, ptr %a, ptr %b) { --; CHECK-LABEL: @f7_switch( --; CHECK-NEXT: entry: --; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ --; CHECK-NEXT: i64 1, label [[BB1:%.*]] --; CHECK-NEXT: i64 2, label [[BB2:%.*]] --; CHECK-NEXT: ] --; CHECK: bb0: --; CHECK-NEXT: [[VA:%.*]] = load i16, ptr [[A:%.*]], align 2 --; CHECK-NEXT: store i16 0, ptr [[B:%.*]], align 2 --; CHECK-NEXT: br label [[END:%.*]] --; CHECK: bb1: --; CHECK-NEXT: [[VB:%.*]] = load i16, ptr [[B]], align 2 --; CHECK-NEXT: store i16 0, ptr [[B]], align 2 --; CHECK-NEXT: br label [[END]] --; CHECK: bb2: --; CHECK-NEXT: [[VC:%.*]] = load i16, ptr [[B]], align 2 --; CHECK-NEXT: store i16 0, ptr [[B]], align 2 --; CHECK-NEXT: br label [[END]] --; CHECK: end: --; CHECK-NEXT: [[V:%.*]] = phi i16 [ [[VA]], [[BB0]] ], [ [[VB]], [[BB1]] ], [ [[VC]], [[BB2]] ] --; CHECK-NEXT: ret i16 [[V]] --; --entry: -- switch i64 %i, label %bb0 [ -- i64 1, label %bb1 -- i64 2, label %bb2 -- ] -- --bb0: -- %va = load i16, ptr %a, align 2 -- store i16 0, ptr %b, align 2 -- br label %end -- --bb1: -- %vb = load i16, ptr %b, align 2 -- store i16 0, ptr %b, align 2 -- br label %end -- --bb2: -- %vc = load i16, ptr %b, align 2 -- store i16 0, ptr %b, align 2 -- br label %end -- --end: -- %v = phi i16 [ %va, %bb0 ], [ %vb, %bb1 ], [ %vc, %bb2 ] -- ret i16 %v --} -- - ;; Can reorder load over another load - define i16 @f8(i1 %cond, ptr %a, ptr %b, ptr %c) { - ; CHECK-LABEL: @f8( -@@ -757,59 +346,6 @@ - ret i16 %w - } - --define i16 @f8_switch(i64 %i, ptr %a, ptr %b, ptr %c) { --; CHECK-LABEL: @f8_switch( --; CHECK-NEXT: entry: --; CHECK-NEXT: [[C_0:%.*]] = load i16, ptr [[C:%.*]], align 2 --; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ --; CHECK-NEXT: i64 1, label [[BB1:%.*]] --; CHECK-NEXT: i64 2, label [[BB2:%.*]] --; CHECK-NEXT: ] --; CHECK: bb0: --; CHECK-NEXT: 
[[VA:%.*]] = load i16, ptr [[A:%.*]], align 2 --; CHECK-NEXT: br label [[END:%.*]] --; CHECK: bb1: --; CHECK-NEXT: [[VB:%.*]] = load i16, ptr [[B:%.*]], align 2 --; CHECK-NEXT: br label [[END]] --; CHECK: bb2: --; CHECK-NEXT: [[VC:%.*]] = load i16, ptr [[B]], align 2 --; CHECK-NEXT: br label [[END]] --; CHECK: end: --; CHECK-NEXT: [[V:%.*]] = phi i16 [ [[VA]], [[BB0]] ], [ [[VB]], [[BB1]] ], [ [[VC]], [[BB2]] ] --; CHECK-NEXT: [[U:%.*]] = phi i16 [ [[C_0]], [[BB0]] ], [ [[C_0]], [[BB1]] ], [ [[C_0]], [[BB2]] ] --; CHECK-NEXT: [[W:%.*]] = add i16 [[V]], [[U]] --; CHECK-NEXT: ret i16 [[W]] --; --entry: -- switch i64 %i, label %bb0 [ -- i64 1, label %bb1 -- i64 2, label %bb2 -- ] -- --bb0: -- %va = load i16, ptr %a, align 2 -- %c.0 = load i16, ptr %c -- br label %end -- --bb1: -- %vb = load i16, ptr %b, align 2 -- %c.1 = load i16, ptr %c -- br label %end -- --bb2: -- %vc = load i16, ptr %b, align 2 -- %c.2 = load i16, ptr %c -- br label %end -- --end: -- %v = phi i16 [ %va, %bb0 ], [ %vb, %bb1 ], [ %vc, %bb2 ] -- %u = phi i16 [ %c.0, %bb0 ], [ %c.1, %bb1 ], [ %c.2, %bb2 ] -- -- %w = add i16 %v, %u -- -- ret i16 %w --} -- - ;; Currently won't reorder volatile and non-volatile loads. 
- define i16 @f9(i1 %cond, ptr %a, ptr %b, ptr %c) { - ; CHECK-LABEL: @f9( -@@ -851,61 +387,6 @@ - ret i16 %w - } - --define i16 @f9_switch(i64 %i, ptr %a, ptr %b, ptr %c) { --; CHECK-LABEL: @f9_switch( --; CHECK-NEXT: entry: --; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ --; CHECK-NEXT: i64 1, label [[BB1:%.*]] --; CHECK-NEXT: i64 2, label [[BB2:%.*]] --; CHECK-NEXT: ] --; CHECK: bb0: --; CHECK-NEXT: [[VA:%.*]] = load volatile i16, ptr [[A:%.*]], align 2 --; CHECK-NEXT: [[C_0:%.*]] = load i16, ptr [[C:%.*]], align 2 --; CHECK-NEXT: br label [[END:%.*]] --; CHECK: bb1: --; CHECK-NEXT: [[VB:%.*]] = load i16, ptr [[B:%.*]], align 2 --; CHECK-NEXT: [[C_1:%.*]] = load i16, ptr [[C]], align 2 --; CHECK-NEXT: br label [[END]] --; CHECK: bb2: --; CHECK-NEXT: [[VC:%.*]] = load i16, ptr [[B]], align 2 --; CHECK-NEXT: [[C_2:%.*]] = load i16, ptr [[C]], align 2 --; CHECK-NEXT: br label [[END]] --; CHECK: end: --; CHECK-NEXT: [[V:%.*]] = phi i16 [ [[VA]], [[BB0]] ], [ [[VB]], [[BB1]] ], [ [[VC]], [[BB2]] ] --; CHECK-NEXT: [[U:%.*]] = phi i16 [ [[C_0]], [[BB0]] ], [ [[C_1]], [[BB1]] ], [ [[C_2]], [[BB2]] ] --; CHECK-NEXT: [[W:%.*]] = add i16 [[V]], [[U]] --; CHECK-NEXT: ret i16 [[W]] --; --entry: -- switch i64 %i, label %bb0 [ -- i64 1, label %bb1 -- i64 2, label %bb2 -- ] -- --bb0: -- %va = load volatile i16, ptr %a, align 2 -- %c.0 = load i16, ptr %c -- br label %end -- --bb1: -- %vb = load i16, ptr %b, align 2 -- %c.1 = load i16, ptr %c -- br label %end -- --bb2: -- %vc = load i16, ptr %b, align 2 -- %c.2 = load i16, ptr %c -- br label %end -- --end: -- %v = phi i16 [ %va, %bb0 ], [ %vb, %bb1 ], [ %vc, %bb2 ] -- %u = phi i16 [ %c.0, %bb0 ], [ %c.1, %bb1 ], [ %c.2, %bb2 ] -- -- %w = add i16 %v, %u -- -- ret i16 %w --} -- - ;; Don't hoist stacksaves across inalloca allocas - define void @f10(i1 %cond) { - ; CHECK-LABEL: @f10( -@@ -953,79 +434,6 @@ - br label %end - - end: -- call void @llvm.stackrestore(ptr %ss) -- ret void --} -- --define void @f10_switch(i64 %i) { 
--; CHECK-LABEL: @f10_switch( --; CHECK-NEXT: [[SS:%.*]] = call ptr @llvm.stacksave.p0() --; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ --; CHECK-NEXT: i64 1, label [[BB1:%.*]] --; CHECK-NEXT: i64 2, label [[BB2:%.*]] --; CHECK-NEXT: ] --; CHECK: bb0: --; CHECK-NEXT: [[I1:%.*]] = alloca inalloca i32, align 4 --; CHECK-NEXT: [[SS2:%.*]] = call ptr @llvm.stacksave.p0() --; CHECK-NEXT: [[I2:%.*]] = alloca inalloca i64, align 8 --; CHECK-NEXT: call void @inalloca_i64(ptr inalloca(i64) [[I2]]) --; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[SS2]]) --; CHECK-NEXT: call void @inalloca_i32(ptr inalloca(i32) [[I1]]) --; CHECK-NEXT: br label [[END:%.*]] --; CHECK: bb1: --; CHECK-NEXT: [[I3:%.*]] = alloca inalloca i64, align 8 --; CHECK-NEXT: [[SS3:%.*]] = call ptr @llvm.stacksave.p0() --; CHECK-NEXT: [[I4:%.*]] = alloca inalloca i64, align 8 --; CHECK-NEXT: [[TMP1:%.*]] = call ptr @inalloca_i64(ptr inalloca(i64) [[I4]]) --; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[SS3]]) --; CHECK-NEXT: [[TMP2:%.*]] = call ptr @inalloca_i64(ptr inalloca(i64) [[I3]]) --; CHECK-NEXT: br label [[END]] --; CHECK: bb2: --; CHECK-NEXT: [[I5:%.*]] = alloca inalloca i64, align 8 --; CHECK-NEXT: [[SS4:%.*]] = call ptr @llvm.stacksave.p0() --; CHECK-NEXT: [[I6:%.*]] = alloca inalloca i64, align 8 --; CHECK-NEXT: [[TMP3:%.*]] = call ptr @inalloca_i64(ptr inalloca(i64) [[I6]]) --; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[SS4]]) --; CHECK-NEXT: [[TMP4:%.*]] = call ptr @inalloca_i64(ptr inalloca(i64) [[I5]]) --; CHECK-NEXT: br label [[END]] --; CHECK: end: --; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[SS]]) --; CHECK-NEXT: ret void --; -- %ss = call ptr @llvm.stacksave() -- switch i64 %i, label %bb0 [ -- i64 1, label %bb1 -- i64 2, label %bb2 -- ] -- --bb0: -- %i1 = alloca inalloca i32 -- %ss2 = call ptr @llvm.stacksave() -- %i2 = alloca inalloca i64 -- call void @inalloca_i64(ptr inalloca(i64) %i2) -- call void @llvm.stackrestore(ptr %ss2) -- call void 
@inalloca_i32(ptr inalloca(i32) %i1) -- br label %end -- --bb1: -- %i3 = alloca inalloca i64 -- %ss3 = call ptr @llvm.stacksave() -- %i4 = alloca inalloca i64 -- call ptr @inalloca_i64(ptr inalloca(i64) %i4) -- call void @llvm.stackrestore(ptr %ss3) -- call ptr @inalloca_i64(ptr inalloca(i64) %i3) -- br label %end -- --bb2: -- %i5 = alloca inalloca i64 -- %ss4 = call ptr @llvm.stacksave() -- %i6 = alloca inalloca i64 -- call ptr @inalloca_i64(ptr inalloca(i64) %i6) -- call void @llvm.stackrestore(ptr %ss4) -- call ptr @inalloca_i64(ptr inalloca(i64) %i5) -- br label %end -- --end: - call void @llvm.stackrestore(ptr %ss) - ret void - } -diff -ruN --strip-trailing-cr a/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll b/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll ---- a/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll -+++ b/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll -@@ -21,8 +21,20 @@ - - define void @hoist_range_switch(i64 %i, ptr %p) { - ; CHECK-LABEL: @hoist_range_switch( --; CHECK-NEXT: out: -+; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ -+; CHECK-NEXT: i64 1, label [[BB1:%.*]] -+; CHECK-NEXT: i64 2, label [[BB2:%.*]] -+; CHECK-NEXT: ] -+; CHECK: bb0: - ; CHECK-NEXT: [[T:%.*]] = load i8, ptr [[P:%.*]], align 1, !range [[RNG1:![0-9]+]] -+; CHECK-NEXT: br label [[OUT:%.*]] -+; CHECK: bb1: -+; CHECK-NEXT: [[E:%.*]] = load i8, ptr [[P]], align 1, !range [[RNG2:![0-9]+]] -+; CHECK-NEXT: br label [[OUT]] -+; CHECK: bb2: -+; CHECK-NEXT: [[F:%.*]] = load i8, ptr [[P]], align 1, !range [[RNG3:![0-9]+]] -+; CHECK-NEXT: br label [[OUT]] -+; CHECK: out: - ; CHECK-NEXT: ret void - ; - switch i64 %i, label %bb0 [ -@@ -45,7 +57,7 @@ - define void @hoist_both_noundef(i1 %c, ptr %p) { - ; CHECK-LABEL: @hoist_both_noundef( - ; CHECK-NEXT: if: --; CHECK-NEXT: [[T:%.*]] = load i8, ptr [[P:%.*]], align 1, !noundef !2 -+; CHECK-NEXT: [[T:%.*]] = load i8, ptr [[P:%.*]], align 1, !noundef !4 - ; CHECK-NEXT: ret void - ; - if: -@@ -66,8 
+78,20 @@ - - define void @hoist_both_noundef_switch(i64 %i, ptr %p) { - ; CHECK-LABEL: @hoist_both_noundef_switch( --; CHECK-NEXT: out: --; CHECK-NEXT: [[T:%.*]] = load i8, ptr [[P:%.*]], align 1, !noundef !2 -+; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ -+; CHECK-NEXT: i64 1, label [[BB1:%.*]] -+; CHECK-NEXT: i64 2, label [[BB2:%.*]] -+; CHECK-NEXT: ] -+; CHECK: bb0: -+; CHECK-NEXT: [[T:%.*]] = load i8, ptr [[P:%.*]], align 1, !noundef !4 -+; CHECK-NEXT: br label [[OUT:%.*]] -+; CHECK: bb1: -+; CHECK-NEXT: [[E:%.*]] = load i8, ptr [[P]], align 1, !noundef !4 -+; CHECK-NEXT: br label [[OUT]] -+; CHECK: bb2: -+; CHECK-NEXT: [[F:%.*]] = load i8, ptr [[P]], align 1, !noundef !4 -+; CHECK-NEXT: br label [[OUT]] -+; CHECK: out: - ; CHECK-NEXT: ret void - ; - switch i64 %i, label %bb0 [ -@@ -110,8 +134,20 @@ - - define void @hoist_one_noundef_switch(i64 %i, ptr %p) { - ; CHECK-LABEL: @hoist_one_noundef_switch( --; CHECK-NEXT: out: --; CHECK-NEXT: [[T:%.*]] = load i8, ptr [[P:%.*]], align 1 -+; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ -+; CHECK-NEXT: i64 1, label [[BB1:%.*]] -+; CHECK-NEXT: i64 2, label [[BB2:%.*]] -+; CHECK-NEXT: ] -+; CHECK: bb0: -+; CHECK-NEXT: [[T:%.*]] = load i8, ptr [[P:%.*]], align 1, !noundef !4 -+; CHECK-NEXT: br label [[OUT:%.*]] -+; CHECK: bb1: -+; CHECK-NEXT: [[E:%.*]] = load i8, ptr [[P]], align 1 -+; CHECK-NEXT: br label [[OUT]] -+; CHECK: bb2: -+; CHECK-NEXT: [[F:%.*]] = load i8, ptr [[P]], align 1, !noundef !4 -+; CHECK-NEXT: br label [[OUT]] -+; CHECK: out: - ; CHECK-NEXT: ret void - ; - switch i64 %i, label %bb0 [ -@@ -134,7 +170,7 @@ - define void @hoist_dereferenceable(i1 %c, ptr %p) { - ; CHECK-LABEL: @hoist_dereferenceable( - ; CHECK-NEXT: if: --; CHECK-NEXT: [[T:%.*]] = load ptr, ptr [[P:%.*]], align 8, !dereferenceable !3 -+; CHECK-NEXT: [[T:%.*]] = load ptr, ptr [[P:%.*]], align 8, !dereferenceable !5 - ; CHECK-NEXT: ret void - ; - if: -@@ -151,8 +187,20 @@ - - define void 
@hoist_dereferenceable_switch(i64 %i, ptr %p) { - ; CHECK-LABEL: @hoist_dereferenceable_switch( --; CHECK-NEXT: out: --; CHECK-NEXT: [[T:%.*]] = load ptr, ptr [[P:%.*]], align 8, !dereferenceable !3 -+; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ -+; CHECK-NEXT: i64 1, label [[BB1:%.*]] -+; CHECK-NEXT: i64 2, label [[BB2:%.*]] -+; CHECK-NEXT: ] -+; CHECK: bb0: -+; CHECK-NEXT: [[T:%.*]] = load ptr, ptr [[P:%.*]], align 8, !dereferenceable !5 -+; CHECK-NEXT: br label [[OUT:%.*]] -+; CHECK: bb1: -+; CHECK-NEXT: [[E:%.*]] = load ptr, ptr [[P]], align 8, !dereferenceable !6 -+; CHECK-NEXT: br label [[OUT]] -+; CHECK: bb2: -+; CHECK-NEXT: [[F:%.*]] = load ptr, ptr [[P]], align 8, !dereferenceable !7 -+; CHECK-NEXT: br label [[OUT]] -+; CHECK: out: - ; CHECK-NEXT: ret void - ; - switch i64 %i, label %bb0 [ -@@ -175,7 +223,7 @@ - define void @hoist_dereferenceable_or_null(i1 %c, ptr %p) { - ; CHECK-LABEL: @hoist_dereferenceable_or_null( - ; CHECK-NEXT: if: --; CHECK-NEXT: [[T:%.*]] = load ptr, ptr [[P:%.*]], align 8, !dereferenceable_or_null !3 -+; CHECK-NEXT: [[T:%.*]] = load ptr, ptr [[P:%.*]], align 8, !dereferenceable_or_null !5 - ; CHECK-NEXT: ret void - ; - if: -@@ -192,8 +240,20 @@ - - define void @hoist_dereferenceable_or_null_switch(i64 %i, ptr %p) { - ; CHECK-LABEL: @hoist_dereferenceable_or_null_switch( --; CHECK-NEXT: out: --; CHECK-NEXT: [[T:%.*]] = load ptr, ptr [[P:%.*]], align 8, !dereferenceable_or_null !3 -+; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ -+; CHECK-NEXT: i64 1, label [[BB1:%.*]] -+; CHECK-NEXT: i64 2, label [[BB2:%.*]] -+; CHECK-NEXT: ] -+; CHECK: bb0: -+; CHECK-NEXT: [[T:%.*]] = load ptr, ptr [[P:%.*]], align 8, !dereferenceable_or_null !6 -+; CHECK-NEXT: br label [[OUT:%.*]] -+; CHECK: bb1: -+; CHECK-NEXT: [[E:%.*]] = load ptr, ptr [[P]], align 8, !dereferenceable_or_null !5 -+; CHECK-NEXT: br label [[OUT]] -+; CHECK: bb2: -+; CHECK-NEXT: [[F:%.*]] = load ptr, ptr [[P]], align 8, !dereferenceable_or_null !7 -+; 
CHECK-NEXT: br label [[OUT]] -+; CHECK: out: - ; CHECK-NEXT: ret void - ; - switch i64 %i, label %bb0 [ -@@ -217,7 +277,7 @@ - define i32 @speculate_range(i1 %c, ptr dereferenceable(8) align 8 %p) { - ; CHECK-LABEL: @speculate_range( - ; CHECK-NEXT: entry: --; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[P:%.*]], align 4, !range [[RNG4:![0-9]+]] -+; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[P:%.*]], align 4, !range [[RNG8:![0-9]+]] - ; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[C:%.*]], i32 [[V]], i32 0 - ; CHECK-NEXT: ret i32 [[SPEC_SELECT]] - ; -@@ -238,7 +298,7 @@ - define ptr @speculate_nonnull(i1 %c, ptr dereferenceable(8) align 8 %p) { - ; CHECK-LABEL: @speculate_nonnull( - ; CHECK-NEXT: entry: --; CHECK-NEXT: [[V:%.*]] = load ptr, ptr [[P:%.*]], align 8, !nonnull !2 -+; CHECK-NEXT: [[V:%.*]] = load ptr, ptr [[P:%.*]], align 8, !nonnull !4 - ; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[C:%.*]], ptr [[V]], ptr null - ; CHECK-NEXT: ret ptr [[SPEC_SELECT]] - ; -@@ -259,7 +319,7 @@ - define ptr @speculate_align(i1 %c, ptr dereferenceable(8) align 8 %p) { - ; CHECK-LABEL: @speculate_align( - ; CHECK-NEXT: entry: --; CHECK-NEXT: [[V:%.*]] = load ptr, ptr [[P:%.*]], align 8, !align !5 -+; CHECK-NEXT: [[V:%.*]] = load ptr, ptr [[P:%.*]], align 8, !align !9 - ; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[C:%.*]], ptr [[V]], ptr null - ; CHECK-NEXT: ret ptr [[SPEC_SELECT]] - ; -@@ -278,7 +338,7 @@ - define void @hoist_fpmath(i1 %c, double %x) { - ; CHECK-LABEL: @hoist_fpmath( - ; CHECK-NEXT: if: --; CHECK-NEXT: [[T:%.*]] = fadd double [[X:%.*]], 1.000000e+00, !fpmath !6 -+; CHECK-NEXT: [[T:%.*]] = fadd double [[X:%.*]], 1.000000e+00, !fpmath !10 - ; CHECK-NEXT: ret void - ; - if: -@@ -295,8 +355,20 @@ - - define void @hoist_fpmath_switch(i64 %i, double %x) { - ; CHECK-LABEL: @hoist_fpmath_switch( --; CHECK-NEXT: out: --; CHECK-NEXT: [[T:%.*]] = fadd double [[X:%.*]], 1.000000e+00, !fpmath !6 -+; CHECK-NEXT: switch i64 [[I:%.*]], label [[BB0:%.*]] [ -+; CHECK-NEXT: i64 
1, label [[BB1:%.*]] -+; CHECK-NEXT: i64 2, label [[BB2:%.*]] -+; CHECK-NEXT: ] -+; CHECK: bb0: -+; CHECK-NEXT: [[T:%.*]] = fadd double [[X:%.*]], 1.000000e+00, !fpmath !10 -+; CHECK-NEXT: br label [[OUT:%.*]] -+; CHECK: bb1: -+; CHECK-NEXT: [[E:%.*]] = fadd double [[X]], 1.000000e+00, !fpmath !11 -+; CHECK-NEXT: br label [[OUT]] -+; CHECK: bb2: -+; CHECK-NEXT: [[F:%.*]] = fadd double [[X]], 1.000000e+00, !fpmath !12 -+; CHECK-NEXT: br label [[OUT]] -+; CHECK: out: - ; CHECK-NEXT: ret void - ; - switch i64 %i, label %bb0 [ -@@ -322,10 +394,16 @@ - !3 = !{ i8 7, i8 9 } - ;. - ; CHECK: [[RNG0]] = !{i8 0, i8 1, i8 3, i8 5} --; CHECK: [[RNG1]] = !{i8 0, i8 1, i8 3, i8 5, i8 7, i8 9} --; CHECK: [[META2:![0-9]+]] = !{} --; CHECK: [[META3:![0-9]+]] = !{i64 10} --; CHECK: [[RNG4]] = !{i32 0, i32 10} --; CHECK: [[META5:![0-9]+]] = !{i64 4} --; CHECK: [[META6:![0-9]+]] = !{float 2.500000e+00} -+; CHECK: [[RNG1]] = !{i8 0, i8 1} -+; CHECK: [[RNG2]] = !{i8 3, i8 5} -+; CHECK: [[RNG3]] = !{i8 7, i8 9} -+; CHECK: [[META4:![0-9]+]] = !{} -+; CHECK: [[META5:![0-9]+]] = !{i64 10} -+; CHECK: [[META6:![0-9]+]] = !{i64 20} -+; CHECK: [[META7:![0-9]+]] = !{i64 30} -+; CHECK: [[RNG8]] = !{i32 0, i32 10} -+; CHECK: [[META9:![0-9]+]] = !{i64 4} -+; CHECK: [[META10:![0-9]+]] = !{float 2.500000e+00} -+; CHECK: [[META11:![0-9]+]] = !{float 5.000000e+00} -+; CHECK: [[META12:![0-9]+]] = !{float 7.500000e+00} - ;. 
+ } +diff -ruN --strip-trailing-cr a/llvm/test/CodeGen/AArch64/setcc_knownbits.ll b/llvm/test/CodeGen/AArch64/setcc_knownbits.ll +--- a/llvm/test/CodeGen/AArch64/setcc_knownbits.ll ++++ b/llvm/test/CodeGen/AArch64/setcc_knownbits.ll +@@ -4,6 +4,8 @@ + define i1 @load_bv_v4i8(i1 zeroext %a) { + ; CHECK-LABEL: load_bv_v4i8: + ; CHECK: // %bb.0: ++; CHECK-NEXT: cmp w0, #0 ++; CHECK-NEXT: cset w0, ne + ; CHECK-NEXT: ret + %b = zext i1 %a to i32 + %c = icmp eq i32 %b, 1 +diff -ruN --strip-trailing-cr a/llvm/test/CodeGen/RISCV/rvv/fold-vp-fadd-and-vp-fmul.ll b/llvm/test/CodeGen/RISCV/rvv/fold-vp-fadd-and-vp-fmul.ll +--- a/llvm/test/CodeGen/RISCV/rvv/fold-vp-fadd-and-vp-fmul.ll ++++ b/llvm/test/CodeGen/RISCV/rvv/fold-vp-fadd-and-vp-fmul.ll +@@ -62,9 +62,9 @@ + ; CHECK-LABEL: fma_reassociate: + ; CHECK: # %bb.0: + ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +-; CHECK-NEXT: vfmadd.vv v11, v10, v12, v0.t +-; CHECK-NEXT: vfmadd.vv v9, v8, v11, v0.t +-; CHECK-NEXT: vmv.v.v v8, v9 ++; CHECK-NEXT: vfmadd.vv v9, v8, v12, v0.t ++; CHECK-NEXT: vfmadd.vv v11, v10, v9, v0.t ++; CHECK-NEXT: vmv.v.v v8, v11 + ; CHECK-NEXT: ret + %1 = call fast @llvm.vp.fmul.nxv1f64( %a, %b, %m, i32 %vl) + %2 = call fast @llvm.vp.fmul.nxv1f64( %c, %d, %m, i32 %vl) diff --git a/third_party/llvm/workspace.bzl b/third_party/llvm/workspace.bzl index aaec90a65c08e6..5abb4304cc53f3 100644 --- a/third_party/llvm/workspace.bzl +++ b/third_party/llvm/workspace.bzl @@ -4,8 +4,8 @@ load("//third_party:repo.bzl", "tf_http_archive") def repo(name): """Imports LLVM.""" - LLVM_COMMIT = "ebefe83c092e41d243829ab812bb650674e2f3d2" - LLVM_SHA256 = "0cac9b05231cd3f0f4efb29fad98ef9b6eb9f01c9c99d016a93764033a603426" + LLVM_COMMIT = "3b0f812b9af459c4f857e4a7ffffa01f7a21446e" + LLVM_SHA256 = "47f8c81275437fb4ebcf0286125d683fb946cbddf0b725dc61d786141b32bc08" tf_http_archive( name = name, From 89b9d8d33903f0b448f543af7cc4e6030bd4e44d Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 22 Sep 2023 04:28:51 -0700 Subject: [PATCH 136/567] Update TFRT dependency to use revision http://github.com/tensorflow/runtime/commit/e2e1f9f197af799cbf558eacd26221695a7971ce. PiperOrigin-RevId: 567584745 --- third_party/tf_runtime/workspace.bzl | 4 ++-- third_party/xla/third_party/tf_runtime/workspace.bzl | 4 ++-- .../xla/third_party/tsl/third_party/tf_runtime/workspace.bzl | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/third_party/tf_runtime/workspace.bzl b/third_party/tf_runtime/workspace.bzl index 5b50da65a5d4de..3671380535a7b8 100644 --- a/third_party/tf_runtime/workspace.bzl +++ b/third_party/tf_runtime/workspace.bzl @@ -6,8 +6,8 @@ def repo(): """Imports TFRT.""" # Attention: tools parse and update these lines. - TFRT_COMMIT = "73bfccd957234f8cc02c69dc50078288b6d4db8c" - TFRT_SHA256 = "2ae79e0aa046864afc9b43a23ee077c52e2f91a192e62f6ca8e82a68a206f116" + TFRT_COMMIT = "e2e1f9f197af799cbf558eacd26221695a7971ce" + TFRT_SHA256 = "ef6ca7d0ab5fce018a8ae64500de96dffd838d1ed4bef514d797ba2b99bd8908" tf_http_archive( name = "tf_runtime", diff --git a/third_party/xla/third_party/tf_runtime/workspace.bzl b/third_party/xla/third_party/tf_runtime/workspace.bzl index 5b50da65a5d4de..3671380535a7b8 100644 --- a/third_party/xla/third_party/tf_runtime/workspace.bzl +++ b/third_party/xla/third_party/tf_runtime/workspace.bzl @@ -6,8 +6,8 @@ def repo(): """Imports TFRT.""" # Attention: tools parse and update these lines. 
- TFRT_COMMIT = "73bfccd957234f8cc02c69dc50078288b6d4db8c" - TFRT_SHA256 = "2ae79e0aa046864afc9b43a23ee077c52e2f91a192e62f6ca8e82a68a206f116" + TFRT_COMMIT = "e2e1f9f197af799cbf558eacd26221695a7971ce" + TFRT_SHA256 = "ef6ca7d0ab5fce018a8ae64500de96dffd838d1ed4bef514d797ba2b99bd8908" tf_http_archive( name = "tf_runtime", diff --git a/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl b/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl index 5b50da65a5d4de..3671380535a7b8 100644 --- a/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl +++ b/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl @@ -6,8 +6,8 @@ def repo(): """Imports TFRT.""" # Attention: tools parse and update these lines. - TFRT_COMMIT = "73bfccd957234f8cc02c69dc50078288b6d4db8c" - TFRT_SHA256 = "2ae79e0aa046864afc9b43a23ee077c52e2f91a192e62f6ca8e82a68a206f116" + TFRT_COMMIT = "e2e1f9f197af799cbf558eacd26221695a7971ce" + TFRT_SHA256 = "ef6ca7d0ab5fce018a8ae64500de96dffd838d1ed4bef514d797ba2b99bd8908" tf_http_archive( name = "tf_runtime", From 422d09c0badb4bc56fb554a8a50e7e7346a2c972 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tam=C3=A1s=20Danyluk?= Date: Fri, 22 Sep 2023 04:45:43 -0700 Subject: [PATCH 137/567] [XLA:GPU][NFC] Always profile the cuBLAS version of GEMMs in TritonAutotuner Even if no validation is requested. This is a preparation for a later CL. PiperOrigin-RevId: 567587347 --- .../xla/xla/service/gpu/triton_autotuner.cc | 50 ++++++++++--------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/third_party/xla/xla/service/gpu/triton_autotuner.cc b/third_party/xla/xla/service/gpu/triton_autotuner.cc index 96ed7527326acb..fd5d77258cb907 100644 --- a/third_party/xla/xla/service/gpu/triton_autotuner.cc +++ b/third_party/xla/xla/service/gpu/triton_autotuner.cc @@ -165,7 +165,7 @@ struct ExecutableCandidate { // This contains all alternative executables related to one fusion. 
struct ExecutableSet { std::vector candidates; - // This is nullptr iff correctness check is disabled. + // Not nullptr. std::unique_ptr reference; }; @@ -397,9 +397,8 @@ CompileMany(const AutotuneConfig& config, AutotunerCompileUtil& util, const GemmConfigSet& gemm_config_set = key_value.second; config_count += gemm_config_set.configs.size(); } - if (config.should_check_correctness()) { - config_count += gemm_config_sets.size(); - } + // The cuBLAS configs: + config_count += gemm_config_sets.size(); std::atomic done_count = 0; std::atomic good_count = 0; @@ -490,14 +489,12 @@ CompileMany(const AutotuneConfig& config, AutotunerCompileUtil& util, }); } - if (config.should_check_correctness()) { - thread_pool->Schedule([&, fusion] { - StatusOr has_executable = compile_reference_executable(fusion); - TF_CHECK_OK(has_executable.status()); - log(has_executable.value()); - counter.DecrementCount(); - }); - } + thread_pool->Schedule([&, fusion] { + StatusOr has_executable = compile_reference_executable(fusion); + TF_CHECK_OK(has_executable.status()); + log(has_executable.value()); + counter.DecrementCount(); + }); } counter.Wait(); } else { @@ -521,11 +518,9 @@ CompileMany(const AutotuneConfig& config, AutotunerCompileUtil& util, log(has_executable); } - if (config.should_check_correctness()) { - TF_ASSIGN_OR_RETURN(bool has_executable, - compile_reference_executable(fusion)); - log(has_executable); - } + TF_ASSIGN_OR_RETURN(bool has_executable, + compile_reference_executable(fusion)); + log(has_executable); } } @@ -534,9 +529,9 @@ CompileMany(const AutotuneConfig& config, AutotunerCompileUtil& util, return executable_sets; } -// Runs matmul fusion contents without Triton - with cuBLAS, to generate -// a reference output. -StatusOr RunMatmulWithCublas( +// Runs matmul fusion contents without Triton - with cuBLAS, to measure time and +// generate a reference output. 
+StatusOr RunMatmulWithCublas( AutotunerCompileUtil& util, se::Stream* stream, Executable& executable, absl::Span input_buffers, absl::Span input_shapes) { @@ -544,7 +539,7 @@ StatusOr RunMatmulWithCublas( std::optional output, util.ProfileExecutable(&executable, stream, input_buffers, input_shapes)); TF_RET_CHECK(output.has_value()); - return std::move(output->output); + return std::move(output.value()); } StatusOr Execute(const AutotuneConfig& config, @@ -569,7 +564,6 @@ StatusOr Execute(const AutotuneConfig& config, se::RedzoneAllocator rz_allocator, AutotunerUtil::CreateRedzoneAllocator(config, debug_opts)); - std::optional reference_buffer; const HloInstruction& root = *fusion_computation->root_instruction(); BufferComparator comparator(root.shape(), fusion_computation->parent()->config()); @@ -588,13 +582,21 @@ StatusOr Execute(const AutotuneConfig& config, input_shapes.push_back(param->shape()); } - if (config.should_check_correctness()) { + // Run with cuBLAS. + std::optional reference_buffer; + absl::Duration cublas_duration; + { TF_RET_CHECK(executable_set.reference != nullptr); TF_ASSIGN_OR_RETURN( - reference_buffer, + ProfilingOutput output, RunMatmulWithCublas(util, stream, *executable_set.reference, inputs, input_shapes)); + if (config.should_check_correctness()) { + reference_buffer = std::move(output.output); + } + cublas_duration = output.duration; } + VLOG(3) << "Running with cuBLAS took: " << cublas_duration; const int log_every_n = GetLogEveryN(); int64_t executable_count = From 515d795bae6b706719cc301655957c94d621d3d5 Mon Sep 17 00:00:00 2001 From: Ilia Sergachev Date: Fri, 22 Sep 2023 05:31:23 -0700 Subject: [PATCH 138/567] [XLA:GPU] Re-add logging of Triton IR. cl/560999729 replaced logging with dumps but dumping is not enabled on autotuning variants and thus does not replace logging for cases when autotuned variants crash. 
PiperOrigin-RevId: 567594816 --- third_party/xla/xla/service/gpu/ir_emitter_triton.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/third_party/xla/xla/service/gpu/ir_emitter_triton.cc b/third_party/xla/xla/service/gpu/ir_emitter_triton.cc index d431f1349c294f..70457113f8be7b 100644 --- a/third_party/xla/xla/service/gpu/ir_emitter_triton.cc +++ b/third_party/xla/xla/service/gpu/ir_emitter_triton.cc @@ -1661,6 +1661,8 @@ StatusOr TritonWrapper( device_info.shared_memory_per_block_optin)); b.create(loc); + + VLOG(6) << llvm_ir::DumpToString(*triton_module); if (DumpingEnabledForHloModule(*hlo_computation->parent())) { DumpToFileInDirOrStdout(*hlo_computation->parent(), "triton_ir", "ttir", llvm_ir::DumpToString(*triton_module)); From fad56f6ce87d25e05b43bae1c32d1fff86a95dd0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 22 Sep 2023 05:56:34 -0700 Subject: [PATCH 139/567] Remove clang16 toolchain and rename clang17 toolchain The upgrade to LLVM-17 is finished, so we can remove the old clang16 based toolchain and rename the LLVM17 toolchain. 
PiperOrigin-RevId: 567598888 --- .bazelrc | 24 +++++----- ci/official/bazelrcs/cpu.bazelrc | 16 +++---- ci/official/bazelrcs/cuda.bazelrc | 24 +++++----- .../toolchains/remote_config/configs.bzl | 47 ++----------------- third_party/xla/.bazelrc | 24 +++++----- third_party/xla/third_party/tsl/.bazelrc | 24 +++++----- .../toolchains/remote_config/configs.bzl | 47 ++----------------- .../toolchains/remote_config/configs.bzl | 47 ++----------------- 8 files changed, 68 insertions(+), 185 deletions(-) diff --git a/.bazelrc b/.bazelrc index 475a1c5378cdfe..98eff1ea726464 100644 --- a/.bazelrc +++ b/.bazelrc @@ -248,7 +248,7 @@ build:cuda_clang_official --action_env=CUDA_TOOLKIT_PATH="/usr/local/cuda-12.2" build:cuda_clang_official --action_env=GCC_HOST_COMPILER_PATH="/dt9/usr/bin/gcc" build:cuda_clang_official --action_env=CLANG_CUDA_COMPILER_PATH="/usr/lib/llvm-17/bin/clang" build:cuda_clang_official --action_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64" -build:cuda_clang_official --crosstool_top="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain" +build:cuda_clang_official --crosstool_top="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain" # Debug config build:dbg -c dbg @@ -457,12 +457,12 @@ build:rbe_linux --host_linkopt=-lm build:rbe_linux_cpu --config=rbe_linux # Linux cpu and cuda builds share the same toolchain now. 
-build:rbe_linux_cpu --host_crosstool_top="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain" -build:rbe_linux_cpu --crosstool_top="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain" -build:rbe_linux_cpu --extra_toolchains="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain-linux-x86_64" -build:rbe_linux_cpu --extra_execution_platforms="@sigbuild-r2.14-clang17_config_platform//:platform" -build:rbe_linux_cpu --host_platform="@sigbuild-r2.14-clang17_config_platform//:platform" -build:rbe_linux_cpu --platforms="@sigbuild-r2.14-clang17_config_platform//:platform" +build:rbe_linux_cpu --host_crosstool_top="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain" +build:rbe_linux_cpu --crosstool_top="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain" +build:rbe_linux_cpu --extra_toolchains="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain-linux-x86_64" +build:rbe_linux_cpu --extra_execution_platforms="@sigbuild-r2.14-clang_config_platform//:platform" +build:rbe_linux_cpu --host_platform="@sigbuild-r2.14-clang_config_platform//:platform" +build:rbe_linux_cpu --platforms="@sigbuild-r2.14-clang_config_platform//:platform" # This is needed for all Clang17 builds but must not be present in GCC builds. build:rbe_linux_cpu --copt=-Wno-error=unused-command-line-argument # This was added in clang-16 by https://reviews.llvm.org/D133574. @@ -471,7 +471,7 @@ build:rbe_linux_cpu --copt=-Wno-error=unused-command-line-argument # See https://github.com/protocolbuffers/upb/blob/9effcbcb27f0a665f9f345030188c0b291e32482/upb/upb.c#L183. 
build:rbe_linux_cpu --copt=-Wno-gnu-offsetof-extensions # Python config is the same across all containers because the binary is the same -build:rbe_linux_cpu --repo_env=TF_PYTHON_CONFIG_REPO="@sigbuild-r2.14-clang17_config_python" +build:rbe_linux_cpu --repo_env=TF_PYTHON_CONFIG_REPO="@sigbuild-r2.14-clang_config_python" build:rbe_linux_cpu --python_path="/usr/bin/python3" # These you may need to change for your own GCP project. common:rbe_linux_cpu --remote_instance_name=projects/tensorflow-testing/instances/default_instance @@ -494,9 +494,9 @@ build:rbe_linux_cuda --config=cuda_clang_official build:rbe_linux_cuda --config=rbe_linux_cpu # For Remote build execution -- GPU configuration build:rbe_linux_cuda --repo_env=REMOTE_GPU_TESTING=1 -build:rbe_linux_cuda --repo_env=TF_CUDA_CONFIG_REPO="@sigbuild-r2.14-clang17_config_cuda" -build:rbe_linux_cuda --repo_env=TF_TENSORRT_CONFIG_REPO="@sigbuild-r2.14-clang17_config_tensorrt" -build:rbe_linux_cuda --repo_env=TF_NCCL_CONFIG_REPO="@sigbuild-r2.14-clang17_config_nccl" +build:rbe_linux_cuda --repo_env=TF_CUDA_CONFIG_REPO="@sigbuild-r2.14-clang_config_cuda" +build:rbe_linux_cuda --repo_env=TF_TENSORRT_CONFIG_REPO="@sigbuild-r2.14-clang_config_tensorrt" +build:rbe_linux_cuda --repo_env=TF_NCCL_CONFIG_REPO="@sigbuild-r2.14-clang_config_nccl" test:rbe_linux_cuda --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64" # TODO(kanglan): Remove rbe_win and rbe_win_py3* after b/289091160 is fixed @@ -558,7 +558,7 @@ test:release_base --test_size_filters=small,medium # Target the AVX instruction set build:release_cpu_linux --config=avx_linux # Use the Clang toolchain to compile -build:release_cpu_linux --crosstool_top="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain" +build:release_cpu_linux --crosstool_top="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain" # Disable clang extention that rejects type definitions within offsetof. 
# This was added in clang-16 by https://reviews.llvm.org/D133574. # Can be removed once upb is updated, since a type definition is used within diff --git a/ci/official/bazelrcs/cpu.bazelrc b/ci/official/bazelrcs/cpu.bazelrc index 096893bdc096c0..43c951e3532466 100644 --- a/ci/official/bazelrcs/cpu.bazelrc +++ b/ci/official/bazelrcs/cpu.bazelrc @@ -49,7 +49,7 @@ build --linkopt="-lm" build --copt=-Wno-gnu-offsetof-extensions # Use the NVCC toolchain to compile for manylinux2014 -build --crosstool_top="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain" +build --crosstool_top="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain" # Test-related settings below this point. test --build_tests_only --keep_going --test_output=errors --verbose_failures=true @@ -88,14 +88,14 @@ build:rbe --spawn_strategy=remote,worker,standalone,local build:rbe --remote_download_toplevel build:rbe --action_env=PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/go/bin" build:rbe --linkopt=-lrt --host_linkopt=-lrt --linkopt=-lm --host_linkopt=-lm # Unclear why this is here -build:rbe --host_crosstool_top="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain" -build:rbe --crosstool_top="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain" -build:rbe --extra_toolchains="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain-linux-x86_64" -build:rbe --extra_execution_platforms="@sigbuild-r2.14-clang17_config_platform//:platform" -build:rbe --host_platform="@sigbuild-r2.14-clang17_config_platform//:platform" -build:rbe --platforms="@sigbuild-r2.14-clang17_config_platform//:platform" +build:rbe --host_crosstool_top="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain" +build:rbe --crosstool_top="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain" +build:rbe --extra_toolchains="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain-linux-x86_64" +build:rbe --extra_execution_platforms="@sigbuild-r2.14-clang_config_platform//:platform" +build:rbe 
--host_platform="@sigbuild-r2.14-clang_config_platform//:platform" +build:rbe --platforms="@sigbuild-r2.14-clang_config_platform//:platform" # Python config is the same across all containers because the binary is the same -build:rbe --repo_env=TF_PYTHON_CONFIG_REPO="@sigbuild-r2.14-clang17_config_python" +build:rbe --repo_env=TF_PYTHON_CONFIG_REPO="@sigbuild-r2.14-clang_config_python" build:rbe --remote_instance_name=projects/tensorflow-testing/instances/default_instance # For continuous builds diff --git a/ci/official/bazelrcs/cuda.bazelrc b/ci/official/bazelrcs/cuda.bazelrc index 14f4c4b96d0b6e..1bb260687d35fa 100644 --- a/ci/official/bazelrcs/cuda.bazelrc +++ b/ci/official/bazelrcs/cuda.bazelrc @@ -60,7 +60,7 @@ build --action_env=GCC_HOST_COMPILER_PATH="/dt9/usr/bin/gcc" build --action_env=CLANG_CUDA_COMPILER_PATH="/usr/lib/llvm-17/bin/clang" build --action_env=TF_CUDA_CLANG="1" build --action_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/tensorrt/lib" -build --crosstool_top="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain" +build --crosstool_top="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain" # CUDA: Enable TensorRT optimizations # https://developer.nvidia.com/tensorrt @@ -111,24 +111,24 @@ build:rbe --spawn_strategy=remote,worker,standalone,local build:rbe --remote_download_toplevel build:rbe --action_env=PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/go/bin" build:rbe --linkopt=-lrt --host_linkopt=-lrt --linkopt=-lm --host_linkopt=-lm # Unclear why this is here -build:rbe --host_crosstool_top="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain" -build:rbe --crosstool_top="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain" -build:rbe --extra_toolchains="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain-linux-x86_64" -build:rbe --extra_execution_platforms="@sigbuild-r2.14-clang17_config_platform//:platform" -build:rbe 
--host_platform="@sigbuild-r2.14-clang17_config_platform//:platform" -build:rbe --platforms="@sigbuild-r2.14-clang17_config_platform//:platform" +build:rbe --host_crosstool_top="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain" +build:rbe --crosstool_top="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain" +build:rbe --extra_toolchains="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain-linux-x86_64" +build:rbe --extra_execution_platforms="@sigbuild-r2.14-clang_config_platform//:platform" +build:rbe --host_platform="@sigbuild-r2.14-clang_config_platform//:platform" +build:rbe --platforms="@sigbuild-r2.14-clang_config_platform//:platform" # Python config is the same across all containers because the binary is the same -build:rbe --repo_env=TF_PYTHON_CONFIG_REPO="@sigbuild-r2.14-clang17_config_python" +build:rbe --repo_env=TF_PYTHON_CONFIG_REPO="@sigbuild-r2.14-clang_config_python" build:rbe --remote_instance_name=projects/tensorflow-testing/instances/default_instance build:rbe --project_id="tensorflow-testing" # For Remote build execution -- GPU configuration build:rbe --repo_env=REMOTE_GPU_TESTING=1 test:rbe --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64" -build:rbe --repo_env=TF_CUDA_CONFIG_REPO="@sigbuild-r2.14-clang17_config_cuda" -build:rbe --repo_env=TF_TENSORRT_CONFIG_REPO="@sigbuild-r2.14-clang17_config_tensorrt" -build:rbe --repo_env=TF_NCCL_CONFIG_REPO="@sigbuild-r2.14-clang17_config_nccl" -build:rbe --repo_env=TF_PYTHON_CONFIG_REPO="@sigbuild-r2.14-clang17_config_python" +build:rbe --repo_env=TF_CUDA_CONFIG_REPO="@sigbuild-r2.14-clang_config_cuda" +build:rbe --repo_env=TF_TENSORRT_CONFIG_REPO="@sigbuild-r2.14-clang_config_tensorrt" +build:rbe --repo_env=TF_NCCL_CONFIG_REPO="@sigbuild-r2.14-clang_config_nccl" +build:rbe --repo_env=TF_PYTHON_CONFIG_REPO="@sigbuild-r2.14-clang_config_python" # For continuous builds test:pycpp_filters 
--test_tag_filters=-no_oss,-oss_excluded,-oss_serial,-benchmark-test,-v1only,gpu,-no_gpu,-no_gpu_presubmit,-no_cuda11 diff --git a/tensorflow/tools/toolchains/remote_config/configs.bzl b/tensorflow/tools/toolchains/remote_config/configs.bzl index a1fd875f4bd5e5..6e37be2447c0ec 100644 --- a/tensorflow/tools/toolchains/remote_config/configs.bzl +++ b/tensorflow/tools/toolchains/remote_config/configs.bzl @@ -620,49 +620,10 @@ def initialize_rbe_configs(): sigbuild_tf_configs( name_container_map = { - "sigbuild-r2.14-clang": "docker://gcr.io/tensorflow-sigs/build@sha256:c03809a6b4008b430bf241efce78cdcd92c7bc41d11d0ba57216e97d813ac282", - "sigbuild-r2.14-clang-python3.8": "docker://gcr.io/tensorflow-sigs/build@sha256:c46d275e5bc760b7af465dc063629b234cfa34aabf0c7fe30581effc0b99648a", - "sigbuild-r2.14-clang-python3.9": "docker://gcr.io/tensorflow-sigs/build@sha256:c03809a6b4008b430bf241efce78cdcd92c7bc41d11d0ba57216e97d813ac282", - "sigbuild-r2.14-clang-python3.10": "docker://gcr.io/tensorflow-sigs/build@sha256:06b3a97ef247dbb00a9c6d8315e4e035d891ae2f18088de254f15d6ecedadfb9", - "sigbuild-r2.14-clang-python3.11": "docker://gcr.io/tensorflow-sigs/build@sha256:00dc9e13130727dcdeb54ca77423e317a79aae84d5783c05b38b7bbdf753f0f6", - }, - # Unclear why LIBC is set to 2.19 here, and yet manylinux2010 is 2.12 - # and manylinux2014 is 2.17. 
- env = { - "ABI_LIBC_VERSION": "glibc_2.19", - "ABI_VERSION": "gcc", - "BAZEL_COMPILER": "/usr/lib/llvm-16/bin/clang", - "BAZEL_HOST_SYSTEM": "i686-unknown-linux-gnu", - "BAZEL_TARGET_CPU": "k8", - "BAZEL_TARGET_LIBC": "glibc_2.19", - "BAZEL_TARGET_SYSTEM": "x86_64-unknown-linux-gnu", - "CC": "/usr/lib/llvm-16/bin/clang", - "CC_TOOLCHAIN_NAME": "linux_gnu_x86", - "CLEAR_CACHE": "1", - "CUDNN_INSTALL_PATH": "/usr/lib/x86_64-linux-gnu", - "CLANG_CUDA_COMPILER_PATH": "/usr/lib/llvm-16/bin/clang", - "HOST_CXX_COMPILER": "/usr/lib/llvm-16/bin/clang", - "HOST_C_COMPILER": "/usr/lib/llvm-16/bin/clang", - "PYTHON_BIN_PATH": "/usr/bin/python3", - "TENSORRT_INSTALL_PATH": "/usr/lib/x86_64-linux-gnu", - "TF_CUDA_CLANG": "1", - "TF_CUDA_COMPUTE_CAPABILITIES": "3.5,6.0", - "TF_CUDA_VERSION": "11.2", - "TF_CUDNN_VERSION": "8.1", - "TF_ENABLE_XLA": "1", - "TF_NEED_CUDA": "1", - "TF_NEED_TENSORRT": "1", - "TF_SYSROOT": "/dt9", - "TF_TENSORRT_VERSION": "7.2", - }, - ) - - sigbuild_tf_configs( - name_container_map = { - "sigbuild-r2.14-clang17": "docker://gcr.io/tensorflow-sigs/build@sha256:edf0324686ffbf488ed0a6b7d3a29ecc6f7b7ad9c04b2e1a527a6a47c0dc1b4b", - "sigbuild-r2.14-clang17-python3.9": "docker://gcr.io/tensorflow-sigs/build@sha256:edf0324686ffbf488ed0a6b7d3a29ecc6f7b7ad9c04b2e1a527a6a47c0dc1b4b", - "sigbuild-r2.14-clang17-python3.10": "docker://gcr.io/tensorflow-sigs/build@sha256:5b06acad335d5c24aa2f63d39d9de230affd04aa982dda0242eeb893b9dae363", - "sigbuild-r2.14-clang17-python3.11": "docker://gcr.io/tensorflow-sigs/build@sha256:80d991d5cf0ac710b568c71c7790270691749afa19de49d7c1a121ba3f92cd58", + "sigbuild-r2.14-clang": "docker://gcr.io/tensorflow-sigs/build@sha256:edf0324686ffbf488ed0a6b7d3a29ecc6f7b7ad9c04b2e1a527a6a47c0dc1b4b", + "sigbuild-r2.14-clang-python3.9": "docker://gcr.io/tensorflow-sigs/build@sha256:edf0324686ffbf488ed0a6b7d3a29ecc6f7b7ad9c04b2e1a527a6a47c0dc1b4b", + "sigbuild-r2.14-clang-python3.10": 
"docker://gcr.io/tensorflow-sigs/build@sha256:5b06acad335d5c24aa2f63d39d9de230affd04aa982dda0242eeb893b9dae363", + "sigbuild-r2.14-clang-python3.11": "docker://gcr.io/tensorflow-sigs/build@sha256:80d991d5cf0ac710b568c71c7790270691749afa19de49d7c1a121ba3f92cd58", }, # Unclear why LIBC is set to 2.19 here, and yet manylinux2010 is 2.12 # and manylinux2014 is 2.17. diff --git a/third_party/xla/.bazelrc b/third_party/xla/.bazelrc index 475a1c5378cdfe..98eff1ea726464 100644 --- a/third_party/xla/.bazelrc +++ b/third_party/xla/.bazelrc @@ -248,7 +248,7 @@ build:cuda_clang_official --action_env=CUDA_TOOLKIT_PATH="/usr/local/cuda-12.2" build:cuda_clang_official --action_env=GCC_HOST_COMPILER_PATH="/dt9/usr/bin/gcc" build:cuda_clang_official --action_env=CLANG_CUDA_COMPILER_PATH="/usr/lib/llvm-17/bin/clang" build:cuda_clang_official --action_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64" -build:cuda_clang_official --crosstool_top="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain" +build:cuda_clang_official --crosstool_top="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain" # Debug config build:dbg -c dbg @@ -457,12 +457,12 @@ build:rbe_linux --host_linkopt=-lm build:rbe_linux_cpu --config=rbe_linux # Linux cpu and cuda builds share the same toolchain now. 
-build:rbe_linux_cpu --host_crosstool_top="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain" -build:rbe_linux_cpu --crosstool_top="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain" -build:rbe_linux_cpu --extra_toolchains="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain-linux-x86_64" -build:rbe_linux_cpu --extra_execution_platforms="@sigbuild-r2.14-clang17_config_platform//:platform" -build:rbe_linux_cpu --host_platform="@sigbuild-r2.14-clang17_config_platform//:platform" -build:rbe_linux_cpu --platforms="@sigbuild-r2.14-clang17_config_platform//:platform" +build:rbe_linux_cpu --host_crosstool_top="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain" +build:rbe_linux_cpu --crosstool_top="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain" +build:rbe_linux_cpu --extra_toolchains="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain-linux-x86_64" +build:rbe_linux_cpu --extra_execution_platforms="@sigbuild-r2.14-clang_config_platform//:platform" +build:rbe_linux_cpu --host_platform="@sigbuild-r2.14-clang_config_platform//:platform" +build:rbe_linux_cpu --platforms="@sigbuild-r2.14-clang_config_platform//:platform" # This is needed for all Clang17 builds but must not be present in GCC builds. build:rbe_linux_cpu --copt=-Wno-error=unused-command-line-argument # This was added in clang-16 by https://reviews.llvm.org/D133574. @@ -471,7 +471,7 @@ build:rbe_linux_cpu --copt=-Wno-error=unused-command-line-argument # See https://github.com/protocolbuffers/upb/blob/9effcbcb27f0a665f9f345030188c0b291e32482/upb/upb.c#L183. 
build:rbe_linux_cpu --copt=-Wno-gnu-offsetof-extensions # Python config is the same across all containers because the binary is the same -build:rbe_linux_cpu --repo_env=TF_PYTHON_CONFIG_REPO="@sigbuild-r2.14-clang17_config_python" +build:rbe_linux_cpu --repo_env=TF_PYTHON_CONFIG_REPO="@sigbuild-r2.14-clang_config_python" build:rbe_linux_cpu --python_path="/usr/bin/python3" # These you may need to change for your own GCP project. common:rbe_linux_cpu --remote_instance_name=projects/tensorflow-testing/instances/default_instance @@ -494,9 +494,9 @@ build:rbe_linux_cuda --config=cuda_clang_official build:rbe_linux_cuda --config=rbe_linux_cpu # For Remote build execution -- GPU configuration build:rbe_linux_cuda --repo_env=REMOTE_GPU_TESTING=1 -build:rbe_linux_cuda --repo_env=TF_CUDA_CONFIG_REPO="@sigbuild-r2.14-clang17_config_cuda" -build:rbe_linux_cuda --repo_env=TF_TENSORRT_CONFIG_REPO="@sigbuild-r2.14-clang17_config_tensorrt" -build:rbe_linux_cuda --repo_env=TF_NCCL_CONFIG_REPO="@sigbuild-r2.14-clang17_config_nccl" +build:rbe_linux_cuda --repo_env=TF_CUDA_CONFIG_REPO="@sigbuild-r2.14-clang_config_cuda" +build:rbe_linux_cuda --repo_env=TF_TENSORRT_CONFIG_REPO="@sigbuild-r2.14-clang_config_tensorrt" +build:rbe_linux_cuda --repo_env=TF_NCCL_CONFIG_REPO="@sigbuild-r2.14-clang_config_nccl" test:rbe_linux_cuda --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64" # TODO(kanglan): Remove rbe_win and rbe_win_py3* after b/289091160 is fixed @@ -558,7 +558,7 @@ test:release_base --test_size_filters=small,medium # Target the AVX instruction set build:release_cpu_linux --config=avx_linux # Use the Clang toolchain to compile -build:release_cpu_linux --crosstool_top="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain" +build:release_cpu_linux --crosstool_top="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain" # Disable clang extention that rejects type definitions within offsetof. 
# This was added in clang-16 by https://reviews.llvm.org/D133574. # Can be removed once upb is updated, since a type definition is used within diff --git a/third_party/xla/third_party/tsl/.bazelrc b/third_party/xla/third_party/tsl/.bazelrc index 475a1c5378cdfe..98eff1ea726464 100644 --- a/third_party/xla/third_party/tsl/.bazelrc +++ b/third_party/xla/third_party/tsl/.bazelrc @@ -248,7 +248,7 @@ build:cuda_clang_official --action_env=CUDA_TOOLKIT_PATH="/usr/local/cuda-12.2" build:cuda_clang_official --action_env=GCC_HOST_COMPILER_PATH="/dt9/usr/bin/gcc" build:cuda_clang_official --action_env=CLANG_CUDA_COMPILER_PATH="/usr/lib/llvm-17/bin/clang" build:cuda_clang_official --action_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64" -build:cuda_clang_official --crosstool_top="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain" +build:cuda_clang_official --crosstool_top="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain" # Debug config build:dbg -c dbg @@ -457,12 +457,12 @@ build:rbe_linux --host_linkopt=-lm build:rbe_linux_cpu --config=rbe_linux # Linux cpu and cuda builds share the same toolchain now. 
-build:rbe_linux_cpu --host_crosstool_top="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain" -build:rbe_linux_cpu --crosstool_top="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain" -build:rbe_linux_cpu --extra_toolchains="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain-linux-x86_64" -build:rbe_linux_cpu --extra_execution_platforms="@sigbuild-r2.14-clang17_config_platform//:platform" -build:rbe_linux_cpu --host_platform="@sigbuild-r2.14-clang17_config_platform//:platform" -build:rbe_linux_cpu --platforms="@sigbuild-r2.14-clang17_config_platform//:platform" +build:rbe_linux_cpu --host_crosstool_top="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain" +build:rbe_linux_cpu --crosstool_top="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain" +build:rbe_linux_cpu --extra_toolchains="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain-linux-x86_64" +build:rbe_linux_cpu --extra_execution_platforms="@sigbuild-r2.14-clang_config_platform//:platform" +build:rbe_linux_cpu --host_platform="@sigbuild-r2.14-clang_config_platform//:platform" +build:rbe_linux_cpu --platforms="@sigbuild-r2.14-clang_config_platform//:platform" # This is needed for all Clang17 builds but must not be present in GCC builds. build:rbe_linux_cpu --copt=-Wno-error=unused-command-line-argument # This was added in clang-16 by https://reviews.llvm.org/D133574. @@ -471,7 +471,7 @@ build:rbe_linux_cpu --copt=-Wno-error=unused-command-line-argument # See https://github.com/protocolbuffers/upb/blob/9effcbcb27f0a665f9f345030188c0b291e32482/upb/upb.c#L183. 
build:rbe_linux_cpu --copt=-Wno-gnu-offsetof-extensions # Python config is the same across all containers because the binary is the same -build:rbe_linux_cpu --repo_env=TF_PYTHON_CONFIG_REPO="@sigbuild-r2.14-clang17_config_python" +build:rbe_linux_cpu --repo_env=TF_PYTHON_CONFIG_REPO="@sigbuild-r2.14-clang_config_python" build:rbe_linux_cpu --python_path="/usr/bin/python3" # These you may need to change for your own GCP project. common:rbe_linux_cpu --remote_instance_name=projects/tensorflow-testing/instances/default_instance @@ -494,9 +494,9 @@ build:rbe_linux_cuda --config=cuda_clang_official build:rbe_linux_cuda --config=rbe_linux_cpu # For Remote build execution -- GPU configuration build:rbe_linux_cuda --repo_env=REMOTE_GPU_TESTING=1 -build:rbe_linux_cuda --repo_env=TF_CUDA_CONFIG_REPO="@sigbuild-r2.14-clang17_config_cuda" -build:rbe_linux_cuda --repo_env=TF_TENSORRT_CONFIG_REPO="@sigbuild-r2.14-clang17_config_tensorrt" -build:rbe_linux_cuda --repo_env=TF_NCCL_CONFIG_REPO="@sigbuild-r2.14-clang17_config_nccl" +build:rbe_linux_cuda --repo_env=TF_CUDA_CONFIG_REPO="@sigbuild-r2.14-clang_config_cuda" +build:rbe_linux_cuda --repo_env=TF_TENSORRT_CONFIG_REPO="@sigbuild-r2.14-clang_config_tensorrt" +build:rbe_linux_cuda --repo_env=TF_NCCL_CONFIG_REPO="@sigbuild-r2.14-clang_config_nccl" test:rbe_linux_cuda --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64" # TODO(kanglan): Remove rbe_win and rbe_win_py3* after b/289091160 is fixed @@ -558,7 +558,7 @@ test:release_base --test_size_filters=small,medium # Target the AVX instruction set build:release_cpu_linux --config=avx_linux # Use the Clang toolchain to compile -build:release_cpu_linux --crosstool_top="@sigbuild-r2.14-clang17_config_cuda//crosstool:toolchain" +build:release_cpu_linux --crosstool_top="@sigbuild-r2.14-clang_config_cuda//crosstool:toolchain" # Disable clang extention that rejects type definitions within offsetof. 
# This was added in clang-16 by https://reviews.llvm.org/D133574. # Can be removed once upb is updated, since a type definition is used within diff --git a/third_party/xla/third_party/tsl/tools/toolchains/remote_config/configs.bzl b/third_party/xla/third_party/tsl/tools/toolchains/remote_config/configs.bzl index 2453dc746feefb..666e51de5ffb71 100644 --- a/third_party/xla/third_party/tsl/tools/toolchains/remote_config/configs.bzl +++ b/third_party/xla/third_party/tsl/tools/toolchains/remote_config/configs.bzl @@ -620,49 +620,10 @@ def initialize_rbe_configs(): sigbuild_tf_configs( name_container_map = { - "sigbuild-r2.14-clang": "docker://gcr.io/tensorflow-sigs/build@sha256:c03809a6b4008b430bf241efce78cdcd92c7bc41d11d0ba57216e97d813ac282", - "sigbuild-r2.14-clang-python3.8": "docker://gcr.io/tensorflow-sigs/build@sha256:c46d275e5bc760b7af465dc063629b234cfa34aabf0c7fe30581effc0b99648a", - "sigbuild-r2.14-clang-python3.9": "docker://gcr.io/tensorflow-sigs/build@sha256:c03809a6b4008b430bf241efce78cdcd92c7bc41d11d0ba57216e97d813ac282", - "sigbuild-r2.14-clang-python3.10": "docker://gcr.io/tensorflow-sigs/build@sha256:06b3a97ef247dbb00a9c6d8315e4e035d891ae2f18088de254f15d6ecedadfb9", - "sigbuild-r2.14-clang-python3.11": "docker://gcr.io/tensorflow-sigs/build@sha256:00dc9e13130727dcdeb54ca77423e317a79aae84d5783c05b38b7bbdf753f0f6", - }, - # Unclear why LIBC is set to 2.19 here, and yet manylinux2010 is 2.12 - # and manylinux2014 is 2.17. 
- env = { - "ABI_LIBC_VERSION": "glibc_2.19", - "ABI_VERSION": "gcc", - "BAZEL_COMPILER": "/usr/lib/llvm-16/bin/clang", - "BAZEL_HOST_SYSTEM": "i686-unknown-linux-gnu", - "BAZEL_TARGET_CPU": "k8", - "BAZEL_TARGET_LIBC": "glibc_2.19", - "BAZEL_TARGET_SYSTEM": "x86_64-unknown-linux-gnu", - "CC": "/usr/lib/llvm-16/bin/clang", - "CC_TOOLCHAIN_NAME": "linux_gnu_x86", - "CLEAR_CACHE": "1", - "CUDNN_INSTALL_PATH": "/usr/lib/x86_64-linux-gnu", - "CLANG_CUDA_COMPILER_PATH": "/usr/lib/llvm-16/bin/clang", - "HOST_CXX_COMPILER": "/usr/lib/llvm-16/bin/clang", - "HOST_C_COMPILER": "/usr/lib/llvm-16/bin/clang", - "PYTHON_BIN_PATH": "/usr/bin/python3", - "TENSORRT_INSTALL_PATH": "/usr/lib/x86_64-linux-gnu", - "TF_CUDA_CLANG": "1", - "TF_CUDA_COMPUTE_CAPABILITIES": "3.5,6.0", - "TF_CUDA_VERSION": "11.2", - "TF_CUDNN_VERSION": "8.1", - "TF_ENABLE_XLA": "1", - "TF_NEED_CUDA": "1", - "TF_NEED_TENSORRT": "1", - "TF_SYSROOT": "/dt9", - "TF_TENSORRT_VERSION": "7.2", - }, - ) - - sigbuild_tf_configs( - name_container_map = { - "sigbuild-r2.14-clang17": "docker://gcr.io/tensorflow-sigs/build@sha256:edf0324686ffbf488ed0a6b7d3a29ecc6f7b7ad9c04b2e1a527a6a47c0dc1b4b", - "sigbuild-r2.14-clang17-python3.9": "docker://gcr.io/tensorflow-sigs/build@sha256:edf0324686ffbf488ed0a6b7d3a29ecc6f7b7ad9c04b2e1a527a6a47c0dc1b4b", - "sigbuild-r2.14-clang17-python3.10": "docker://gcr.io/tensorflow-sigs/build@sha256:5b06acad335d5c24aa2f63d39d9de230affd04aa982dda0242eeb893b9dae363", - "sigbuild-r2.14-clang17-python3.11": "docker://gcr.io/tensorflow-sigs/build@sha256:80d991d5cf0ac710b568c71c7790270691749afa19de49d7c1a121ba3f92cd58", + "sigbuild-r2.14-clang": "docker://gcr.io/tensorflow-sigs/build@sha256:edf0324686ffbf488ed0a6b7d3a29ecc6f7b7ad9c04b2e1a527a6a47c0dc1b4b", + "sigbuild-r2.14-clang-python3.9": "docker://gcr.io/tensorflow-sigs/build@sha256:edf0324686ffbf488ed0a6b7d3a29ecc6f7b7ad9c04b2e1a527a6a47c0dc1b4b", + "sigbuild-r2.14-clang-python3.10": 
"docker://gcr.io/tensorflow-sigs/build@sha256:5b06acad335d5c24aa2f63d39d9de230affd04aa982dda0242eeb893b9dae363", + "sigbuild-r2.14-clang-python3.11": "docker://gcr.io/tensorflow-sigs/build@sha256:80d991d5cf0ac710b568c71c7790270691749afa19de49d7c1a121ba3f92cd58", }, # Unclear why LIBC is set to 2.19 here, and yet manylinux2010 is 2.12 # and manylinux2014 is 2.17. diff --git a/third_party/xla/tools/toolchains/remote_config/configs.bzl b/third_party/xla/tools/toolchains/remote_config/configs.bzl index 2453dc746feefb..666e51de5ffb71 100644 --- a/third_party/xla/tools/toolchains/remote_config/configs.bzl +++ b/third_party/xla/tools/toolchains/remote_config/configs.bzl @@ -620,49 +620,10 @@ def initialize_rbe_configs(): sigbuild_tf_configs( name_container_map = { - "sigbuild-r2.14-clang": "docker://gcr.io/tensorflow-sigs/build@sha256:c03809a6b4008b430bf241efce78cdcd92c7bc41d11d0ba57216e97d813ac282", - "sigbuild-r2.14-clang-python3.8": "docker://gcr.io/tensorflow-sigs/build@sha256:c46d275e5bc760b7af465dc063629b234cfa34aabf0c7fe30581effc0b99648a", - "sigbuild-r2.14-clang-python3.9": "docker://gcr.io/tensorflow-sigs/build@sha256:c03809a6b4008b430bf241efce78cdcd92c7bc41d11d0ba57216e97d813ac282", - "sigbuild-r2.14-clang-python3.10": "docker://gcr.io/tensorflow-sigs/build@sha256:06b3a97ef247dbb00a9c6d8315e4e035d891ae2f18088de254f15d6ecedadfb9", - "sigbuild-r2.14-clang-python3.11": "docker://gcr.io/tensorflow-sigs/build@sha256:00dc9e13130727dcdeb54ca77423e317a79aae84d5783c05b38b7bbdf753f0f6", - }, - # Unclear why LIBC is set to 2.19 here, and yet manylinux2010 is 2.12 - # and manylinux2014 is 2.17. 
- env = { - "ABI_LIBC_VERSION": "glibc_2.19", - "ABI_VERSION": "gcc", - "BAZEL_COMPILER": "/usr/lib/llvm-16/bin/clang", - "BAZEL_HOST_SYSTEM": "i686-unknown-linux-gnu", - "BAZEL_TARGET_CPU": "k8", - "BAZEL_TARGET_LIBC": "glibc_2.19", - "BAZEL_TARGET_SYSTEM": "x86_64-unknown-linux-gnu", - "CC": "/usr/lib/llvm-16/bin/clang", - "CC_TOOLCHAIN_NAME": "linux_gnu_x86", - "CLEAR_CACHE": "1", - "CUDNN_INSTALL_PATH": "/usr/lib/x86_64-linux-gnu", - "CLANG_CUDA_COMPILER_PATH": "/usr/lib/llvm-16/bin/clang", - "HOST_CXX_COMPILER": "/usr/lib/llvm-16/bin/clang", - "HOST_C_COMPILER": "/usr/lib/llvm-16/bin/clang", - "PYTHON_BIN_PATH": "/usr/bin/python3", - "TENSORRT_INSTALL_PATH": "/usr/lib/x86_64-linux-gnu", - "TF_CUDA_CLANG": "1", - "TF_CUDA_COMPUTE_CAPABILITIES": "3.5,6.0", - "TF_CUDA_VERSION": "11.2", - "TF_CUDNN_VERSION": "8.1", - "TF_ENABLE_XLA": "1", - "TF_NEED_CUDA": "1", - "TF_NEED_TENSORRT": "1", - "TF_SYSROOT": "/dt9", - "TF_TENSORRT_VERSION": "7.2", - }, - ) - - sigbuild_tf_configs( - name_container_map = { - "sigbuild-r2.14-clang17": "docker://gcr.io/tensorflow-sigs/build@sha256:edf0324686ffbf488ed0a6b7d3a29ecc6f7b7ad9c04b2e1a527a6a47c0dc1b4b", - "sigbuild-r2.14-clang17-python3.9": "docker://gcr.io/tensorflow-sigs/build@sha256:edf0324686ffbf488ed0a6b7d3a29ecc6f7b7ad9c04b2e1a527a6a47c0dc1b4b", - "sigbuild-r2.14-clang17-python3.10": "docker://gcr.io/tensorflow-sigs/build@sha256:5b06acad335d5c24aa2f63d39d9de230affd04aa982dda0242eeb893b9dae363", - "sigbuild-r2.14-clang17-python3.11": "docker://gcr.io/tensorflow-sigs/build@sha256:80d991d5cf0ac710b568c71c7790270691749afa19de49d7c1a121ba3f92cd58", + "sigbuild-r2.14-clang": "docker://gcr.io/tensorflow-sigs/build@sha256:edf0324686ffbf488ed0a6b7d3a29ecc6f7b7ad9c04b2e1a527a6a47c0dc1b4b", + "sigbuild-r2.14-clang-python3.9": "docker://gcr.io/tensorflow-sigs/build@sha256:edf0324686ffbf488ed0a6b7d3a29ecc6f7b7ad9c04b2e1a527a6a47c0dc1b4b", + "sigbuild-r2.14-clang-python3.10": 
"docker://gcr.io/tensorflow-sigs/build@sha256:5b06acad335d5c24aa2f63d39d9de230affd04aa982dda0242eeb893b9dae363", + "sigbuild-r2.14-clang-python3.11": "docker://gcr.io/tensorflow-sigs/build@sha256:80d991d5cf0ac710b568c71c7790270691749afa19de49d7c1a121ba3f92cd58", }, # Unclear why LIBC is set to 2.19 here, and yet manylinux2010 is 2.12 # and manylinux2014 is 2.17. From 16c23ebd42c6487544881fd361fc0cd45d2abc25 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 22 Sep 2023 06:25:38 -0700 Subject: [PATCH 140/567] Add fusion pattern for `jax.nn.log_softmax` PiperOrigin-RevId: 567604558 --- .../compiler/mlir/lite/tests/optimize.mlir | 65 +++++++++++++ .../compiler/mlir/lite/transforms/optimize.cc | 91 ++++++++++++++++++- 2 files changed, 152 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/tests/optimize.mlir b/tensorflow/compiler/mlir/lite/tests/optimize.mlir index 3fa644f382e31a..26369cd1f13e1e 100644 --- a/tensorflow/compiler/mlir/lite/tests/optimize.mlir +++ b/tensorflow/compiler/mlir/lite/tests/optimize.mlir @@ -3718,3 +3718,68 @@ func.func @FuseTransposeAfterBatchMatmul(%arg0: tensor<4x1024xf32>, %arg1: tenso func.return %1 : tensor<8x4xf32> // CHECK: return %[[RES0]] : tensor<8x4xf32> } + +// CHECK-LABEL: fuseLogSoftmax +func.func @fuseLogSoftmax(%arg0: tensor<10x10xf32>) -> tensor<10x10xf32> { + %0 = arith.constant dense<1> : tensor<1xi32> + %1 = "tfl.reduce_max"(%arg0, %0) {keep_dims = true} : (tensor<10x10xf32>, tensor<1xi32>) -> tensor<10x1xf32> + %2 = tfl.sub(%arg0, %1) {fused_activation_function = "NONE"} : (tensor<10x10xf32>, tensor<10x1xf32>) -> tensor<10x10xf32> + %3 = "tfl.exp"(%2) : (tensor<10x10xf32>) -> tensor<10x10xf32> + %4 = "tfl.sum"(%3, %0) {keep_dims = true} : (tensor<10x10xf32>, tensor<1xi32>) -> tensor<10x1xf32> + %5 = "tfl.log"(%4) : (tensor<10x1xf32>) -> tensor<10x1xf32> + %6 = tfl.sub(%2, %5) {fused_activation_function = "NONE"} : (tensor<10x10xf32>, tensor<10x1xf32>) -> tensor<10x10xf32> + return %6 : 
tensor<10x10xf32> + // CHECK: "tfl.log_softmax"(%arg0) : (tensor<10x10xf32>) -> tensor<10x10xf32> +} + +// CHECK-LABEL: fuseLogSoftmaxAxisNegativeOne +func.func @fuseLogSoftmaxAxisNegativeOne(%arg0: tensor<10x10xf32>) -> tensor<10x10xf32> { + %0 = arith.constant dense<-1> : tensor<1xi32> + %1 = "tfl.reduce_max"(%arg0, %0) {keep_dims = true} : (tensor<10x10xf32>, tensor<1xi32>) -> tensor<10x1xf32> + %2 = tfl.sub(%arg0, %1) {fused_activation_function = "NONE"} : (tensor<10x10xf32>, tensor<10x1xf32>) -> tensor<10x10xf32> + %3 = "tfl.exp"(%2) : (tensor<10x10xf32>) -> tensor<10x10xf32> + %4 = "tfl.sum"(%3, %0) {keep_dims = true} : (tensor<10x10xf32>, tensor<1xi32>) -> tensor<10x1xf32> + %5 = "tfl.log"(%4) : (tensor<10x1xf32>) -> tensor<10x1xf32> + %6 = tfl.sub(%2, %5) {fused_activation_function = "NONE"} : (tensor<10x10xf32>, tensor<10x1xf32>) -> tensor<10x10xf32> + return %6 : tensor<10x10xf32> + // CHECK: "tfl.log_softmax"(%arg0) : (tensor<10x10xf32>) -> tensor<10x10xf32> +} + +// CHECK-LABEL: fuseLogSoftmaxFusedActivationFunction +func.func @fuseLogSoftmaxFusedActivationFunction(%arg0: tensor<10x10xf32>) -> tensor<10x10xf32> { + %0 = arith.constant dense<1> : tensor<1xi32> + %1 = "tfl.reduce_max"(%arg0, %0) {keep_dims = true} : (tensor<10x10xf32>, tensor<1xi32>) -> tensor<10x1xf32> + %2 = tfl.sub(%arg0, %1) {fused_activation_function = "RELU"} : (tensor<10x10xf32>, tensor<10x1xf32>) -> tensor<10x10xf32> + %3 = "tfl.exp"(%2) : (tensor<10x10xf32>) -> tensor<10x10xf32> + %4 = "tfl.sum"(%3, %0) {keep_dims = true} : (tensor<10x10xf32>, tensor<1xi32>) -> tensor<10x1xf32> + %5 = "tfl.log"(%4) : (tensor<10x1xf32>) -> tensor<10x1xf32> + %6 = tfl.sub(%2, %5) {fused_activation_function = "RELU"} : (tensor<10x10xf32>, tensor<10x1xf32>) -> tensor<10x10xf32> + return %6 : tensor<10x10xf32> + // CHECK-NOT: "tfl.log_softmax"(%arg0) : (tensor<10x10xf32>) -> tensor<10x10xf32> +} + +// CHECK-LABEL: fuseLogSoftmax1D +func.func @fuseLogSoftmax1D(%arg0: tensor<10xf32>) -> tensor<10xf32> { 
+ %0 = arith.constant dense<0> : tensor<1xi32> + %1 = "tfl.reduce_max"(%arg0, %0) {keep_dims = true} : (tensor<10xf32>, tensor<1xi32>) -> tensor<1xf32> + %2 = tfl.sub(%arg0, %1) {fused_activation_function = "NONE"} : (tensor<10xf32>, tensor<1xf32>) -> tensor<10xf32> + %3 = "tfl.exp"(%2) : (tensor<10xf32>) -> tensor<10xf32> + %4 = "tfl.sum"(%3, %0) {keep_dims = true} : (tensor<10xf32>, tensor<1xi32>) -> tensor<1xf32> + %5 = "tfl.log"(%4) : (tensor<1xf32>) -> tensor<1xf32> + %6 = tfl.sub(%2, %5) {fused_activation_function = "NONE"} : (tensor<10xf32>, tensor<1xf32>) -> tensor<10xf32> + return %6 : tensor<10xf32> + // CHECK: "tfl.log_softmax"(%arg0) : (tensor<10xf32>) -> tensor<10xf32> +} + +// CHECK-LABEL: fuseLogSoftmaxNotLastAxis +func.func @fuseLogSoftmaxNotLastAxis(%arg0: tensor<10x10x10xf32>) -> tensor<10x10x10xf32> { + %0 = arith.constant dense<1> : tensor<1xi32> + %1 = "tfl.reduce_max"(%arg0, %0) {keep_dims = true} : (tensor<10x10x10xf32>, tensor<1xi32>) -> tensor<10x1x10xf32> + %2 = tfl.sub(%arg0, %1) {fused_activation_function = "NONE"} : (tensor<10x10x10xf32>, tensor<10x1x10xf32>) -> tensor<10x10x10xf32> + %3 = "tfl.exp"(%2) : (tensor<10x10x10xf32>) -> tensor<10x10x10xf32> + %4 = "tfl.sum"(%3, %0) {keep_dims = true} : (tensor<10x10x10xf32>, tensor<1xi32>) -> tensor<10x1x10xf32> + %5 = "tfl.log"(%4) : (tensor<10x1x10xf32>) -> tensor<10x1x10xf32> + %6 = tfl.sub(%2, %5) {fused_activation_function = "NONE"} : (tensor<10x10x10xf32>, tensor<10x1x10xf32>) -> tensor<10x10x10xf32> + return %6 : tensor<10x10x10xf32> + // CHECK-NOT: "tfl.log_softmax"(%arg0) : (tensor<10x10x10xf32>) -> tensor<10x10x10xf32> +} diff --git a/tensorflow/compiler/mlir/lite/transforms/optimize.cc b/tensorflow/compiler/mlir/lite/transforms/optimize.cc index d09a92e0608347..64bff681053f6e 100644 --- a/tensorflow/compiler/mlir/lite/transforms/optimize.cc +++ b/tensorflow/compiler/mlir/lite/transforms/optimize.cc @@ -2203,6 +2203,88 @@ struct FuseTransposeReshapeIntoBatchMatmul } }; +struct
FuseLogSoftmax : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + LogicalResult matchAndRewrite(TFL::SubOp sub_op, + PatternRewriter &rewriter) const override { + if (sub_op.getFusedActivationFunction() != "NONE") { + return failure(); + } + auto log_op = dyn_cast_or_null(sub_op.getRhs().getDefiningOp()); + if (!log_op || !log_op->hasOneUse()) { + return failure(); + } + auto sum_op = dyn_cast_or_null(log_op.getX().getDefiningOp()); + if (!sum_op || !sum_op.getKeepDims() || + !isSupportedAxis( + sum_op.getAxes(), + sum_op.getOperand(0).getType().cast().getRank())) { + return failure(); + } + if (!sum_op->hasOneUse()) { + return failure(); + } + auto exp_op = + dyn_cast_or_null(sum_op.getInput().getDefiningOp()); + if (!exp_op || !exp_op->hasOneUse()) { + return failure(); + } + + auto parent_sub_op = + dyn_cast_or_null(sub_op.getLhs().getDefiningOp()); + if (!parent_sub_op || parent_sub_op != dyn_cast_or_null( + exp_op.getX().getDefiningOp())) { + return failure(); + } + if (std::distance(parent_sub_op->getUses().begin(), + parent_sub_op->getUses().end()) != 2) { + return failure(); + } + + auto reduce_max_op = dyn_cast_or_null( + parent_sub_op.getRhs().getDefiningOp()); + if (!reduce_max_op || !reduce_max_op->hasOneUse() || + !reduce_max_op.getKeepDims() || + !isSupportedAxis(reduce_max_op.getAxes(), reduce_max_op.getOperand(0) + .getType() + .cast() + .getRank())) { + return failure(); + } + + if (reduce_max_op.getInput() != parent_sub_op.getLhs()) { + return failure(); + } + + rewriter.replaceOpWithNewOp(sub_op, sub_op.getType(), + parent_sub_op.getLhs()); + return success(); + } + + // The TFL_LogSoftmaxOp implementation only works on the last axis, so we + // check that both TFL_ReduceMaxOP and TFL_SumOp use the last axis + bool isSupportedAxis(mlir::Value value, int64_t rank) const { + auto const_op = + dyn_cast_or_null(value.getDefiningOp()); + if (!const_op) { + return false; + } + auto axes = dyn_cast(const_op.getValueAttr()); + if 
(!axes || axes.getNumElements() != 1) { + return false; + } + auto axes_elem_ty = axes.getType().getElementType(); + if (!axes_elem_ty.isInteger(32) && !axes_elem_ty.isInteger(64)) { + return false; + } + const int64_t axis = (*axes.begin()).getSExtValue(); + if (axis != rank - 1 && axis != -1) { + return false; + } + return true; + } +}; + // Adds canonicalization patterns to the list of patterns. void AddCanonicalizationPatterns(MLIRContext *context, RewritePatternSet *patterns) { @@ -2244,10 +2326,11 @@ void OptimizePass::runOnOperation() { RewritePatternSet phase_2_patterns(&getContext()); TFL::populateWithGenerated(phase_2_patterns); phase_2_patterns.add< - ScalarizeSplatConstantForAdd, ScalarizeSplatConstantForSub, - ScalarizeSplatConstantForMul, ScalarizeSplatConstantForDiv, - FuseFullyConnectedAndAdd, FuseAddAndFullyConnected, - FuseFullyConnectedAndMul, FuseFullyConnectedAndReluX, + FuseLogSoftmax, ScalarizeSplatConstantForAdd, + ScalarizeSplatConstantForSub, ScalarizeSplatConstantForMul, + ScalarizeSplatConstantForDiv, FuseFullyConnectedAndAdd, + FuseAddAndFullyConnected, FuseFullyConnectedAndMul, + FuseFullyConnectedAndReluX, FuseFullyConnectedAndReluX, FuseFullyConnectedAndReluX, FuseBinaryOpToFollowingConv2D, FuseBinaryOpToFollowingDepthwiseConv2D, From 8c87d6780b91eeb010a611e37599ac3f01f32f08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tam=C3=A1s=20Danyluk?= Date: Fri, 22 Sep 2023 06:31:59 -0700 Subject: [PATCH 141/567] [XLA:GPU][NFC] Separate TritonAutotuner from other autotuning passes This is a preparation for a later CL. 
PiperOrigin-RevId: 567605682 --- .../xla/xla/service/gpu/amdgpu_compiler.cc | 3 +-- .../xla/xla/service/gpu/amdgpu_compiler.h | 7 ++++--- third_party/xla/xla/service/gpu/gpu_compiler.cc | 6 ++++-- third_party/xla/xla/service/gpu/gpu_compiler.h | 16 +++++++++++----- .../xla/xla/service/gpu/nvptx_compiler.cc | 7 ++++++- third_party/xla/xla/service/gpu/nvptx_compiler.h | 12 +++++++++--- 6 files changed, 35 insertions(+), 16 deletions(-) diff --git a/third_party/xla/xla/service/gpu/amdgpu_compiler.cc b/third_party/xla/xla/service/gpu/amdgpu_compiler.cc index 723ef0df23da3e..74bd6c6c01d699 100644 --- a/third_party/xla/xla/service/gpu/amdgpu_compiler.cc +++ b/third_party/xla/xla/service/gpu/amdgpu_compiler.cc @@ -149,7 +149,7 @@ bool AMDGPUCompiler::RequiresCollectiveScheduleLinearizer( return false; } -Status AMDGPUCompiler::AddAutotuningPasses( +Status AMDGPUCompiler::AddConvAndGemmAutotuningPasses( HloPassPipeline* pipeline, HloModule* hlo_module, AutotuneConfig& autotune_config, tsl::thread::ThreadPool* thread_pool) { if (GpuConvAlgorithmPicker::IsEnabled(hlo_module)) { @@ -157,7 +157,6 @@ Status AMDGPUCompiler::AddAutotuningPasses( } // TODO: // pipeline->AddPass(autotune_config); - // pipeline->AddPass(autotune_config, thread_pool); return OkStatus(); } diff --git a/third_party/xla/xla/service/gpu/amdgpu_compiler.h b/third_party/xla/xla/service/gpu/amdgpu_compiler.h index 37edfe78e0b063..c05d256d2e32ea 100644 --- a/third_party/xla/xla/service/gpu/amdgpu_compiler.h +++ b/third_party/xla/xla/service/gpu/amdgpu_compiler.h @@ -47,9 +47,10 @@ class AMDGPUCompiler : public GpuCompiler { bool RequiresCollectiveScheduleLinearizer( const HloModule* module, se::StreamExecutor* stream_exec) override; - Status AddAutotuningPasses(HloPassPipeline* pipeline, HloModule* hlo_module, - AutotuneConfig& autotune_config, - tsl::thread::ThreadPool* thread_pool) override; + Status AddConvAndGemmAutotuningPasses( + HloPassPipeline* pipeline, HloModule* hlo_module, + AutotuneConfig& 
autotune_config, + tsl::thread::ThreadPool* thread_pool) override; Status LoadAutotuneResultsFromFile( const DebugOptions& debug_options) override; diff --git a/third_party/xla/xla/service/gpu/gpu_compiler.cc b/third_party/xla/xla/service/gpu/gpu_compiler.cc index 66dd3fd2ac57bc..ec61161d3b5ef0 100644 --- a/third_party/xla/xla/service/gpu/gpu_compiler.cc +++ b/third_party/xla/xla/service/gpu/gpu_compiler.cc @@ -1015,8 +1015,10 @@ Status GpuCompiler::OptimizeHloPostLayoutAssignment( // f32). add_float_normalization(pipeline); - TF_RETURN_IF_ERROR( - AddAutotuningPasses(&pipeline, hlo_module, autotune_config, thread_pool)); + TF_RETURN_IF_ERROR(AddConvAndGemmAutotuningPasses( + &pipeline, hlo_module, autotune_config, thread_pool)); + TF_RETURN_IF_ERROR(AddTritonGemmAutotuningPasses( + &pipeline, hlo_module, autotune_config, thread_pool)); // The Triton autotuner can insert new bf16 reductions that need to be // normalized again. diff --git a/third_party/xla/xla/service/gpu/gpu_compiler.h b/third_party/xla/xla/service/gpu/gpu_compiler.h index 42d219c6c63d99..2a34ee7b7f86e5 100644 --- a/third_party/xla/xla/service/gpu/gpu_compiler.h +++ b/third_party/xla/xla/service/gpu/gpu_compiler.h @@ -193,11 +193,17 @@ class GpuCompiler : public LLVMCompiler { return false; } - // Add autotuning passes for convolution, gemm and triton. - virtual Status AddAutotuningPasses(HloPassPipeline* pipeline, - HloModule* hlo_module, - AutotuneConfig& autotune_config, - tsl::thread::ThreadPool* thread_pool) { + // Add autotuning passes for convolution and gemm (except triton). + virtual Status AddConvAndGemmAutotuningPasses( + HloPassPipeline* pipeline, HloModule* hlo_module, + AutotuneConfig& autotune_config, tsl::thread::ThreadPool* thread_pool) { + return OkStatus(); + } + + // Add autotuning passes for triton gemm. 
+ virtual Status AddTritonGemmAutotuningPasses( + HloPassPipeline* pipeline, HloModule* hlo_module, + AutotuneConfig& autotune_config, tsl::thread::ThreadPool* thread_pool) { return OkStatus(); } diff --git a/third_party/xla/xla/service/gpu/nvptx_compiler.cc b/third_party/xla/xla/service/gpu/nvptx_compiler.cc index 8dd3b2fc8d74c6..5ee7f942e30fed 100644 --- a/third_party/xla/xla/service/gpu/nvptx_compiler.cc +++ b/third_party/xla/xla/service/gpu/nvptx_compiler.cc @@ -292,14 +292,19 @@ bool NVPTXCompiler::RequiresCollectiveScheduleLinearizer( return false; } -Status NVPTXCompiler::AddAutotuningPasses( +Status NVPTXCompiler::AddConvAndGemmAutotuningPasses( HloPassPipeline* pipeline, HloModule* hlo_module, AutotuneConfig& autotune_config, tsl::thread::ThreadPool* thread_pool) { if (GpuConvAlgorithmPicker::IsEnabled(hlo_module)) { pipeline->AddPass(autotune_config); } pipeline->AddPass(autotune_config); + return OkStatus(); +} +Status NVPTXCompiler::AddTritonGemmAutotuningPasses( + HloPassPipeline* pipeline, HloModule* hlo_module, + AutotuneConfig& autotune_config, tsl::thread::ThreadPool* thread_pool) { pipeline->AddPass(autotune_config, thread_pool); return OkStatus(); } diff --git a/third_party/xla/xla/service/gpu/nvptx_compiler.h b/third_party/xla/xla/service/gpu/nvptx_compiler.h index cdc4b409e11030..debc4539dfe161 100644 --- a/third_party/xla/xla/service/gpu/nvptx_compiler.h +++ b/third_party/xla/xla/service/gpu/nvptx_compiler.h @@ -53,9 +53,15 @@ class NVPTXCompiler : public GpuCompiler { bool RequiresCollectiveScheduleLinearizer( const HloModule* module, se::StreamExecutor* stream_exec) override; - Status AddAutotuningPasses(HloPassPipeline* pipeline, HloModule* hlo_module, - AutotuneConfig& autotune_config, - tsl::thread::ThreadPool* thread_pool) override; + Status AddConvAndGemmAutotuningPasses( + HloPassPipeline* pipeline, HloModule* hlo_module, + AutotuneConfig& autotune_config, + tsl::thread::ThreadPool* thread_pool) override; + + Status 
AddTritonGemmAutotuningPasses( + HloPassPipeline* pipeline, HloModule* hlo_module, + AutotuneConfig& autotune_config, + tsl::thread::ThreadPool* thread_pool) override; Status LoadAutotuneResultsFromFile( const DebugOptions& debug_options) override; From ff8fd53f5cf138c582fdfa906570cce66ba615c2 Mon Sep 17 00:00:00 2001 From: Ilia Sergachev Date: Fri, 22 Sep 2023 07:45:58 -0700 Subject: [PATCH 142/567] [XLA:GPU] Fix and cover with tests type conversions in Triton emitters. PiperOrigin-RevId: 567619889 --- third_party/xla/xla/service/gpu/BUILD | 4 + .../xla/xla/service/gpu/ir_emitter_triton.cc | 138 ++++++++++-------- .../ir_emitter_triton_parametrized_test.cc | 119 ++++++++++++--- 3 files changed, 181 insertions(+), 80 deletions(-) diff --git a/third_party/xla/xla/service/gpu/BUILD b/third_party/xla/xla/service/gpu/BUILD index f6ba7b004e3988..17eb3ead3fd0a4 100644 --- a/third_party/xla/xla/service/gpu/BUILD +++ b/third_party/xla/xla/service/gpu/BUILD @@ -626,7 +626,10 @@ xla_test( "//xla:comparison_util", "//xla:error_spec", "//xla:xla_data_proto_cc", + "//xla:xla_proto_cc", "//xla/hlo/ir:hlo", + "//xla/service:pattern_matcher", + "//xla/service:pattern_matcher_gmock", "//xla/service/gpu/tests:gpu_codegen_test", "//xla/stream_executor:device_description", "//xla/stream_executor/cuda:cublas_plugin", @@ -634,6 +637,7 @@ xla_test( "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/strings", "@com_google_googletest//:gtest", + "@local_tsl//tsl/platform:statusor", "@local_tsl//tsl/platform:tensor_float_32_hdr_lib", ], ) diff --git a/third_party/xla/xla/service/gpu/ir_emitter_triton.cc b/third_party/xla/xla/service/gpu/ir_emitter_triton.cc index 70457113f8be7b..9b7fbf675625ae 100644 --- a/third_party/xla/xla/service/gpu/ir_emitter_triton.cc +++ b/third_party/xla/xla/service/gpu/ir_emitter_triton.cc @@ -153,7 +153,7 @@ Type TritonType(mlir::OpBuilder b, PrimitiveType t) { case S16: return b.getI16Type(); case PRED: - // Treat PRED as S8. 
+ return b.getI1Type(); case S8: return b.getI8Type(); default: @@ -162,6 +162,67 @@ Type TritonType(mlir::OpBuilder b, PrimitiveType t) { } } +Type StorageType(mlir::OpBuilder b, Type t) { + if (t.isInteger(1)) { + return b.getI8Type(); + } + return t; +} + +// Get the value of the scalar constant's literal in a C++ type. +template +T ScalarConstantValue(const HloInstruction& instr, PrimitiveType dst_type) { + CHECK(hlo_query::IsScalarConstant(&instr)); + StatusOr converted = instr.literal().Convert(dst_type); + TF_CHECK_OK(converted.status()); + return converted.value().GetFirstElement(); +} + +// Create a scalar constant. +template +ma::ConstantOp CreateConst(ImplicitLocOpBuilder b, Type type, T value) { + if (type.isa()) { + return b.create(b.getIntegerAttr(type, value)); + } + if (type.isa()) { + return b.create( + b.getFloatAttr(type, static_cast(value))); + } + LOG(FATAL) << "Constant type not supported: " << llvm_ir::DumpToString(type); +} + +// Create a tensor constant. +template +ma::ConstantOp CreateConst(ImplicitLocOpBuilder& b, Type type, T value, + ArrayRef shape) { + auto tensor_type = mlir::RankedTensorType::get(shape, type); + if (auto int_type = type.dyn_cast()) { + return b.create(mlir::DenseElementsAttr::get( + tensor_type, mlir::APInt(int_type.getIntOrFloatBitWidth(), value))); + } + if (auto float_type = type.dyn_cast()) { + return b.create(mlir::DenseElementsAttr::get( + tensor_type, b.getFloatAttr(type, static_cast(value)))); + } + LOG(FATAL) << "Constant type not supported: " << llvm_ir::DumpToString(type); +} + +Value ZerosLike(ImplicitLocOpBuilder& b, Value x) { + if (auto src_shaped_ty = x.getType().dyn_cast()) { + Type src_ty = src_shaped_ty.getElementType(); + return CreateConst(b, src_ty, 0, src_shaped_ty.getShape()); + } + return CreateConst(b, x.getType(), 0); +} + +Value OnesLike(ImplicitLocOpBuilder& b, Value x) { + if (auto src_shaped_ty = x.getType().dyn_cast()) { + Type src_ty = src_shaped_ty.getElementType(); + return 
CreateConst(b, src_ty, 1, src_shaped_ty.getShape()); + } + return CreateConst(b, x.getType(), 1); +} + // Triton type conversions. Value Cast(ImplicitLocOpBuilder& b, Value value, Type dst_element_ty) { Type src_ty = value.getType(); @@ -185,7 +246,7 @@ Value Cast(ImplicitLocOpBuilder& b, Value value, Type dst_element_ty) { return b.create(dst_ty, Cast(b, value, b.getF32Type())); } - // Float <=> float + // float => float auto src_fp_element_ty = src_element_ty.dyn_cast(); auto dst_fp_element_ty = dst_element_ty.dyn_cast(); if (src_fp_element_ty && dst_fp_element_ty) { @@ -196,6 +257,15 @@ Value Cast(ImplicitLocOpBuilder& b, Value value, Type dst_element_ty) { return b.create(dst_ty, value); } } + // int => int + if (src_element_ty.isa() && + dst_element_ty.isa()) { + if (src_element_ty.getIntOrFloatBitWidth() < + dst_element_ty.getIntOrFloatBitWidth()) { + return b.create(dst_ty, value); + } + return b.create(dst_ty, value); + } // int => float if (src_element_ty.isa() && dst_fp_element_ty) { // TODO(b/266862493): Support unsigned integer types. @@ -207,6 +277,10 @@ Value Cast(ImplicitLocOpBuilder& b, Value value, Type dst_element_ty) { // float => int if (src_fp_element_ty && dst_element_ty.isa()) { // TODO(b/266862493): Support unsigned integer types. + if (dst_element_ty.isInteger(1)) { + return b.create(ma::CmpFPredicate::UNE, value, + ZerosLike(b, value)); + } return b.create(dst_ty, value); } @@ -215,44 +289,6 @@ Value Cast(ImplicitLocOpBuilder& b, Value value, Type dst_element_ty) { << llvm_ir::DumpToString(dst_element_ty); } -// Get the value of the scalar constant's literal in a C++ type. -template -T ScalarConstantValue(const HloInstruction& instr, PrimitiveType dst_type) { - CHECK(hlo_query::IsScalarConstant(&instr)); - StatusOr converted = instr.literal().Convert(dst_type); - TF_CHECK_OK(converted.status()); - return converted.value().GetFirstElement(); -} - -// Create a scalar constant. 
-template -ma::ConstantOp CreateConst(ImplicitLocOpBuilder b, Type type, T value) { - if (type.isa()) { - return b.create(b.getIntegerAttr(type, value)); - } - if (type.isa()) { - return b.create( - b.getFloatAttr(type, static_cast(value))); - } - LOG(FATAL) << "Constant type not supported: " << llvm_ir::DumpToString(type); -} - -// Create a tensor constant. -template -ma::ConstantOp CreateConst(ImplicitLocOpBuilder& b, Type type, T value, - ArrayRef shape) { - auto tensor_type = mlir::RankedTensorType::get(shape, type); - if (auto int_type = type.dyn_cast()) { - return b.create(mlir::DenseElementsAttr::get( - tensor_type, mlir::APInt(int_type.getIntOrFloatBitWidth(), value))); - } - if (auto float_type = type.dyn_cast()) { - return b.create(mlir::DenseElementsAttr::get( - tensor_type, b.getFloatAttr(type, static_cast(value)))); - } - LOG(FATAL) << "Constant type not supported: " << llvm_ir::DumpToString(type); -} - Value Subtract(ImplicitLocOpBuilder& b, ValueRange values) { if (mlir::getElementTypeOrSelf(values[0]).isa()) { return b.create(values[0], values[1]); @@ -287,22 +323,6 @@ Value Minimum(ImplicitLocOpBuilder& b, ValueRange values) { return b.create(cmp, values[0], values[1]); } -Value ZerosLike(ImplicitLocOpBuilder& b, Value x) { - if (auto src_shaped_ty = x.getType().dyn_cast()) { - Type src_ty = src_shaped_ty.getElementType(); - return CreateConst(b, src_ty, 0, src_shaped_ty.getShape()); - } - return CreateConst(b, x.getType(), 0); -} - -Value OnesLike(ImplicitLocOpBuilder& b, Value x) { - if (auto src_shaped_ty = x.getType().dyn_cast()) { - Type src_ty = src_shaped_ty.getElementType(); - return CreateConst(b, src_ty, 1, src_shaped_ty.getShape()); - } - return CreateConst(b, x.getType(), 1); -} - // TODO(b/269489810): Contribute nicer builders to Triton, so we don't need to // define these utilities. 
Value Splat(ImplicitLocOpBuilder& b, Value value, ArrayRef shape) { @@ -1633,13 +1653,15 @@ StatusOr TritonWrapper( SmallVector fn_arg_types; for (HloInstruction* p : hlo_computation->parameter_instructions()) { fn_arg_types.push_back(mt::PointerType::get( - TritonType(b, p->shape().element_type()), mn::kGlobalMemorySpace)); + StorageType(b, TritonType(b, p->shape().element_type())), + mn::kGlobalMemorySpace)); } for (const ShapeUtil::IndexedShape& s : ShapeUtil::GetLeafShapes(hlo_computation->root_instruction()->shape())) { fn_arg_types.push_back(mt::PointerType::get( - TritonType(b, s.shape.element_type()), mn::kGlobalMemorySpace)); + StorageType(b, TritonType(b, s.shape.element_type())), + mn::kGlobalMemorySpace)); } auto fn = b.create(loc, fn_name, diff --git a/third_party/xla/xla/service/gpu/ir_emitter_triton_parametrized_test.cc b/third_party/xla/xla/service/gpu/ir_emitter_triton_parametrized_test.cc index b68ce63f6a2096..832c7a295b981a 100644 --- a/third_party/xla/xla/service/gpu/ir_emitter_triton_parametrized_test.cc +++ b/third_party/xla/xla/service/gpu/ir_emitter_triton_parametrized_test.cc @@ -14,28 +14,40 @@ limitations under the License. 
==============================================================================*/ #include +#include +#include #include #include #include +#include #include #include "absl/base/optimization.h" +#include "absl/strings/str_cat.h" #include "absl/strings/str_replace.h" #include "absl/strings/substitute.h" #include "xla/comparison_util.h" #include "xla/error_spec.h" +#include "xla/hlo/ir/hlo_instruction.h" +#include "xla/hlo/ir/hlo_module.h" #include "xla/hlo/ir/hlo_opcode.h" #include "xla/primitive_util.h" #include "xla/service/gpu/gemm_rewriter_triton.h" #include "xla/service/gpu/tests/gpu_codegen_test.h" +#include "xla/service/pattern_matcher.h" +#include "xla/service/pattern_matcher_gmock.h" #include "xla/stream_executor/device_description.h" +#include "xla/xla.pb.h" #include "xla/xla_data.pb.h" +#include "tsl/platform/statusor.h" #include "tsl/platform/tensor_float_32_utils.h" namespace xla { namespace gpu { namespace { +namespace m = ::xla::match; + struct MixTypeParams { PrimitiveType lhs_ty; PrimitiveType rhs_ty; @@ -143,6 +155,19 @@ class NoTF32Test : public GpuCodegenTest { tsl::enable_tensor_float_32_execution(tf32_state_); } + DebugOptions GetDebugOptionsForTest() override { + DebugOptions debug_options = GpuCodegenTest::GetDebugOptionsForTest(); + debug_options.set_xla_gpu_triton_gemm_any(true); + return debug_options; + } + + se::CudaComputeCapability GetCudaComputeCapability() { + return backend() + .default_stream_executor() + ->GetDeviceDescription() + .cuda_compute_capability(); + } + private: bool tf32_state_; }; @@ -503,25 +528,17 @@ INSTANTIATE_TEST_SUITE_P( class SelectTest : public NoTF32Test, public ::testing::WithParamInterface< - std::tuple> { - public: - se::CudaComputeCapability GetCudaComputeCapability() { - return backend() - .default_stream_executor() - ->GetDeviceDescription() - .cuda_compute_capability(); - } -}; + std::tuple> {}; TEST_P(SelectTest, SelectFusionExecutesCorrectly) { - PrimitiveType data_type1; - PrimitiveType data_type2; + 
PrimitiveType data_type1, data_type2; std::tie(data_type1, data_type2) = GetParam(); - - if ((data_type1 == BF16 || data_type2 == BF16) && - !GetCudaComputeCapability().IsAtLeast( - se::CudaComputeCapability::AMPERE)) { - GTEST_SKIP() << "No BF16 before Ampere."; + for (const PrimitiveType type : {data_type1, data_type2}) { + if (!IsTritonSupportedDataType(type, GetCudaComputeCapability())) { + GTEST_SKIP() << absl::Substitute( + "Unsupported data type: $0", + primitive_util::LowercasePrimitiveTypeName(type)); + } } const std::string hHloTestTemplate = R"( @@ -613,7 +630,7 @@ ENTRY e { /*run_hlo_passes=*/false)); } -std::string SelectTestParamsToString( +std::string TwoPrimitiveTypesToString( const ::testing::TestParamInfo>& data) { PrimitiveType data_type1; @@ -624,17 +641,26 @@ std::string SelectTestParamsToString( primitive_util::LowercasePrimitiveTypeName(data_type2)); } +// BF16: depending on the GPU generation. +constexpr std::array kSupportedDataTypes{PRED, S8, S16, S32, + F16, F32, BF16}; + INSTANTIATE_TEST_SUITE_P( SelectTestSuite, SelectTest, - ::testing::Combine(::testing::Values(PRED, S8, S16, S32, F16, BF16, F32), + ::testing::Combine(::testing::ValuesIn(kSupportedDataTypes), ::testing::Values(F16, BF16, F32)), - SelectTestParamsToString); + TwoPrimitiveTypesToString); class ConstantTest : public NoTF32Test, public ::testing::WithParamInterface {}; TEST_P(ConstantTest, ConstantFusionExecutesCorrectly) { - PrimitiveType data_type = GetParam(); + const PrimitiveType data_type = GetParam(); + if (!IsTritonSupportedDataType(data_type, GetCudaComputeCapability())) { + GTEST_SKIP() << absl::Substitute( + "Unsupported data type: $0", + primitive_util::LowercasePrimitiveTypeName(data_type)); + } const std::string hHloTestTemplate = R"( HloModule m, is_scheduled=true @@ -716,8 +742,57 @@ ENTRY e { /*run_hlo_passes=*/false)); } -INSTANTIATE_TEST_SUITE_P(ConstantTestSuite, ConstantTest, - ::testing::Values(PRED, S8, S16, S32, F16, F32)); 
+INSTANTIATE_TEST_SUITE_P( + ConstantTestSuite, ConstantTest, ::testing::ValuesIn(kSupportedDataTypes), + [](const ::testing::TestParamInfo type) { + return primitive_util::LowercasePrimitiveTypeName(type.param); + }); + +class ConvertTest : public NoTF32Test, + public ::testing::WithParamInterface< + std::tuple> {}; + +TEST_P(ConvertTest, ConvertFusionExecutesCorrectly) { + PrimitiveType data_type1, data_type2; + std::tie(data_type1, data_type2) = GetParam(); + for (const PrimitiveType type : {data_type1, data_type2}) { + if (!IsTritonSupportedDataType(type, GetCudaComputeCapability())) { + GTEST_SKIP() << absl::Substitute( + "Unsupported data type: $0", + primitive_util::LowercasePrimitiveTypeName(type)); + } + } + + const std::string hlo_text = absl::Substitute( + R"( +t { + p0 = $0[2,2] parameter(0) + p0c = $1[2,2] convert(p0) + p0cc = f32[2,2] convert(p0c) + p1 = f32[2,2] parameter(1) + ROOT r = f32[2,2] dot(p0cc, p1), + lhs_contracting_dims={1}, rhs_contracting_dims={0} +} + +ENTRY e{ + p0 = $0[2,2] parameter(0) + p1 = f32[2,2] parameter(1) + ROOT r = f32[2,2] fusion(p0, p1), kind=kCustom, calls=t, + backend_config={"kind":"__triton_gemm"} +})", + primitive_util::LowercasePrimitiveTypeName(data_type1), + primitive_util::LowercasePrimitiveTypeName(data_type2)); + + MatchOptimizedHlo(hlo_text, R"( +CHECK: block_m + )"); +} + +INSTANTIATE_TEST_SUITE_P( + ConvertTestSuite, ConvertTest, + ::testing::Combine(::testing::ValuesIn(kSupportedDataTypes), + ::testing::ValuesIn(kSupportedDataTypes)), + TwoPrimitiveTypesToString); class TritonSoftmaxTest : public GpuCodegenTest, public ::testing::WithParamInterface { From be8bc81b566e89b6f9958d2ca0d84bb250ebcfeb Mon Sep 17 00:00:00 2001 From: Sergei Lebedev Date: Fri, 22 Sep 2023 07:51:57 -0700 Subject: [PATCH 143/567] Added a few missing members to jax_jit.JitState PiperOrigin-RevId: 567621107 --- third_party/xla/xla/python/xla_extension/jax_jit.pyi | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git 
a/third_party/xla/xla/python/xla_extension/jax_jit.pyi b/third_party/xla/xla/python/xla_extension/jax_jit.pyi index 4c7cd95f446fab..e495b5fe8db9a2 100644 --- a/third_party/xla/xla/python/xla_extension/jax_jit.pyi +++ b/third_party/xla/xla/python/xla_extension/jax_jit.pyi @@ -26,8 +26,10 @@ CompiledFunction = xla_extension.CompiledFunction class JitState: disable_jit: Optional[bool] enable_x64: Optional[bool] - extra_jit_context: Any - post_hook: Optional[Callable] + enable_memories: Optional[bool] + default_device: Optional[Any] + extra_jit_context: Optional[Any] + post_hook: Optional[Callable[..., Any]] def global_state() -> JitState: ... def thread_local_state() -> JitState: ... From b698c540108c00752a4668765bb5b2af762519cc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 22 Sep 2023 08:28:12 -0700 Subject: [PATCH 144/567] Integrate LLVM at llvm/llvm-project@058222b23166 Updates LLVM usage to match [058222b23166](https://github.com/llvm/llvm-project/commit/058222b23166) PiperOrigin-RevId: 567628854 --- third_party/llvm/generated.patch | 53 ------------------- third_party/llvm/workspace.bzl | 4 +- .../xla/xla/mlir_hlo/gml_st/IR/gml_st_ops.td | 6 +-- .../cpu_tiling/fusion_planning_for_cpu.cc | 4 +- .../transform_elementwise_for_cpu.cc | 10 ++-- .../cpu_tiling/transform_reduce_for_cpu.cc | 2 +- .../gml_st/transforms/fusion/fusion.cc | 4 +- .../transforms/scalarization/scalarization.cc | 11 ++-- .../xla/xla/mlir_hlo/thlo/IR/thlo_ops.cc | 29 +++++----- .../xla/xla/mlir_hlo/thlo/IR/thlo_ops.td | 30 +++++------ .../tools/mlir_interpreter/dialects/vector.cc | 4 +- 11 files changed, 50 insertions(+), 107 deletions(-) diff --git a/third_party/llvm/generated.patch b/third_party/llvm/generated.patch index 409e0541024c8e..509398da979e83 100644 --- a/third_party/llvm/generated.patch +++ b/third_party/llvm/generated.patch @@ -1,54 +1 @@ Auto generated patch. Do not edit or delete it, even if empty. 
-diff -ruN --strip-trailing-cr a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp ---- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp -+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp -@@ -5700,7 +5700,7 @@ - if (OpOpcode == ISD::TRUNCATE) { - SDValue OpOp = N1.getOperand(0); - if (OpOp.getValueType() == VT) { -- if (OpOp.getOpcode() == ISD::AssertZext) { -+ if (OpOp.getOpcode() == ISD::AssertZext && N1->hasOneUse()) { - APInt HiBits = APInt::getBitsSetFrom(VT.getScalarSizeInBits(), - N1.getScalarValueSizeInBits()); - if (MaskedValueIsZero(OpOp, HiBits)) { -diff -ruN --strip-trailing-cr a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp ---- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp -+++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp -@@ -2645,8 +2645,7 @@ - for (;;) { - // Look through nodes that don't alter the bits of the incoming value. - unsigned Op = Arg.getOpcode(); -- if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST || -- Op == ISD::AssertZext) { -+ if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) { - Arg = Arg.getOperand(0); - continue; - } -diff -ruN --strip-trailing-cr a/llvm/test/CodeGen/AArch64/setcc_knownbits.ll b/llvm/test/CodeGen/AArch64/setcc_knownbits.ll ---- a/llvm/test/CodeGen/AArch64/setcc_knownbits.ll -+++ b/llvm/test/CodeGen/AArch64/setcc_knownbits.ll -@@ -4,6 +4,8 @@ - define i1 @load_bv_v4i8(i1 zeroext %a) { - ; CHECK-LABEL: load_bv_v4i8: - ; CHECK: // %bb.0: -+; CHECK-NEXT: cmp w0, #0 -+; CHECK-NEXT: cset w0, ne - ; CHECK-NEXT: ret - %b = zext i1 %a to i32 - %c = icmp eq i32 %b, 1 -diff -ruN --strip-trailing-cr a/llvm/test/CodeGen/RISCV/rvv/fold-vp-fadd-and-vp-fmul.ll b/llvm/test/CodeGen/RISCV/rvv/fold-vp-fadd-and-vp-fmul.ll ---- a/llvm/test/CodeGen/RISCV/rvv/fold-vp-fadd-and-vp-fmul.ll -+++ b/llvm/test/CodeGen/RISCV/rvv/fold-vp-fadd-and-vp-fmul.ll -@@ -62,9 +62,9 @@ - ; CHECK-LABEL: 
fma_reassociate: - ; CHECK: # %bb.0: - ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma --; CHECK-NEXT: vfmadd.vv v11, v10, v12, v0.t --; CHECK-NEXT: vfmadd.vv v9, v8, v11, v0.t --; CHECK-NEXT: vmv.v.v v8, v9 -+; CHECK-NEXT: vfmadd.vv v9, v8, v12, v0.t -+; CHECK-NEXT: vfmadd.vv v11, v10, v9, v0.t -+; CHECK-NEXT: vmv.v.v v8, v11 - ; CHECK-NEXT: ret - %1 = call fast @llvm.vp.fmul.nxv1f64( %a, %b, %m, i32 %vl) - %2 = call fast @llvm.vp.fmul.nxv1f64( %c, %d, %m, i32 %vl) diff --git a/third_party/llvm/workspace.bzl b/third_party/llvm/workspace.bzl index 5abb4304cc53f3..288b1748010f35 100644 --- a/third_party/llvm/workspace.bzl +++ b/third_party/llvm/workspace.bzl @@ -4,8 +4,8 @@ load("//third_party:repo.bzl", "tf_http_archive") def repo(name): """Imports LLVM.""" - LLVM_COMMIT = "3b0f812b9af459c4f857e4a7ffffa01f7a21446e" - LLVM_SHA256 = "47f8c81275437fb4ebcf0286125d683fb946cbddf0b725dc61d786141b32bc08" + LLVM_COMMIT = "058222b2316615194c089f2bc68d11341f39d26e" + LLVM_SHA256 = "99d3c38eb11dee8f00bd74b69152d961ab73cf4488842f6120e81342eeb94a3b" tf_http_archive( name = name, diff --git a/third_party/xla/xla/mlir_hlo/gml_st/IR/gml_st_ops.td b/third_party/xla/xla/mlir_hlo/gml_st/IR/gml_st_ops.td index 8a78bc5102ed45..b19d067027d752 100644 --- a/third_party/xla/xla/mlir_hlo/gml_st/IR/gml_st_ops.td +++ b/third_party/xla/xla/mlir_hlo/gml_st/IR/gml_st_ops.td @@ -51,10 +51,8 @@ def GMLST_FusionOp : GMLST_Op<"fusion", [ YieldOp getTerminator(); // Implement method necessary for DestinationStyleOpInterface. 
- std::pair getDpsInitsPositionRange() { - int64_t numOperands = this->getNumOperands(); - int64_t numInits = this->getInits().size(); - return {numOperands - numInits, numOperands}; + mlir::MutableOperandRange getDpsInitsMutable() { + return getInitsMutable(); } }]; } diff --git a/third_party/xla/xla/mlir_hlo/gml_st/transforms/cpu_tiling/fusion_planning_for_cpu.cc b/third_party/xla/xla/mlir_hlo/gml_st/transforms/cpu_tiling/fusion_planning_for_cpu.cc index 73a5a3a9ee45d2..a6d127c6eb0de4 100644 --- a/third_party/xla/xla/mlir_hlo/gml_st/transforms/cpu_tiling/fusion_planning_for_cpu.cc +++ b/third_party/xla/xla/mlir_hlo/gml_st/transforms/cpu_tiling/fusion_planning_for_cpu.cc @@ -69,8 +69,8 @@ bool allowedToFuse(Operation* consumerOp, Operation* producerOp) { auto dstStyleOp = dyn_cast(consumerOp); if (!dstStyleOp) return false; - if (llvm::any_of(dstStyleOp.getDpsInitOperands(), [&](OpOperand* operand) { - return operand->get().getDefiningOp() == producerOp; + if (llvm::any_of(dstStyleOp.getDpsInits(), [&](Value operand) { + return operand.getDefiningOp() == producerOp; })) return true; } diff --git a/third_party/xla/xla/mlir_hlo/gml_st/transforms/cpu_tiling/transform_elementwise_for_cpu.cc b/third_party/xla/xla/mlir_hlo/gml_st/transforms/cpu_tiling/transform_elementwise_for_cpu.cc index 309d2b02f592c5..b9a058d25904db 100644 --- a/third_party/xla/xla/mlir_hlo/gml_st/transforms/cpu_tiling/transform_elementwise_for_cpu.cc +++ b/third_party/xla/xla/mlir_hlo/gml_st/transforms/cpu_tiling/transform_elementwise_for_cpu.cc @@ -65,7 +65,9 @@ Operation *findRootElementwiseOp(Operation *op, FusionFilterFn fusionFilterFn) { if (hasLabel(owner, kTransformedLabel)) continue; if (hasLabel(owner, kFusionPlanningLabel)) continue; if (auto dpsOp = dyn_cast(owner)) { - if (llvm::is_contained(dpsOp.getDpsInitOperands(), &use)) continue; + SmallVector opOperands = llvm::to_vector(llvm::map_range( + dpsOp.getDpsInitsMutable(), [](OpOperand &o) { return &o; })); + if 
(llvm::is_contained(opOperands, &use)) continue; } curOp = owner; rootOp = curOp; @@ -205,9 +207,9 @@ FusionCluster findElementwiseCluster(Operation *rootOp, // Add tensor.empty ops to the cluster. for (auto *op : resultOps) { if (auto dpsOp = dyn_cast(op)) { - for (auto &operand : dpsOp.getDpsInitOperands()) { - if (auto emptyOp = dyn_cast_or_null( - operand->get().getDefiningOp())) + for (auto operand : dpsOp.getDpsInits()) { + if (auto emptyOp = + dyn_cast_or_null(operand.getDefiningOp())) fusionCluster.operations.insert(emptyOp); } } diff --git a/third_party/xla/xla/mlir_hlo/gml_st/transforms/cpu_tiling/transform_reduce_for_cpu.cc b/third_party/xla/xla/mlir_hlo/gml_st/transforms/cpu_tiling/transform_reduce_for_cpu.cc index 9910cd9c3c4b23..766c33a4573fa1 100644 --- a/third_party/xla/xla/mlir_hlo/gml_st/transforms/cpu_tiling/transform_reduce_for_cpu.cc +++ b/third_party/xla/xla/mlir_hlo/gml_st/transforms/cpu_tiling/transform_reduce_for_cpu.cc @@ -74,7 +74,7 @@ LogicalResult validateOp(linalg::ReduceOp reduceOp, PatternRewriter &rewriter, return rewriter.notifyMatchFailure( reduceOp, "expects 1 reduction dimension element. 0 or > 1 received."); } - OpOperandVector operands = reduceOp.getDpsInputOperands(); + SmallVector operands = reduceOp.getDpsInputOperands(); if (operands.size() != 1) { return rewriter.notifyMatchFailure(reduceOp, "expects 1 operand. 
0 or > 1 received."); diff --git a/third_party/xla/xla/mlir_hlo/gml_st/transforms/fusion/fusion.cc b/third_party/xla/xla/mlir_hlo/gml_st/transforms/fusion/fusion.cc index f0723f7b00e142..57e6b3a5cc42d0 100644 --- a/third_party/xla/xla/mlir_hlo/gml_st/transforms/fusion/fusion.cc +++ b/third_party/xla/xla/mlir_hlo/gml_st/transforms/fusion/fusion.cc @@ -510,8 +510,8 @@ SmallVector getRootOpInitOperands(PatternRewriter& rewriter, SmallVector initOperands; - for (auto* operand : dstStyleOp.getDpsInitOperands()) { - initOperands.push_back(getTiedSourceOp(rewriter, operand, fusionCluster)); + for (OpOperand& operand : dstStyleOp.getDpsInitsMutable()) { + initOperands.push_back(getTiedSourceOp(rewriter, &operand, fusionCluster)); } return initOperands; diff --git a/third_party/xla/xla/mlir_hlo/gml_st/transforms/scalarization/scalarization.cc b/third_party/xla/xla/mlir_hlo/gml_st/transforms/scalarization/scalarization.cc index c8115b6889e1b0..8bc17d17b4b3c5 100644 --- a/third_party/xla/xla/mlir_hlo/gml_st/transforms/scalarization/scalarization.cc +++ b/third_party/xla/xla/mlir_hlo/gml_st/transforms/scalarization/scalarization.cc @@ -505,10 +505,13 @@ LogicalResult scalarizeLinalgOp(LinalgOp linalgOp, PatternRewriter &rewriter) { if (isa(linalgOp)) { if (llvm::all_of(linalgOp->getUses(), [&](OpOperand &use) { Operation *user = use.getOwner(); - return isa(user) && - llvm::is_contained(cast(user) - .getDpsInitOperands(), - &use); + if (auto dpsOp = dyn_cast(user)) { + SmallVector opOperands = llvm::to_vector( + llvm::map_range(dpsOp.getDpsInitsMutable(), + [](OpOperand &o) { return &o; })); + return llvm::is_contained(opOperands, &use); + } + return false; })) return failure(); } diff --git a/third_party/xla/xla/mlir_hlo/thlo/IR/thlo_ops.cc b/third_party/xla/xla/mlir_hlo/thlo/IR/thlo_ops.cc index 4d09db0186585c..d56141388076da 100644 --- a/third_party/xla/xla/mlir_hlo/thlo/IR/thlo_ops.cc +++ b/third_party/xla/xla/mlir_hlo/thlo/IR/thlo_ops.cc @@ -185,18 +185,17 @@ SmallVector 
getIterationDomainForTensor(OpBuilder &b, Location loc, static void getDstStyleOpEffectsImpl( SmallVectorImpl> &effects, - ValueRange results, const OpOperandVector &inputOperands, - const OpOperandVector &outputOperands) { - for (auto *operand : inputOperands) { - if (!operand->get().getType().isa()) continue; - effects.emplace_back(MemoryEffects::Read::get(), operand->get(), + ValueRange results, ValueRange inputOperands, ValueRange outputOperands) { + for (auto operand : inputOperands) { + if (!operand.getType().isa()) continue; + effects.emplace_back(MemoryEffects::Read::get(), operand, SideEffects::DefaultResource::get()); } - for (auto *operand : outputOperands) { - if (!operand->get().getType().isa()) continue; - effects.emplace_back(MemoryEffects::Read::get(), operand->get(), + for (auto operand : outputOperands) { + if (!operand.getType().isa()) continue; + effects.emplace_back(MemoryEffects::Read::get(), operand, SideEffects::DefaultResource::get()); - effects.emplace_back(MemoryEffects::Write::get(), operand->get(), + effects.emplace_back(MemoryEffects::Write::get(), operand, SideEffects::DefaultResource::get()); } } @@ -557,7 +556,7 @@ void ConcatenateOp::getEffects( SmallVectorImpl> &effects) { getDstStyleOpEffectsImpl(effects, getOperation()->getResults(), - getDpsInputOperands(), getDpsInitOperands()); + getDpsInputs(), getDpsInits()); } //===----------------------------------------------------------------------===// @@ -713,7 +712,7 @@ void DynamicBroadcastInDimOp::getEffects( SmallVectorImpl> &effects) { getDstStyleOpEffectsImpl(effects, getOperation()->getResults(), - getDpsInputOperands(), getDpsInitOperands()); + getDpsInputs(), getDpsInits()); } //===----------------------------------------------------------------------===// @@ -879,7 +878,7 @@ void ScatterOp::getEffects( SmallVectorImpl> &effects) { getDstStyleOpEffectsImpl(effects, getOperation()->getResults(), - getDpsInputOperands(), getDpsInitOperands()); + getDpsInputs(), getDpsInits()); 
} //===----------------------------------------------------------------------===// @@ -981,7 +980,7 @@ void GatherOp::getEffects( SmallVectorImpl> &effects) { getDstStyleOpEffectsImpl(effects, getOperation()->getResults(), - getDpsInputOperands(), getDpsInitOperands()); + getDpsInputs(), getDpsInits()); } //===----------------------------------------------------------------------===// @@ -1231,7 +1230,7 @@ void SortOp::getEffects( SmallVectorImpl> &effects) { getDstStyleOpEffectsImpl(effects, getOperation()->getResults(), - getDpsInputOperands(), getDpsInitOperands()); + getDpsInputs(), getDpsInits()); } //===----------------------------------------------------------------------===// @@ -1354,7 +1353,7 @@ void ReverseOp::getEffects( SmallVectorImpl> &effects) { getDstStyleOpEffectsImpl(effects, getOperation()->getResults(), - getDpsInputOperands(), getDpsInitOperands()); + getDpsInputs(), getDpsInits()); } } // namespace thlo diff --git a/third_party/xla/xla/mlir_hlo/thlo/IR/thlo_ops.td b/third_party/xla/xla/mlir_hlo/thlo/IR/thlo_ops.td index aec9816e8a2864..5346d413842e51 100644 --- a/third_party/xla/xla/mlir_hlo/thlo/IR/thlo_ops.td +++ b/third_party/xla/xla/mlir_hlo/thlo/IR/thlo_ops.td @@ -83,9 +83,8 @@ def THLO_ConcatenateOp : THLO_DstStyleOp<"concatenate", [ let extraClassDeclaration = [{ // Implement method necessary for DestinationStyleOpInterface. - std::pair getDpsInitsPositionRange() { - int64_t getNumOperands = this->getNumOperands(); - return {getNumOperands - 1, getNumOperands}; + mlir::MutableOperandRange getDpsInitsMutable() { + return getInitMutable(); } }]; } @@ -130,9 +129,8 @@ def THLO_DynamicBroadcastInDimOp : THLO_DstStyleOp<"dynamic_broadcast_in_dim", [ let extraClassDeclaration = [{ // Implement method necessary for DestinationStyleOpInterface. 
- std::pair getDpsInitsPositionRange() { - int64_t getNumOperands = this->getNumOperands(); - return {getNumOperands - 1, getNumOperands}; + mlir::MutableOperandRange getDpsInitsMutable() { + return getInitMutable(); } }]; } @@ -176,9 +174,8 @@ def THLO_GatherOp : THLO_DstStyleOp<"gather", [ let extraClassDeclaration = [{ // Implement method necessary for DestinationStyleOpInterface. - std::pair getDpsInitsPositionRange() { - int64_t getNumOperands = this->getNumOperands(); - return {getNumOperands - 1, getNumOperands}; + mlir::MutableOperandRange getDpsInitsMutable() { + return getInitMutable(); } }]; } @@ -240,9 +237,8 @@ def THLO_ScatterOp : THLO_DstStyleOp<"scatter", [ int64_t getIndicesCount() { return getIndices().getType().getDimSize(0); } // Implement method necessary for DestinationStyleOpInterface. - std::pair getDpsInitsPositionRange() { - int64_t getNumOperands = this->getNumOperands(); - return {getNumOperands - 1, getNumOperands}; + mlir::MutableOperandRange getDpsInitsMutable() { + return getInitMutable(); } }]; } @@ -298,9 +294,8 @@ def THLO_SortOp : THLO_DstStyleOp<"sort", [ let extraClassDeclaration = [{ // Implement method necessary for DestinationStyleOpInterface. - std::pair getDpsInitsPositionRange() { - int64_t getNumOperands = this->getNumOperands(); - return {getNumOperands - getInits().size(), getNumOperands}; + mlir::MutableOperandRange getDpsInitsMutable() { + return getInitsMutable(); } }]; } @@ -334,9 +329,8 @@ def THLO_ReverseOp : THLO_DstStyleOp<"reverse", [ let extraClassDeclaration = [{ // Implement method necessary for DestinationStyleOpInterface. 
- std::pair getDpsInitsPositionRange() { - int64_t getNumOperands = this->getNumOperands(); - return {getNumOperands - 1, getNumOperands}; + mlir::MutableOperandRange getDpsInitsMutable() { + return getInitMutable(); } }]; } diff --git a/third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/vector.cc b/third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/vector.cc index 6d8563ff6e684d..47cf0267bc8605 100644 --- a/third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/vector.cc +++ b/third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/vector.cc @@ -284,7 +284,7 @@ InterpreterValue extract(InterpreterState& state, vector::ExtractOp extract, const InterpreterValue& vector) { auto result = vector; auto& resultView = result.view(); - for (int64_t offset : extract.getPosition()) { + for (int64_t offset : extract.getStaticPosition()) { state.checkSuccess(resultView.slice(0, offset), "index out of bounds"); } return resultView.rank() == 0 ? result.extractElement({}) : result; @@ -374,7 +374,7 @@ InterpreterValue insert(InterpreterState& state, vector::InsertOp insert, auto result = dst.clone(); auto resultSlice = result; auto& resultSliceView = resultSlice.view(); - for (int64_t offset : insert.getPosition()) { + for (int64_t offset : insert.getStaticPosition()) { state.checkSuccess(resultSliceView.slice(0, offset), "index out of bounds"); } resultSlice.fill([&](auto indices) { return src.extractElement(indices); }); From 655db6e4db11c3bc310e46145d7bb2292ea3063c Mon Sep 17 00:00:00 2001 From: Hyeontaek Lim Date: Fri, 22 Sep 2023 08:36:32 -0700 Subject: [PATCH 145/567] Clarify that PjRtClient and PjRtDevice memory_spaces are not in particular order PiperOrigin-RevId: 567630629 --- third_party/xla/xla/pjrt/pjrt_client.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/third_party/xla/xla/pjrt/pjrt_client.h b/third_party/xla/xla/pjrt/pjrt_client.h index 1ac2461e8fc136..5038a3e69dd399 100644 --- a/third_party/xla/xla/pjrt/pjrt_client.h +++ 
b/third_party/xla/xla/pjrt/pjrt_client.h @@ -171,6 +171,7 @@ class PjRtDevice { } // Returns all memory spaces attached to this device. + // The memory spaces are in no particular order. virtual absl::Span memory_spaces() const = 0; // Returns the default memory space attached to this device. @@ -485,6 +486,7 @@ class PjRtClient { int local_hardware_id) const = 0; // Return all memory spaces owned by the client. + // The memory spaces are in no particular order. virtual absl::Span memory_spaces() const = 0; // Return an ID that identifies the platform (CPU/GPU/TPU). From 5b7a8b07e6ead1db358fa0a5cbff930a83bbedfd Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Fri, 22 Sep 2023 09:01:16 -0700 Subject: [PATCH 146/567] Generate CUDA stubs using implib.so, rather than by writing C++ stubs. We load CUDA libraries lazily using dlopen()/dlsym() primarily to comply with the manylinux rules for Python wheels, which require that libraries in a wheel only link directly against an allowlist of libraries. In order to access CUDA using dlopen()/dlsym() without changing any of our CUDA-using code, TSL contains stub implementations of CUDA APIs that, when invoked, load the relevant library, obtain the requested symbol using dlsym(), and call into that symbol. The current CUDA stub libraries were constructed using a tool based on clang inside Google, which parses the CUDA headers and generates stub code for each API. It is therefore difficult to update the stubs without access to that tool. Each stub generated by the tool is a C++ function, which is very verbose. Further, a number of manual edits are required to the generated code, making maintenance tedious. However, there is a better way. implib.so (https://github.com/yugr/Implib.so) is a tool for automatically generating stubs from a .so file. This tool is considerably simpler because it generates a stub using assembly language, in which it turns out we do not need to know the type signature of the function being called. 
A completely generic trampoline function will do, with little or no bespoke knowledge of each function. We can adapt it to solve our CUDA stub problem. implib-gen.py, which is the tool implib.so provides, isn't perfect for our needs, because it requires access to the .so file at stub generation time, which we don't have in our Bazel build. Instead, we can split it into two phases: get_symbols.py, which, given a .so file extracts a list of public symbols that should be present in a stub. That list of symbols is checked into the TSL tree. make_stub.py, which, given a list of symbols, generates trampolines for each function. This change changes the TSL CUDA build to use make_stub.py to generate stubs from the list of symbols at Bazel build time, allowing us to delete over 130k lines of autogenerated C++ stub code. PiperOrigin-RevId: 567635940 --- tensorflow/opensource_only.files | 3 + tensorflow/workspace2.bzl | 2 + third_party/implib_so/BUILD | 21 + third_party/implib_so/get_symbols.py | 38 + third_party/implib_so/implib_so.BUILD | 20 + third_party/implib_so/make_stub.py | 68 + third_party/implib_so/workspace.bzl | 13 + third_party/xla/opensource_only.files | 3 + third_party/xla/third_party/implib_so/BUILD | 23 + .../xla/third_party/implib_so/get_symbols.py | 38 + .../xla/third_party/implib_so/implib_so.BUILD | 20 + .../xla/third_party/implib_so/make_stub.py | 68 + .../xla/third_party/implib_so/workspace.bzl | 13 + .../xla/third_party/tsl/opensource_only.files | 4 + .../tsl/third_party/implib_so/BUILD | 23 + .../tsl/third_party/implib_so/get_symbols.py | 38 + .../tsl/third_party/implib_so/implib_so.BUILD | 20 + .../tsl/third_party/implib_so/make_stub.py | 68 + .../tsl/third_party/implib_so/workspace.bzl | 13 + .../xla/third_party/tsl/tsl/cuda/BUILD.bazel | 137 +- .../third_party/tsl/tsl/cuda/cublas.symbols | 736 ++ .../third_party/tsl/tsl/cuda/cublasLt.symbols | 234 + .../tsl/tsl/cuda/cublasLt_11_0.inc | 390 - .../third_party/tsl/tsl/cuda/cublasLt_stub.cc | 44 +- 
.../third_party/tsl/tsl/cuda/cublas_10_0.inc | 4898 ---------- .../third_party/tsl/tsl/cuda/cublas_10_1.inc | 5023 ---------- .../third_party/tsl/tsl/cuda/cublas_10_2.inc | 5023 ---------- .../third_party/tsl/tsl/cuda/cublas_11_0.inc | 5197 ----------- .../third_party/tsl/tsl/cuda/cublas_stub.cc | 222 +- .../xla/third_party/tsl/tsl/cuda/cuda.symbols | 583 ++ .../third_party/tsl/tsl/cuda/cuda_10_0.inc | 2133 ----- .../third_party/tsl/tsl/cuda/cuda_10_1.inc | 2166 ----- .../third_party/tsl/tsl/cuda/cuda_10_2.inc | 2328 ----- .../third_party/tsl/tsl/cuda/cuda_11_0.inc | 2943 ------ .../third_party/tsl/tsl/cuda/cuda_11_2.inc | 2816 ------ .../third_party/tsl/tsl/cuda/cuda_12_0.inc | 3323 ------- .../tsl/tsl/cuda/cuda_runtime_10_0.inc | 1846 ---- .../tsl/tsl/cuda/cuda_runtime_10_1.inc | 1854 ---- .../tsl/tsl/cuda/cuda_runtime_10_2.inc | 1907 ---- .../tsl/tsl/cuda/cuda_runtime_11_0.inc | 2639 ------ .../tsl/tsl/cuda/cuda_runtime_11_2.inc | 2259 ----- .../tsl/tsl/cuda/cuda_runtime_11_8.inc | 2771 ------ .../tsl/tsl/cuda/cuda_runtime_12_0.inc | 2676 ------ .../tsl/tsl/cuda/cuda_runtime_9_0.inc | 1421 --- .../xla/third_party/tsl/tsl/cuda/cuda_stub.cc | 48 +- .../third_party/tsl/tsl/cuda/cudart.symbols | 399 + .../third_party/tsl/tsl/cuda/cudart_stub.cc | 120 +- .../third_party/tsl/tsl/cuda/cudnn.symbols | 268 + .../third_party/tsl/tsl/cuda/cudnn_6_0.inc | 1825 ---- .../third_party/tsl/tsl/cuda/cudnn_7_0.inc | 2027 ---- .../third_party/tsl/tsl/cuda/cudnn_7_1.inc | 2361 ----- .../third_party/tsl/tsl/cuda/cudnn_7_3.inc | 2585 ------ .../third_party/tsl/tsl/cuda/cudnn_7_4.inc | 2726 ------ .../third_party/tsl/tsl/cuda/cudnn_7_6.inc | 3257 ------- .../third_party/tsl/tsl/cuda/cudnn_8_0.inc | 3213 ------- .../third_party/tsl/tsl/cuda/cudnn_stub.cc | 49 +- .../third_party/tsl/tsl/cuda/cufft.symbols | 58 + .../third_party/tsl/tsl/cuda/cufft_10_0.inc | 361 - .../third_party/tsl/tsl/cuda/cufft_9_0.inc | 307 - .../third_party/tsl/tsl/cuda/cufft_stub.cc | 35 +- 
.../third_party/tsl/tsl/cuda/cupti.symbols | 148 + .../third_party/tsl/tsl/cuda/cupti_10_0.inc | 763 -- .../third_party/tsl/tsl/cuda/cupti_10_1.inc | 763 -- .../third_party/tsl/tsl/cuda/cupti_10_2.inc | 763 -- .../third_party/tsl/tsl/cuda/cupti_11_0.inc | 763 -- .../third_party/tsl/tsl/cuda/cupti_12_0.inc | 744 -- .../third_party/tsl/tsl/cuda/cupti_9_0.inc | 763 -- .../third_party/tsl/tsl/cuda/cupti_stub.cc | 47 +- .../third_party/tsl/tsl/cuda/curand_10_0.inc | 268 - .../third_party/tsl/tsl/cuda/curand_10_1.inc | 268 - .../third_party/tsl/tsl/cuda/curand_10_2.inc | 268 - .../third_party/tsl/tsl/cuda/curand_11_0.inc | 268 - .../third_party/tsl/tsl/cuda/curand_9_0.inc | 268 - .../third_party/tsl/tsl/cuda/cusolver.symbols | 926 ++ .../tsl/tsl/cuda/cusolver_dense_10_0.inc | 2283 ----- .../tsl/tsl/cuda/cusolver_dense_10_1.inc | 3139 ------- .../tsl/tsl/cuda/cusolver_dense_10_2.inc | 3667 -------- .../tsl/tsl/cuda/cusolver_dense_11_0.inc | 5149 ---------- .../third_party/tsl/tsl/cuda/cusolver_stub.cc | 40 +- .../third_party/tsl/tsl/cuda/cusparse.symbols | 421 + .../tsl/tsl/cuda/cusparse_10_0.inc | 7832 ---------------- .../tsl/tsl/cuda/cusparse_10_1.inc | 8262 ----------------- .../tsl/tsl/cuda/cusparse_10_2.inc | 8262 ----------------- .../tsl/tsl/cuda/cusparse_11_0.inc | 7025 -------------- .../tsl/tsl/cuda/cusparse_12_0.inc | 6080 ------------ .../third_party/tsl/tsl/cuda/cusparse_9_0.inc | 7152 -------------- .../third_party/tsl/tsl/cuda/cusparse_stub.cc | 47 +- .../xla/third_party/tsl/tsl/cuda/stub.bzl | 26 + .../xla/third_party/tsl/workspace2.bzl | 42 +- 89 files changed, 4836 insertions(+), 137315 deletions(-) create mode 100644 third_party/implib_so/BUILD create mode 100644 third_party/implib_so/get_symbols.py create mode 100644 third_party/implib_so/implib_so.BUILD create mode 100644 third_party/implib_so/make_stub.py create mode 100644 third_party/implib_so/workspace.bzl create mode 100644 third_party/xla/third_party/implib_so/BUILD create mode 100644 
third_party/xla/third_party/implib_so/get_symbols.py create mode 100644 third_party/xla/third_party/implib_so/implib_so.BUILD create mode 100644 third_party/xla/third_party/implib_so/make_stub.py create mode 100644 third_party/xla/third_party/implib_so/workspace.bzl create mode 100644 third_party/xla/third_party/tsl/third_party/implib_so/BUILD create mode 100644 third_party/xla/third_party/tsl/third_party/implib_so/get_symbols.py create mode 100644 third_party/xla/third_party/tsl/third_party/implib_so/implib_so.BUILD create mode 100644 third_party/xla/third_party/tsl/third_party/implib_so/make_stub.py create mode 100644 third_party/xla/third_party/tsl/third_party/implib_so/workspace.bzl create mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cublas.symbols create mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cublasLt.symbols delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cublasLt_11_0.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cublas_10_0.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cublas_10_1.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cublas_10_2.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cublas_11_0.inc create mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cuda.symbols delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cuda_10_0.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cuda_10_1.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cuda_10_2.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cuda_11_0.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cuda_11_2.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cuda_12_0.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cuda_runtime_10_0.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cuda_runtime_10_1.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cuda_runtime_10_2.inc delete 
mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cuda_runtime_11_0.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cuda_runtime_11_2.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cuda_runtime_11_8.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cuda_runtime_12_0.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cuda_runtime_9_0.inc create mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cudart.symbols create mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cudnn.symbols delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cudnn_6_0.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cudnn_7_0.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cudnn_7_1.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cudnn_7_3.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cudnn_7_4.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cudnn_7_6.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cudnn_8_0.inc create mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cufft.symbols delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cufft_10_0.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cufft_9_0.inc create mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cupti.symbols delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cupti_10_0.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cupti_10_1.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cupti_10_2.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cupti_11_0.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cupti_12_0.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cupti_9_0.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/curand_10_0.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/curand_10_1.inc delete mode 100644 
third_party/xla/third_party/tsl/tsl/cuda/curand_10_2.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/curand_11_0.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/curand_9_0.inc create mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cusolver.symbols delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cusolver_dense_10_0.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cusolver_dense_10_1.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cusolver_dense_10_2.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cusolver_dense_11_0.inc create mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cusparse.symbols delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cusparse_10_0.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cusparse_10_1.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cusparse_10_2.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cusparse_11_0.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cusparse_12_0.inc delete mode 100644 third_party/xla/third_party/tsl/tsl/cuda/cusparse_9_0.inc create mode 100644 third_party/xla/third_party/tsl/tsl/cuda/stub.bzl diff --git a/tensorflow/opensource_only.files b/tensorflow/opensource_only.files index 9a0bc808b1ea60..907e661369c704 100644 --- a/tensorflow/opensource_only.files +++ b/tensorflow/opensource_only.files @@ -259,6 +259,9 @@ tf_staging/third_party/gpus/rocm/rocm_config.h.tpl: tf_staging/third_party/gpus/rocm_configure.bzl: tf_staging/third_party/grpc/BUILD: tf_staging/third_party/icu/udata.patch: +tf_staging/third_party/implib_so/BUILD: +tf_staging/third_party/implib_so/get_symbols.py: +tf_staging/third_party/implib_so/make_stub.py: tf_staging/third_party/linenoise.BUILD: tf_staging/third_party/llvm_openmp/BUILD: tf_staging/third_party/llvm_openmp/cmake_vars.bzl: diff --git a/tensorflow/workspace2.bzl b/tensorflow/workspace2.bzl index 
50aaf181c2beb0..d0a6b6f2ff4aad 100644 --- a/tensorflow/workspace2.bzl +++ b/tensorflow/workspace2.bzl @@ -31,6 +31,7 @@ load("//third_party/gemmlowp:workspace.bzl", gemmlowp = "repo") load("//third_party/hexagon:workspace.bzl", hexagon_nn = "repo") load("//third_party/highwayhash:workspace.bzl", highwayhash = "repo") load("//third_party/hwloc:workspace.bzl", hwloc = "repo") +load("//third_party/implib_so:workspace.bzl", implib_so = "repo") load("//third_party/icu:workspace.bzl", icu = "repo") load("//third_party/jpeg:workspace.bzl", jpeg = "repo") load("//third_party/libprotobuf_mutator:workspace.bzl", libprotobuf_mutator = "repo") @@ -72,6 +73,7 @@ def _initialize_third_party(): highwayhash() hwloc() icu() + implib_so() jpeg() kissfft() libprotobuf_mutator() diff --git a/third_party/implib_so/BUILD b/third_party/implib_so/BUILD new file mode 100644 index 00000000000000..ca6976cd8d3425 --- /dev/null +++ b/third_party/implib_so/BUILD @@ -0,0 +1,21 @@ +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # MIT + +py_binary( + name = "get_symbols", + srcs = ["get_symbols.py"], + deps = [ + "@bazel_tools//tools/python/runfiles", + "@implib_so//:implib_gen_lib", + ], +) + +py_binary( + name = "make_stub", + srcs = ["make_stub.py"], + deps = [ + "@bazel_tools//tools/python/runfiles", + "@implib_so//:implib_gen_lib", + ], +) diff --git a/third_party/implib_so/get_symbols.py b/third_party/implib_so/get_symbols.py new file mode 100644 index 00000000000000..9625052f7b69b6 --- /dev/null +++ b/third_party/implib_so/get_symbols.py @@ -0,0 +1,38 @@ +"""Given a .so file, lists symbols that should be included in a stub. + +Example usage: +$ bazel run -c opt @local_tsl//third_party/implib_so:get_symbols +/usr/local/cuda/lib64/libcudart.so > third_party/tsl/tsl/cuda/cudart.symbols +""" + +import argparse +import importlib + +# We can't import implib-gen directly because it has a dash in its name. 
+implib = importlib.import_module('implib-gen') + + +def _is_exported_function(s): + return ( + s['Bind'] != 'LOCAL' + and s['Type'] == 'FUNC' + and s['Ndx'] != 'UND' + and s['Name'] not in ['', '_init', '_fini'] + and s['Default'] + ) + + +def main(): + parser = argparse.ArgumentParser( + description='Extracts a list of symbols from a shared library' + ) + parser.add_argument('library', help='Path to the .so file.') + args = parser.parse_args() + syms = implib.collect_syms(args.library) + funs = [s['Name'] for s in syms if _is_exported_function(s)] + for f in sorted(funs): + print(f) + + +if __name__ == '__main__': + main() diff --git a/third_party/implib_so/implib_so.BUILD b/third_party/implib_so/implib_so.BUILD new file mode 100644 index 00000000000000..bbfb2898eb12dd --- /dev/null +++ b/third_party/implib_so/implib_so.BUILD @@ -0,0 +1,20 @@ +# Description: +# Implib.so is a simple equivalent of Windows DLL import libraries for POSIX +# shared libraries. + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # MIT + +exports_files([ + "LICENSE.txt", +]) + +py_library( + name = "implib_gen_lib", + srcs = ["implib-gen.py"], + data = glob([ + "arch/**/*.S.tpl", + "arch/**/*.ini", + ]), +) diff --git a/third_party/implib_so/make_stub.py b/third_party/implib_so/make_stub.py new file mode 100644 index 00000000000000..f0e1fe564c0c17 --- /dev/null +++ b/third_party/implib_so/make_stub.py @@ -0,0 +1,68 @@ +"""Given a list of symbols, generates a stub.""" + +import argparse +import configparser +import os +import string + +from bazel_tools.tools.python.runfiles import runfiles + +r = runfiles.Create() + + +def main(): + parser = argparse.ArgumentParser( + description='Generates stubs for CUDA libraries.' + ) + parser.add_argument('symbols', help='File containing a list of symbols.') + parser.add_argument( + '--outdir', '-o', help='Path to create wrapper at', default='.' + ) + parser.add_argument( + '--target', + help='Target platform name, e.g. 
x86_64, aarch64.', + required=True, + ) + args = parser.parse_args() + + config_path = r.Rlocation(f'implib_so/arch/{args.target}/config.ini') + table_path = r.Rlocation(f'implib_so/arch/{args.target}/table.S.tpl') + trampoline_path = r.Rlocation( + f'implib_so/arch/{args.target}/trampoline.S.tpl' + ) + + cfg = configparser.ConfigParser(inline_comment_prefixes=';') + cfg.read(config_path) + ptr_size = int(cfg['Arch']['PointerSize']) + + with open(args.symbols, 'r') as f: + funs = [s.strip() for s in f.readlines()] + + # Generate assembly code, containing a table for the resolved symbols and the + # trampolines. + lib_name, _ = os.path.splitext(os.path.basename(args.symbols)) + + with open(os.path.join(args.outdir, f'{lib_name}.tramp.S'), 'w') as f: + with open(table_path, 'r') as t: + table_text = string.Template(t.read()).substitute( + lib_suffix=lib_name, table_size=ptr_size * (len(funs) + 1) + ) + f.write(table_text) + + with open(trampoline_path, 'r') as t: + tramp_tpl = string.Template(t.read()) + + for i, name in enumerate(funs): + tramp_text = tramp_tpl.substitute( + lib_suffix=lib_name, sym=name, offset=i * ptr_size, number=i + ) + f.write(tramp_text) + + # Generates a list of symbols, formatted as a list of C++ strings. 
+ with open(os.path.join(args.outdir, f'{lib_name}.inc'), 'w') as f: + sym_names = ''.join(f' "{name}",\n' for name in funs) + f.write(sym_names) + + +if __name__ == '__main__': + main() diff --git a/third_party/implib_so/workspace.bzl b/third_party/implib_so/workspace.bzl new file mode 100644 index 00000000000000..01dad3b169f402 --- /dev/null +++ b/third_party/implib_so/workspace.bzl @@ -0,0 +1,13 @@ +"""Implib.so is a simple equivalent of Windows DLL import libraries for POSIX +shared libraries.""" + +load("//third_party:repo.bzl", "tf_http_archive", "tf_mirror_urls") + +def repo(): + tf_http_archive( + name = "implib_so", + strip_prefix = "Implib.so-5fb84c2a750434b9df1da67d67b749eb929598f1", + sha256 = "10de0a616df24849f2a883747784c115f209708960e44556f5ce384de6f103e8", + urls = tf_mirror_urls("https://github.com/yugr/Implib.so/archive/5fb84c2a750434b9df1da67d67b749eb929598f1.tar.gz"), + build_file = "//third_party/implib_so:implib_so.BUILD", + ) diff --git a/third_party/xla/opensource_only.files b/third_party/xla/opensource_only.files index 690bac46ceb990..0425cd3bc0d34a 100644 --- a/third_party/xla/opensource_only.files +++ b/third_party/xla/opensource_only.files @@ -56,6 +56,9 @@ third_party/gpus/rocm/build_defs.bzl.tpl: third_party/gpus/rocm/rocm_config.h.tpl: third_party/gpus/rocm_configure.bzl: third_party/grpc/BUILD: +third_party/implib_so/BUILD: +third_party/implib_so/get_symbols.py: +third_party/implib_so/make_stub.py: third_party/llvm_openmp/BUILD: third_party/llvm_openmp/cmake_vars.bzl: third_party/llvm_openmp/expand_cmake_vars:.py diff --git a/third_party/xla/third_party/implib_so/BUILD b/third_party/xla/third_party/implib_so/BUILD new file mode 100644 index 00000000000000..8401d6152b88ab --- /dev/null +++ b/third_party/xla/third_party/implib_so/BUILD @@ -0,0 +1,23 @@ +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # MIT + +py_binary( + name = "get_symbols", + srcs = ["get_symbols.py"], + visibility = 
["//visibility:public"], + deps = [ + "@bazel_tools//tools/python/runfiles", + "@implib_so//:implib_gen_lib", + ], +) + +py_binary( + name = "make_stub", + srcs = ["make_stub.py"], + visibility = ["//visibility:public"], + deps = [ + "@bazel_tools//tools/python/runfiles", + "@implib_so//:implib_gen_lib", + ], +) diff --git a/third_party/xla/third_party/implib_so/get_symbols.py b/third_party/xla/third_party/implib_so/get_symbols.py new file mode 100644 index 00000000000000..9625052f7b69b6 --- /dev/null +++ b/third_party/xla/third_party/implib_so/get_symbols.py @@ -0,0 +1,38 @@ +"""Given a .so file, lists symbols that should be included in a stub. + +Example usage: +$ bazel run -c opt @local_tsl//third_party/implib_so:get_symbols +/usr/local/cuda/lib64/libcudart.so > third_party/tsl/tsl/cuda/cudart.symbols +""" + +import argparse +import importlib + +# We can't import implib-gen directly because it has a dash in its name. +implib = importlib.import_module('implib-gen') + + +def _is_exported_function(s): + return ( + s['Bind'] != 'LOCAL' + and s['Type'] == 'FUNC' + and s['Ndx'] != 'UND' + and s['Name'] not in ['', '_init', '_fini'] + and s['Default'] + ) + + +def main(): + parser = argparse.ArgumentParser( + description='Extracts a list of symbols from a shared library' + ) + parser.add_argument('library', help='Path to the .so file.') + args = parser.parse_args() + syms = implib.collect_syms(args.library) + funs = [s['Name'] for s in syms if _is_exported_function(s)] + for f in sorted(funs): + print(f) + + +if __name__ == '__main__': + main() diff --git a/third_party/xla/third_party/implib_so/implib_so.BUILD b/third_party/xla/third_party/implib_so/implib_so.BUILD new file mode 100644 index 00000000000000..bbfb2898eb12dd --- /dev/null +++ b/third_party/xla/third_party/implib_so/implib_so.BUILD @@ -0,0 +1,20 @@ +# Description: +# Implib.so is a simple equivalent of Windows DLL import libraries for POSIX +# shared libraries. 
+ +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # MIT + +exports_files([ + "LICENSE.txt", +]) + +py_library( + name = "implib_gen_lib", + srcs = ["implib-gen.py"], + data = glob([ + "arch/**/*.S.tpl", + "arch/**/*.ini", + ]), +) diff --git a/third_party/xla/third_party/implib_so/make_stub.py b/third_party/xla/third_party/implib_so/make_stub.py new file mode 100644 index 00000000000000..f0e1fe564c0c17 --- /dev/null +++ b/third_party/xla/third_party/implib_so/make_stub.py @@ -0,0 +1,68 @@ +"""Given a list of symbols, generates a stub.""" + +import argparse +import configparser +import os +import string + +from bazel_tools.tools.python.runfiles import runfiles + +r = runfiles.Create() + + +def main(): + parser = argparse.ArgumentParser( + description='Generates stubs for CUDA libraries.' + ) + parser.add_argument('symbols', help='File containing a list of symbols.') + parser.add_argument( + '--outdir', '-o', help='Path to create wrapper at', default='.' + ) + parser.add_argument( + '--target', + help='Target platform name, e.g. x86_64, aarch64.', + required=True, + ) + args = parser.parse_args() + + config_path = r.Rlocation(f'implib_so/arch/{args.target}/config.ini') + table_path = r.Rlocation(f'implib_so/arch/{args.target}/table.S.tpl') + trampoline_path = r.Rlocation( + f'implib_so/arch/{args.target}/trampoline.S.tpl' + ) + + cfg = configparser.ConfigParser(inline_comment_prefixes=';') + cfg.read(config_path) + ptr_size = int(cfg['Arch']['PointerSize']) + + with open(args.symbols, 'r') as f: + funs = [s.strip() for s in f.readlines()] + + # Generate assembly code, containing a table for the resolved symbols and the + # trampolines. 
+ lib_name, _ = os.path.splitext(os.path.basename(args.symbols)) + + with open(os.path.join(args.outdir, f'{lib_name}.tramp.S'), 'w') as f: + with open(table_path, 'r') as t: + table_text = string.Template(t.read()).substitute( + lib_suffix=lib_name, table_size=ptr_size * (len(funs) + 1) + ) + f.write(table_text) + + with open(trampoline_path, 'r') as t: + tramp_tpl = string.Template(t.read()) + + for i, name in enumerate(funs): + tramp_text = tramp_tpl.substitute( + lib_suffix=lib_name, sym=name, offset=i * ptr_size, number=i + ) + f.write(tramp_text) + + # Generates a list of symbols, formatted as a list of C++ strings. + with open(os.path.join(args.outdir, f'{lib_name}.inc'), 'w') as f: + sym_names = ''.join(f' "{name}",\n' for name in funs) + f.write(sym_names) + + +if __name__ == '__main__': + main() diff --git a/third_party/xla/third_party/implib_so/workspace.bzl b/third_party/xla/third_party/implib_so/workspace.bzl new file mode 100644 index 00000000000000..01dad3b169f402 --- /dev/null +++ b/third_party/xla/third_party/implib_so/workspace.bzl @@ -0,0 +1,13 @@ +"""Implib.so is a simple equivalent of Windows DLL import libraries for POSIX +shared libraries.""" + +load("//third_party:repo.bzl", "tf_http_archive", "tf_mirror_urls") + +def repo(): + tf_http_archive( + name = "implib_so", + strip_prefix = "Implib.so-5fb84c2a750434b9df1da67d67b749eb929598f1", + sha256 = "10de0a616df24849f2a883747784c115f209708960e44556f5ce384de6f103e8", + urls = tf_mirror_urls("https://github.com/yugr/Implib.so/archive/5fb84c2a750434b9df1da67d67b749eb929598f1.tar.gz"), + build_file = "//third_party/implib_so:implib_so.BUILD", + ) diff --git a/third_party/xla/third_party/tsl/opensource_only.files b/third_party/xla/third_party/tsl/opensource_only.files index 4be161df5e2059..4a5a1e2a7c9f79 100644 --- a/third_party/xla/third_party/tsl/opensource_only.files +++ b/third_party/xla/third_party/tsl/opensource_only.files @@ -53,6 +53,9 @@ third_party/gpus/rocm/build_defs.bzl.tpl: 
third_party/gpus/rocm/rocm_config.h.tpl: third_party/gpus/rocm_configure.bzl: third_party/grpc/BUILD: +third_party/implib_so/BUILD: +third_party/implib_so/get_symbols.py: +third_party/implib_so/make_stub.py: third_party/llvm_openmp/BUILD: third_party/llvm_openmp/cmake_vars.bzl: third_party/llvm_openmp/expand_cmake_vars:.py @@ -145,6 +148,7 @@ tools/toolchains/win/bazel_211/BUILD: tools/toolchains/win/tf_win_05022023/BUILD: tools/toolchains/win_1803/py38/BUILD: tools/toolchains/win_1803/py39/BUILD: +tsl/cuda/stub.bzl: tsl/mkl/BUILD: tsl/mkl/LICENSE: tsl/mkl/MKL_LICENSE: diff --git a/third_party/xla/third_party/tsl/third_party/implib_so/BUILD b/third_party/xla/third_party/tsl/third_party/implib_so/BUILD new file mode 100644 index 00000000000000..8401d6152b88ab --- /dev/null +++ b/third_party/xla/third_party/tsl/third_party/implib_so/BUILD @@ -0,0 +1,23 @@ +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # MIT + +py_binary( + name = "get_symbols", + srcs = ["get_symbols.py"], + visibility = ["//visibility:public"], + deps = [ + "@bazel_tools//tools/python/runfiles", + "@implib_so//:implib_gen_lib", + ], +) + +py_binary( + name = "make_stub", + srcs = ["make_stub.py"], + visibility = ["//visibility:public"], + deps = [ + "@bazel_tools//tools/python/runfiles", + "@implib_so//:implib_gen_lib", + ], +) diff --git a/third_party/xla/third_party/tsl/third_party/implib_so/get_symbols.py b/third_party/xla/third_party/tsl/third_party/implib_so/get_symbols.py new file mode 100644 index 00000000000000..9625052f7b69b6 --- /dev/null +++ b/third_party/xla/third_party/tsl/third_party/implib_so/get_symbols.py @@ -0,0 +1,38 @@ +"""Given a .so file, lists symbols that should be included in a stub. 
+ +Example usage: +$ bazel run -c opt @local_tsl//third_party/implib_so:get_symbols +/usr/local/cuda/lib64/libcudart.so > third_party/tsl/tsl/cuda/cudart.symbols +""" + +import argparse +import importlib + +# We can't import implib-gen directly because it has a dash in its name. +implib = importlib.import_module('implib-gen') + + +def _is_exported_function(s): + return ( + s['Bind'] != 'LOCAL' + and s['Type'] == 'FUNC' + and s['Ndx'] != 'UND' + and s['Name'] not in ['', '_init', '_fini'] + and s['Default'] + ) + + +def main(): + parser = argparse.ArgumentParser( + description='Extracts a list of symbols from a shared library' + ) + parser.add_argument('library', help='Path to the .so file.') + args = parser.parse_args() + syms = implib.collect_syms(args.library) + funs = [s['Name'] for s in syms if _is_exported_function(s)] + for f in sorted(funs): + print(f) + + +if __name__ == '__main__': + main() diff --git a/third_party/xla/third_party/tsl/third_party/implib_so/implib_so.BUILD b/third_party/xla/third_party/tsl/third_party/implib_so/implib_so.BUILD new file mode 100644 index 00000000000000..bbfb2898eb12dd --- /dev/null +++ b/third_party/xla/third_party/tsl/third_party/implib_so/implib_so.BUILD @@ -0,0 +1,20 @@ +# Description: +# Implib.so is a simple equivalent of Windows DLL import libraries for POSIX +# shared libraries. 
+ +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # MIT + +exports_files([ + "LICENSE.txt", +]) + +py_library( + name = "implib_gen_lib", + srcs = ["implib-gen.py"], + data = glob([ + "arch/**/*.S.tpl", + "arch/**/*.ini", + ]), +) diff --git a/third_party/xla/third_party/tsl/third_party/implib_so/make_stub.py b/third_party/xla/third_party/tsl/third_party/implib_so/make_stub.py new file mode 100644 index 00000000000000..f0e1fe564c0c17 --- /dev/null +++ b/third_party/xla/third_party/tsl/third_party/implib_so/make_stub.py @@ -0,0 +1,68 @@ +"""Given a list of symbols, generates a stub.""" + +import argparse +import configparser +import os +import string + +from bazel_tools.tools.python.runfiles import runfiles + +r = runfiles.Create() + + +def main(): + parser = argparse.ArgumentParser( + description='Generates stubs for CUDA libraries.' + ) + parser.add_argument('symbols', help='File containing a list of symbols.') + parser.add_argument( + '--outdir', '-o', help='Path to create wrapper at', default='.' + ) + parser.add_argument( + '--target', + help='Target platform name, e.g. x86_64, aarch64.', + required=True, + ) + args = parser.parse_args() + + config_path = r.Rlocation(f'implib_so/arch/{args.target}/config.ini') + table_path = r.Rlocation(f'implib_so/arch/{args.target}/table.S.tpl') + trampoline_path = r.Rlocation( + f'implib_so/arch/{args.target}/trampoline.S.tpl' + ) + + cfg = configparser.ConfigParser(inline_comment_prefixes=';') + cfg.read(config_path) + ptr_size = int(cfg['Arch']['PointerSize']) + + with open(args.symbols, 'r') as f: + funs = [s.strip() for s in f.readlines()] + + # Generate assembly code, containing a table for the resolved symbols and the + # trampolines. 
+ lib_name, _ = os.path.splitext(os.path.basename(args.symbols)) + + with open(os.path.join(args.outdir, f'{lib_name}.tramp.S'), 'w') as f: + with open(table_path, 'r') as t: + table_text = string.Template(t.read()).substitute( + lib_suffix=lib_name, table_size=ptr_size * (len(funs) + 1) + ) + f.write(table_text) + + with open(trampoline_path, 'r') as t: + tramp_tpl = string.Template(t.read()) + + for i, name in enumerate(funs): + tramp_text = tramp_tpl.substitute( + lib_suffix=lib_name, sym=name, offset=i * ptr_size, number=i + ) + f.write(tramp_text) + + # Generates a list of symbols, formatted as a list of C++ strings. + with open(os.path.join(args.outdir, f'{lib_name}.inc'), 'w') as f: + sym_names = ''.join(f' "{name}",\n' for name in funs) + f.write(sym_names) + + +if __name__ == '__main__': + main() diff --git a/third_party/xla/third_party/tsl/third_party/implib_so/workspace.bzl b/third_party/xla/third_party/tsl/third_party/implib_so/workspace.bzl new file mode 100644 index 00000000000000..01dad3b169f402 --- /dev/null +++ b/third_party/xla/third_party/tsl/third_party/implib_so/workspace.bzl @@ -0,0 +1,13 @@ +"""Implib.so is a simple equivalent of Windows DLL import libraries for POSIX +shared libraries.""" + +load("//third_party:repo.bzl", "tf_http_archive", "tf_mirror_urls") + +def repo(): + tf_http_archive( + name = "implib_so", + strip_prefix = "Implib.so-5fb84c2a750434b9df1da67d67b749eb929598f1", + sha256 = "10de0a616df24849f2a883747784c115f209708960e44556f5ce384de6f103e8", + urls = tf_mirror_urls("https://github.com/yugr/Implib.so/archive/5fb84c2a750434b9df1da67d67b749eb929598f1.tar.gz"), + build_file = "//third_party/implib_so:implib_so.BUILD", + ) diff --git a/third_party/xla/third_party/tsl/tsl/cuda/BUILD.bazel b/third_party/xla/third_party/tsl/tsl/cuda/BUILD.bazel index ecb7bdb2cfffa7..71bed13a8bae96 100644 --- a/third_party/xla/third_party/tsl/tsl/cuda/BUILD.bazel +++ b/third_party/xla/third_party/tsl/tsl/cuda/BUILD.bazel @@ -1,6 +1,7 @@ # 
Description: # Stubs for dynamically loading CUDA. +load("//tsl/cuda:stub.bzl", "cuda_stub") load( "//tsl/platform:build_config.bzl", "tsl_cc_test", @@ -20,25 +21,48 @@ package( licenses = ["notice"], ) +cuda_stub( + name = "cublas", + srcs = ["cublas.symbols"], +) + cc_library( name = "cublas", - srcs = if_cuda_is_configured(["cublas_stub.cc"]), + srcs = if_cuda_is_configured([ + "cublas_stub.cc", + "cublas.tramp.S", + ]), linkopts = if_cuda_is_configured(cuda_rpath_flags( "nvidia/cublas/lib", )), - textual_hdrs = glob(["cublas_*.inc"]), + local_defines = [ + "IMPLIB_EXPORT_SHIMS=1", + ], + textual_hdrs = ["cublas.inc"], visibility = ["//visibility:public"], deps = if_cuda_is_configured([ + "@com_google_absl//absl/container:flat_hash_set", "@local_config_cuda//cuda:cuda_headers", "//tsl/platform:dso_loader", "//tsl/platform:env", ]), ) +cuda_stub( + name = "cublasLt", + srcs = ["cublasLt.symbols"], +) + cc_library( name = "cublas_lt", - srcs = if_cuda_is_configured(["cublasLt_stub.cc"]), - textual_hdrs = glob(["cublasLt_*.inc"]), + srcs = if_cuda_is_configured([ + "cublasLt_stub.cc", + "cublasLt.tramp.S", + ]), + local_defines = [ + "IMPLIB_EXPORT_SHIMS=1", + ], + textual_hdrs = ["cublasLt.inc"], visibility = ["//visibility:public"], deps = if_cuda_is_configured([ "@local_config_cuda//cuda:cuda_headers", @@ -47,10 +71,21 @@ cc_library( ]), ) +cuda_stub( + name = "cuda", + srcs = ["cuda.symbols"], +) + cc_library( name = "cuda", - srcs = if_cuda_is_configured(["cuda_stub.cc"]), - textual_hdrs = glob(["cuda_*.inc"]), + srcs = if_cuda_is_configured([ + "cuda_stub.cc", + "cuda.tramp.S", + ]), + local_defines = [ + "IMPLIB_EXPORT_SHIMS=1", + ], + textual_hdrs = ["cuda.inc"], visibility = ["//visibility:public"], deps = if_cuda_is_configured([ "@local_config_cuda//cuda:cuda_headers", @@ -59,18 +94,29 @@ cc_library( ]), ) +cuda_stub( + name = "cudart", + srcs = ["cudart.symbols"], +) + cc_library( name = "cudart", srcs = select({ # include dynamic loading implementation 
only when if_cuda_is_configured and build dynamically - "//tsl:is_cuda_enabled_and_oss": ["cudart_stub.cc"], + "//tsl:is_cuda_enabled_and_oss": [ + "cudart.tramp.S", + "cudart_stub.cc", + ], "//conditions:default": [], }), linkopts = select({ "//tsl:is_cuda_enabled_and_oss": cuda_rpath_flags("nvidia/cuda_runtime/lib"), "//conditions:default": [], }), - textual_hdrs = glob(["cuda_runtime_*.inc"]), + local_defines = [ + "IMPLIB_EXPORT_SHIMS=1", + ], + textual_hdrs = ["cudart.inc"], visibility = ["//visibility:public"], deps = select({ "//tsl:is_cuda_enabled_and_oss": [ @@ -83,11 +129,22 @@ cc_library( }), ) +cuda_stub( + name = "cudnn", + srcs = ["cudnn.symbols"], +) + cc_library( name = "cudnn", - srcs = if_cuda_is_configured(["cudnn_stub.cc"]), + srcs = if_cuda_is_configured([ + "cudnn_stub.cc", + "cudnn.tramp.S", + ]), linkopts = if_cuda_is_configured(cuda_rpath_flags("nvidia/cudnn/lib")), - textual_hdrs = glob(["cudnn_*.inc"]), + local_defines = [ + "IMPLIB_EXPORT_SHIMS=1", + ], + textual_hdrs = ["cudnn.inc"], visibility = ["//visibility:public"], deps = if_cuda_is_configured([ ":cudnn_version", @@ -130,11 +187,22 @@ tsl_cc_test( ], ) +cuda_stub( + name = "cufft", + srcs = ["cufft.symbols"], +) + cc_library( name = "cufft", - srcs = if_cuda_is_configured(["cufft_stub.cc"]), + srcs = if_cuda_is_configured([ + "cufft_stub.cc", + "cufft.tramp.S", + ]), linkopts = if_cuda_is_configured(cuda_rpath_flags("nvidia/cufft/lib")), - textual_hdrs = glob(["cufft_*.inc"]), + local_defines = [ + "IMPLIB_EXPORT_SHIMS=1", + ], + textual_hdrs = ["cufft.inc"], visibility = ["//visibility:public"], deps = if_cuda_is_configured([ "@local_config_cuda//cuda:cuda_headers", @@ -143,12 +211,23 @@ cc_library( ]), ) +cuda_stub( + name = "cupti", + srcs = ["cupti.symbols"], +) + cc_library( name = "cupti", - srcs = if_cuda_is_configured(["cupti_stub.cc"]), + srcs = if_cuda_is_configured([ + "cupti_stub.cc", + "cupti.tramp.S", + ]), data = 
if_cuda_is_configured(["@local_config_cuda//cuda:cupti_dsos"]), linkopts = if_cuda_is_configured(cuda_rpath_flags("nvidia/cuda_cupti/lib")), - textual_hdrs = glob(["cupti_*.inc"]), + local_defines = [ + "IMPLIB_EXPORT_SHIMS=1", + ], + textual_hdrs = ["cupti.inc"], visibility = ["//visibility:public"], deps = if_cuda_is_configured([ "@local_config_cuda//cuda:cuda_headers", @@ -158,11 +237,22 @@ cc_library( ]), ) +cuda_stub( + name = "cusolver", + srcs = ["cusolver.symbols"], +) + cc_library( name = "cusolver", - srcs = if_cuda_is_configured(["cusolver_stub.cc"]), + srcs = if_cuda_is_configured([ + "cusolver_stub.cc", + "cusolver.tramp.S", + ]), linkopts = if_cuda_is_configured(cuda_rpath_flags("nvidia/cusolver/lib")), - textual_hdrs = glob(["cusolver_dense_*.inc"]), + local_defines = [ + "IMPLIB_EXPORT_SHIMS=1", + ], + textual_hdrs = ["cusolver.inc"], visibility = ["//visibility:public"], deps = if_cuda_is_configured([ "@local_config_cuda//cuda:cuda_headers", @@ -171,11 +261,22 @@ cc_library( ]), ) +cuda_stub( + name = "cusparse", + srcs = ["cusparse.symbols"], +) + cc_library( name = "cusparse", - srcs = if_cuda_is_configured(["cusparse_stub.cc"]), + srcs = if_cuda_is_configured([ + "cusparse_stub.cc", + "cusparse.tramp.S", + ]), linkopts = if_cuda_is_configured(cuda_rpath_flags("nvidia/cusparse/lib")), - textual_hdrs = glob(["cusparse_*.inc"]), + local_defines = [ + "IMPLIB_EXPORT_SHIMS=1", + ], + textual_hdrs = ["cusparse.inc"], visibility = ["//visibility:public"], deps = if_cuda_is_configured([ "@local_config_cuda//cuda:cuda_headers", diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cublas.symbols b/third_party/xla/third_party/tsl/tsl/cuda/cublas.symbols new file mode 100644 index 00000000000000..bd93675eb5f299 --- /dev/null +++ b/third_party/xla/third_party/tsl/tsl/cuda/cublas.symbols @@ -0,0 +1,736 @@ +cublasAlloc +cublasAsumEx +cublasAsumEx_64 +cublasAxpyEx +cublasAxpyEx_64 +cublasCaxpy +cublasCaxpy_v2 +cublasCaxpy_v2_64 +cublasCbdmm +cublasCcopy 
+cublasCcopy_v2 +cublasCcopy_v2_64 +cublasCdgmm +cublasCdgmm_64 +cublasCdotc +cublasCdotc_v2 +cublasCdotc_v2_64 +cublasCdotu +cublasCdotu_v2 +cublasCdotu_v2_64 +cublasCgbmv +cublasCgbmv_v2 +cublasCgbmv_v2_64 +cublasCgeam +cublasCgeam_64 +cublasCgelsBatched +cublasCgemm +cublasCgemm3m +cublasCgemm3mBatched +cublasCgemm3mBatched_64 +cublasCgemm3mEx +cublasCgemm3mEx_64 +cublasCgemm3mStridedBatched +cublasCgemm3mStridedBatched_64 +cublasCgemm3m_64 +cublasCgemmBatched +cublasCgemmBatched_64 +cublasCgemmEx +cublasCgemmEx_64 +cublasCgemmStridedBatched +cublasCgemmStridedBatched_64 +cublasCgemm_v2 +cublasCgemm_v2_64 +cublasCgemv +cublasCgemvBatched +cublasCgemvBatched_64 +cublasCgemvStridedBatched +cublasCgemvStridedBatched_64 +cublasCgemv_v2 +cublasCgemv_v2_64 +cublasCgeqrfBatched +cublasCgerc +cublasCgerc_v2 +cublasCgerc_v2_64 +cublasCgeru +cublasCgeru_v2 +cublasCgeru_v2_64 +cublasCgetrfBatched +cublasCgetriBatched +cublasCgetrsBatched +cublasChbmv +cublasChbmv_v2 +cublasChbmv_v2_64 +cublasChemm +cublasChemm_v2 +cublasChemm_v2_64 +cublasChemv +cublasChemv_v2 +cublasChemv_v2_64 +cublasCher +cublasCher2 +cublasCher2_v2 +cublasCher2_v2_64 +cublasCher2k +cublasCher2k_v2 +cublasCher2k_v2_64 +cublasCher_v2 +cublasCher_v2_64 +cublasCherk +cublasCherk3mEx +cublasCherk3mEx_64 +cublasCherkEx +cublasCherkEx_64 +cublasCherk_v2 +cublasCherk_v2_64 +cublasCherkx +cublasCherkx_64 +cublasChpmv +cublasChpmv_v2 +cublasChpmv_v2_64 +cublasChpr +cublasChpr2 +cublasChpr2_v2 +cublasChpr2_v2_64 +cublasChpr_v2 +cublasChpr_v2_64 +cublasCmatinvBatched +cublasCopyEx +cublasCopyEx_64 +cublasCreate_v2 +cublasCrot +cublasCrot_v2 +cublasCrot_v2_64 +cublasCrotg +cublasCrotg_v2 +cublasCscal +cublasCscal_v2 +cublasCscal_v2_64 +cublasCsrot +cublasCsrot_v2 +cublasCsrot_v2_64 +cublasCsscal +cublasCsscal_v2 +cublasCsscal_v2_64 +cublasCswap +cublasCswap_v2 +cublasCswap_v2_64 +cublasCsymm +cublasCsymm_v2 +cublasCsymm_v2_64 +cublasCsymv_v2 +cublasCsymv_v2_64 +cublasCsyr2_v2 +cublasCsyr2_v2_64 +cublasCsyr2k 
+cublasCsyr2k_v2 +cublasCsyr2k_v2_64 +cublasCsyr_v2 +cublasCsyr_v2_64 +cublasCsyrk +cublasCsyrk3mEx +cublasCsyrk3mEx_64 +cublasCsyrkEx +cublasCsyrkEx_64 +cublasCsyrk_v2 +cublasCsyrk_v2_64 +cublasCsyrkx +cublasCsyrkx_64 +cublasCtbmv +cublasCtbmv_v2 +cublasCtbmv_v2_64 +cublasCtbsv +cublasCtbsv_v2 +cublasCtbsv_v2_64 +cublasCtpmv +cublasCtpmv_v2 +cublasCtpmv_v2_64 +cublasCtpsv +cublasCtpsv_v2 +cublasCtpsv_v2_64 +cublasCtpttr +cublasCtrmm +cublasCtrmm_v2 +cublasCtrmm_v2_64 +cublasCtrmv +cublasCtrmv_v2 +cublasCtrmv_v2_64 +cublasCtrsm +cublasCtrsmBatched +cublasCtrsmBatched_64 +cublasCtrsm_v2 +cublasCtrsm_v2_64 +cublasCtrsv +cublasCtrsv_v2 +cublasCtrsv_v2_64 +cublasCtrttp +cublasDasum +cublasDasum_v2 +cublasDasum_v2_64 +cublasDaxpy +cublasDaxpy_v2 +cublasDaxpy_v2_64 +cublasDbdmm +cublasDcopy +cublasDcopy_v2 +cublasDcopy_v2_64 +cublasDdgmm +cublasDdgmm_64 +cublasDdot +cublasDdot_v2 +cublasDdot_v2_64 +cublasDestroy_v2 +cublasDgbmv +cublasDgbmv_v2 +cublasDgbmv_v2_64 +cublasDgeam +cublasDgeam_64 +cublasDgelsBatched +cublasDgemm +cublasDgemmBatched +cublasDgemmBatched_64 +cublasDgemmStridedBatched +cublasDgemmStridedBatched_64 +cublasDgemm_v2 +cublasDgemm_v2_64 +cublasDgemv +cublasDgemvBatched +cublasDgemvBatched_64 +cublasDgemvStridedBatched +cublasDgemvStridedBatched_64 +cublasDgemv_v2 +cublasDgemv_v2_64 +cublasDgeqrfBatched +cublasDger +cublasDger_v2 +cublasDger_v2_64 +cublasDgetrfBatched +cublasDgetriBatched +cublasDgetrsBatched +cublasDmatinvBatched +cublasDnrm2 +cublasDnrm2_v2 +cublasDnrm2_v2_64 +cublasDotEx +cublasDotEx_64 +cublasDotcEx +cublasDotcEx_64 +cublasDrot +cublasDrot_v2 +cublasDrot_v2_64 +cublasDrotg +cublasDrotg_v2 +cublasDrotm +cublasDrotm_v2 +cublasDrotm_v2_64 +cublasDrotmg +cublasDrotmg_v2 +cublasDsbmv +cublasDsbmv_v2 +cublasDsbmv_v2_64 +cublasDscal +cublasDscal_v2 +cublasDscal_v2_64 +cublasDspmv +cublasDspmv_v2 +cublasDspmv_v2_64 +cublasDspr +cublasDspr2 +cublasDspr2_v2 +cublasDspr2_v2_64 +cublasDspr_v2 +cublasDspr_v2_64 +cublasDswap +cublasDswap_v2 
+cublasDswap_v2_64 +cublasDsymm +cublasDsymm_v2 +cublasDsymm_v2_64 +cublasDsymv +cublasDsymv_v2 +cublasDsymv_v2_64 +cublasDsyr +cublasDsyr2 +cublasDsyr2_v2 +cublasDsyr2_v2_64 +cublasDsyr2k +cublasDsyr2k_v2 +cublasDsyr2k_v2_64 +cublasDsyr_v2 +cublasDsyr_v2_64 +cublasDsyrk +cublasDsyrk_v2 +cublasDsyrk_v2_64 +cublasDsyrkx +cublasDsyrkx_64 +cublasDtbmv +cublasDtbmv_v2 +cublasDtbmv_v2_64 +cublasDtbsv +cublasDtbsv_v2 +cublasDtbsv_v2_64 +cublasDtpmv +cublasDtpmv_v2 +cublasDtpmv_v2_64 +cublasDtpsv +cublasDtpsv_v2 +cublasDtpsv_v2_64 +cublasDtpttr +cublasDtrmm +cublasDtrmm_v2 +cublasDtrmm_v2_64 +cublasDtrmv +cublasDtrmv_v2 +cublasDtrmv_v2_64 +cublasDtrsm +cublasDtrsmBatched +cublasDtrsmBatched_64 +cublasDtrsm_v2 +cublasDtrsm_v2_64 +cublasDtrsv +cublasDtrsv_v2 +cublasDtrsv_v2_64 +cublasDtrttp +cublasDzasum +cublasDzasum_v2 +cublasDzasum_v2_64 +cublasDznrm2 +cublasDznrm2_v2 +cublasDznrm2_v2_64 +cublasFree +cublasGemmBatchedEx +cublasGemmBatchedEx_64 +cublasGemmEx +cublasGemmEx_64 +cublasGemmStridedBatchedEx +cublasGemmStridedBatchedEx_64 +cublasGetAtomicsMode +cublasGetBackdoor +cublasGetCudartVersion +cublasGetError +cublasGetLoggerCallback +cublasGetMathMode +cublasGetMatrix +cublasGetMatrixAsync +cublasGetMatrixAsync_64 +cublasGetMatrix_64 +cublasGetPointerMode_v2 +cublasGetProperty +cublasGetSmCountTarget +cublasGetStatusName +cublasGetStatusString +cublasGetStream_v2 +cublasGetVector +cublasGetVectorAsync +cublasGetVectorAsync_64 +cublasGetVector_64 +cublasGetVersion +cublasGetVersion_v2 +cublasHSHgemvBatched +cublasHSHgemvBatched_64 +cublasHSHgemvStridedBatched +cublasHSHgemvStridedBatched_64 +cublasHSSgemvBatched +cublasHSSgemvBatched_64 +cublasHSSgemvStridedBatched +cublasHSSgemvStridedBatched_64 +cublasHgemm +cublasHgemmBatched +cublasHgemmBatched_64 +cublasHgemmStridedBatched +cublasHgemmStridedBatched_64 +cublasHgemm_64 +cublasIamaxEx +cublasIamaxEx_64 +cublasIaminEx +cublasIaminEx_64 +cublasIcamax +cublasIcamax_v2 +cublasIcamax_v2_64 +cublasIcamin +cublasIcamin_v2 
+cublasIcamin_v2_64 +cublasIdamax +cublasIdamax_v2 +cublasIdamax_v2_64 +cublasIdamin +cublasIdamin_v2 +cublasIdamin_v2_64 +cublasInit +cublasIsamax +cublasIsamax_v2 +cublasIsamax_v2_64 +cublasIsamin +cublasIsamin_v2 +cublasIsamin_v2_64 +cublasIzamax +cublasIzamax_v2 +cublasIzamax_v2_64 +cublasIzamin +cublasIzamin_v2 +cublasIzamin_v2_64 +cublasLoggerConfigure +cublasNrm2Ex +cublasNrm2Ex_64 +cublasRotEx +cublasRotEx_64 +cublasRotgEx +cublasRotmEx +cublasRotmEx_64 +cublasRotmgEx +cublasSasum +cublasSasum_v2 +cublasSasum_v2_64 +cublasSaxpy +cublasSaxpy_v2 +cublasSaxpy_v2_64 +cublasSbdmm +cublasScalEx +cublasScalEx_64 +cublasScasum +cublasScasum_v2 +cublasScasum_v2_64 +cublasScnrm2 +cublasScnrm2_v2 +cublasScnrm2_v2_64 +cublasScopy +cublasScopy_v2 +cublasScopy_v2_64 +cublasSdgmm +cublasSdgmm_64 +cublasSdot +cublasSdot_v2 +cublasSdot_v2_64 +cublasSetAtomicsMode +cublasSetBackdoor +cublasSetBackdoorEx +cublasSetKernelStream +cublasSetLoggerCallback +cublasSetMathMode +cublasSetMatrix +cublasSetMatrixAsync +cublasSetMatrixAsync_64 +cublasSetMatrix_64 +cublasSetPointerMode_v2 +cublasSetSmCountTarget +cublasSetStream_v2 +cublasSetVector +cublasSetVectorAsync +cublasSetVectorAsync_64 +cublasSetVector_64 +cublasSetWorkspace_v2 +cublasSgbmv +cublasSgbmv_v2 +cublasSgbmv_v2_64 +cublasSgeam +cublasSgeam_64 +cublasSgelsBatched +cublasSgemm +cublasSgemmBatched +cublasSgemmBatched_64 +cublasSgemmEx +cublasSgemmEx_64 +cublasSgemmStridedBatched +cublasSgemmStridedBatched_64 +cublasSgemm_v2 +cublasSgemm_v2_64 +cublasSgemv +cublasSgemvBatched +cublasSgemvBatched_64 +cublasSgemvStridedBatched +cublasSgemvStridedBatched_64 +cublasSgemv_v2 +cublasSgemv_v2_64 +cublasSgeqrfBatched +cublasSger +cublasSger_v2 +cublasSger_v2_64 +cublasSgetrfBatched +cublasSgetriBatched +cublasSgetrsBatched +cublasShutdown +cublasSmatinvBatched +cublasSnrm2 +cublasSnrm2_v2 +cublasSnrm2_v2_64 +cublasSrot +cublasSrot_v2 +cublasSrot_v2_64 +cublasSrotg +cublasSrotg_v2 +cublasSrotm +cublasSrotm_v2 +cublasSrotm_v2_64 
+cublasSrotmg +cublasSrotmg_v2 +cublasSsbmv +cublasSsbmv_v2 +cublasSsbmv_v2_64 +cublasSscal +cublasSscal_v2 +cublasSscal_v2_64 +cublasSspmv +cublasSspmv_v2 +cublasSspmv_v2_64 +cublasSspr +cublasSspr2 +cublasSspr2_v2 +cublasSspr2_v2_64 +cublasSspr_v2 +cublasSspr_v2_64 +cublasSswap +cublasSswap_v2 +cublasSswap_v2_64 +cublasSsymm +cublasSsymm_v2 +cublasSsymm_v2_64 +cublasSsymv +cublasSsymv_v2 +cublasSsymv_v2_64 +cublasSsyr +cublasSsyr2 +cublasSsyr2_v2 +cublasSsyr2_v2_64 +cublasSsyr2k +cublasSsyr2k_v2 +cublasSsyr2k_v2_64 +cublasSsyr_v2 +cublasSsyr_v2_64 +cublasSsyrk +cublasSsyrk_v2 +cublasSsyrk_v2_64 +cublasSsyrkx +cublasSsyrkx_64 +cublasStbmv +cublasStbmv_v2 +cublasStbmv_v2_64 +cublasStbsv +cublasStbsv_v2 +cublasStbsv_v2_64 +cublasStpmv +cublasStpmv_v2 +cublasStpmv_v2_64 +cublasStpsv +cublasStpsv_v2 +cublasStpsv_v2_64 +cublasStpttr +cublasStrmm +cublasStrmm_v2 +cublasStrmm_v2_64 +cublasStrmv +cublasStrmv_v2 +cublasStrmv_v2_64 +cublasStrsm +cublasStrsmBatched +cublasStrsmBatched_64 +cublasStrsm_v2 +cublasStrsm_v2_64 +cublasStrsv +cublasStrsv_v2 +cublasStrsv_v2_64 +cublasStrttp +cublasSwapEx +cublasSwapEx_64 +cublasTSSgemvBatched +cublasTSSgemvBatched_64 +cublasTSSgemvStridedBatched +cublasTSSgemvStridedBatched_64 +cublasTSTgemvBatched +cublasTSTgemvBatched_64 +cublasTSTgemvStridedBatched +cublasTSTgemvStridedBatched_64 +cublasUint8gemmBias +cublasXerbla +cublasXtCgemm +cublasXtChemm +cublasXtCher2k +cublasXtCherk +cublasXtCherkx +cublasXtCreate +cublasXtCspmm +cublasXtCsymm +cublasXtCsyr2k +cublasXtCsyrk +cublasXtCsyrkx +cublasXtCtrmm +cublasXtCtrsm +cublasXtDestroy +cublasXtDeviceSelect +cublasXtDgemm +cublasXtDspmm +cublasXtDsymm +cublasXtDsyr2k +cublasXtDsyrk +cublasXtDsyrkx +cublasXtDtrmm +cublasXtDtrsm +cublasXtGetBlockDim +cublasXtGetNumBoards +cublasXtGetPinningMemMode +cublasXtMaxBoards +cublasXtSetBlockDim +cublasXtSetCpuRatio +cublasXtSetCpuRoutine +cublasXtSetPinningMemMode +cublasXtSgemm +cublasXtSspmm +cublasXtSsymm +cublasXtSsyr2k +cublasXtSsyrk 
+cublasXtSsyrkx +cublasXtStrmm +cublasXtStrsm +cublasXtZgemm +cublasXtZhemm +cublasXtZher2k +cublasXtZherk +cublasXtZherkx +cublasXtZspmm +cublasXtZsymm +cublasXtZsyr2k +cublasXtZsyrk +cublasXtZsyrkx +cublasXtZtrmm +cublasXtZtrsm +cublasZaxpy +cublasZaxpy_v2 +cublasZaxpy_v2_64 +cublasZbdmm +cublasZcopy +cublasZcopy_v2 +cublasZcopy_v2_64 +cublasZdgmm +cublasZdgmm_64 +cublasZdotc +cublasZdotc_v2 +cublasZdotc_v2_64 +cublasZdotu +cublasZdotu_v2 +cublasZdotu_v2_64 +cublasZdrot +cublasZdrot_v2 +cublasZdrot_v2_64 +cublasZdscal +cublasZdscal_v2 +cublasZdscal_v2_64 +cublasZgbmv +cublasZgbmv_v2 +cublasZgbmv_v2_64 +cublasZgeam +cublasZgeam_64 +cublasZgelsBatched +cublasZgemm +cublasZgemm3m +cublasZgemm3m_64 +cublasZgemmBatched +cublasZgemmBatched_64 +cublasZgemmStridedBatched +cublasZgemmStridedBatched_64 +cublasZgemm_v2 +cublasZgemm_v2_64 +cublasZgemv +cublasZgemvBatched +cublasZgemvBatched_64 +cublasZgemvStridedBatched +cublasZgemvStridedBatched_64 +cublasZgemv_v2 +cublasZgemv_v2_64 +cublasZgeqrfBatched +cublasZgerc +cublasZgerc_v2 +cublasZgerc_v2_64 +cublasZgeru +cublasZgeru_v2 +cublasZgeru_v2_64 +cublasZgetrfBatched +cublasZgetriBatched +cublasZgetrsBatched +cublasZhbmv +cublasZhbmv_v2 +cublasZhbmv_v2_64 +cublasZhemm +cublasZhemm_v2 +cublasZhemm_v2_64 +cublasZhemv +cublasZhemv_v2 +cublasZhemv_v2_64 +cublasZher +cublasZher2 +cublasZher2_v2 +cublasZher2_v2_64 +cublasZher2k +cublasZher2k_v2 +cublasZher2k_v2_64 +cublasZher_v2 +cublasZher_v2_64 +cublasZherk +cublasZherk_v2 +cublasZherk_v2_64 +cublasZherkx +cublasZherkx_64 +cublasZhpmv +cublasZhpmv_v2 +cublasZhpmv_v2_64 +cublasZhpr +cublasZhpr2 +cublasZhpr2_v2 +cublasZhpr2_v2_64 +cublasZhpr_v2 +cublasZhpr_v2_64 +cublasZmatinvBatched +cublasZrot +cublasZrot_v2 +cublasZrot_v2_64 +cublasZrotg +cublasZrotg_v2 +cublasZscal +cublasZscal_v2 +cublasZscal_v2_64 +cublasZswap +cublasZswap_v2 +cublasZswap_v2_64 +cublasZsymm +cublasZsymm_v2 +cublasZsymm_v2_64 +cublasZsymv_v2 +cublasZsymv_v2_64 +cublasZsyr2_v2 +cublasZsyr2_v2_64 
+cublasZsyr2k +cublasZsyr2k_v2 +cublasZsyr2k_v2_64 +cublasZsyr_v2 +cublasZsyr_v2_64 +cublasZsyrk +cublasZsyrk_v2 +cublasZsyrk_v2_64 +cublasZsyrkx +cublasZsyrkx_64 +cublasZtbmv +cublasZtbmv_v2 +cublasZtbmv_v2_64 +cublasZtbsv +cublasZtbsv_v2 +cublasZtbsv_v2_64 +cublasZtpmv +cublasZtpmv_v2 +cublasZtpmv_v2_64 +cublasZtpsv +cublasZtpsv_v2 +cublasZtpsv_v2_64 +cublasZtpttr +cublasZtrmm +cublasZtrmm_v2 +cublasZtrmm_v2_64 +cublasZtrmv +cublasZtrmv_v2 +cublasZtrmv_v2_64 +cublasZtrsm +cublasZtrsmBatched +cublasZtrsmBatched_64 +cublasZtrsm_v2 +cublasZtrsm_v2_64 +cublasZtrsv +cublasZtrsv_v2 +cublasZtrsv_v2_64 +cublasZtrttp diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cublasLt.symbols b/third_party/xla/third_party/tsl/tsl/cuda/cublasLt.symbols new file mode 100644 index 00000000000000..7f93cfcb3ad49f --- /dev/null +++ b/third_party/xla/third_party/tsl/tsl/cuda/cublasLt.symbols @@ -0,0 +1,234 @@ +cublasLtACCMatmul +cublasLtACCMatmulAlgoCapGetAttribute +cublasLtACCMatmulAlgoCheck +cublasLtACCMatmulAlgoGetHeuristic +cublasLtACCMatmulAlgoGetIds +cublasLtACCMatmulAlgoInit +cublasLtAlgoCharacteristicGetAttribute +cublasLtBIIMatmul +cublasLtBIIMatmulAlgoCapGetAttribute +cublasLtBIIMatmulAlgoCheck +cublasLtBIIMatmulAlgoGetHeuristic +cublasLtBIIMatmulAlgoGetIds +cublasLtBIIMatmulAlgoInit +cublasLtBSBMatmul +cublasLtBSBMatmulAlgoCapGetAttribute +cublasLtBSBMatmulAlgoCheck +cublasLtBSBMatmulAlgoGetHeuristic +cublasLtBSBMatmulAlgoGetIds +cublasLtBSBMatmulAlgoInit +cublasLtBSSMatmul +cublasLtBSSMatmulAlgoCapGetAttribute +cublasLtBSSMatmulAlgoCheck +cublasLtBSSMatmulAlgoGetHeuristic +cublasLtBSSMatmulAlgoGetIds +cublasLtBSSMatmulAlgoInit +cublasLtCCCMatmul +cublasLtCCCMatmulAlgoCapGetAttribute +cublasLtCCCMatmulAlgoCheck +cublasLtCCCMatmulAlgoGetHeuristic +cublasLtCCCMatmulAlgoGetIds +cublasLtCCCMatmulAlgoInit +cublasLtCreate +cublasLtCtxInit +cublasLtDDDMatmul +cublasLtDDDMatmulAlgoCapGetAttribute +cublasLtDDDMatmulAlgoCheck +cublasLtDDDMatmulAlgoGetHeuristic 
+cublasLtDDDMatmulAlgoGetIds +cublasLtDDDMatmulAlgoInit +cublasLtDestroy +cublasLtE4m3E4m3Fp32Bf16Bf16MatmulAlgoCapGetAttribute +cublasLtE4m3E4m3Fp32Bf16Bf16MatmulAlgoCheck +cublasLtE4m3E4m3Fp32Bf16Bf16MatmulAlgoInit +cublasLtE4m3E4m3Fp32Bf16E4m3MatmulAlgoCapGetAttribute +cublasLtE4m3E4m3Fp32Bf16E4m3MatmulAlgoCheck +cublasLtE4m3E4m3Fp32Bf16E4m3MatmulAlgoInit +cublasLtE4m3E4m3Fp32Fp16E4m3MatmulAlgoCapGetAttribute +cublasLtE4m3E4m3Fp32Fp16E4m3MatmulAlgoCheck +cublasLtE4m3E4m3Fp32Fp16E4m3MatmulAlgoInit +cublasLtE4m3E4m3Fp32Fp16Fp16MatmulAlgoCapGetAttribute +cublasLtE4m3E4m3Fp32Fp16Fp16MatmulAlgoCheck +cublasLtE4m3E4m3Fp32Fp16Fp16MatmulAlgoInit +cublasLtE4m3E4m3Fp32Fp32Fp32MatmulAlgoCapGetAttribute +cublasLtE4m3E4m3Fp32Fp32Fp32MatmulAlgoCheck +cublasLtE4m3E4m3Fp32Fp32Fp32MatmulAlgoInit +cublasLtE4m3E5m2Fp32Bf16Bf16MatmulAlgoCapGetAttribute +cublasLtE4m3E5m2Fp32Bf16Bf16MatmulAlgoCheck +cublasLtE4m3E5m2Fp32Bf16Bf16MatmulAlgoInit +cublasLtE4m3E5m2Fp32Bf16E4m3MatmulAlgoCapGetAttribute +cublasLtE4m3E5m2Fp32Bf16E4m3MatmulAlgoCheck +cublasLtE4m3E5m2Fp32Bf16E4m3MatmulAlgoInit +cublasLtE4m3E5m2Fp32Bf16E5m2MatmulAlgoCapGetAttribute +cublasLtE4m3E5m2Fp32Bf16E5m2MatmulAlgoCheck +cublasLtE4m3E5m2Fp32Bf16E5m2MatmulAlgoInit +cublasLtE4m3E5m2Fp32Fp16E4m3MatmulAlgoCapGetAttribute +cublasLtE4m3E5m2Fp32Fp16E4m3MatmulAlgoCheck +cublasLtE4m3E5m2Fp32Fp16E4m3MatmulAlgoInit +cublasLtE4m3E5m2Fp32Fp16E5m2MatmulAlgoCapGetAttribute +cublasLtE4m3E5m2Fp32Fp16E5m2MatmulAlgoCheck +cublasLtE4m3E5m2Fp32Fp16E5m2MatmulAlgoInit +cublasLtE4m3E5m2Fp32Fp16Fp16MatmulAlgoCapGetAttribute +cublasLtE4m3E5m2Fp32Fp16Fp16MatmulAlgoCheck +cublasLtE4m3E5m2Fp32Fp16Fp16MatmulAlgoInit +cublasLtE4m3E5m2Fp32Fp32Fp32MatmulAlgoCapGetAttribute +cublasLtE4m3E5m2Fp32Fp32Fp32MatmulAlgoCheck +cublasLtE4m3E5m2Fp32Fp32Fp32MatmulAlgoInit +cublasLtE5m2E4m3Fp32Bf16Bf16MatmulAlgoCapGetAttribute +cublasLtE5m2E4m3Fp32Bf16Bf16MatmulAlgoCheck +cublasLtE5m2E4m3Fp32Bf16Bf16MatmulAlgoInit +cublasLtE5m2E4m3Fp32Bf16E4m3MatmulAlgoCapGetAttribute 
+cublasLtE5m2E4m3Fp32Bf16E4m3MatmulAlgoCheck +cublasLtE5m2E4m3Fp32Bf16E4m3MatmulAlgoInit +cublasLtE5m2E4m3Fp32Bf16E5m2MatmulAlgoCapGetAttribute +cublasLtE5m2E4m3Fp32Bf16E5m2MatmulAlgoCheck +cublasLtE5m2E4m3Fp32Bf16E5m2MatmulAlgoInit +cublasLtE5m2E4m3Fp32Fp16E4m3MatmulAlgoCapGetAttribute +cublasLtE5m2E4m3Fp32Fp16E4m3MatmulAlgoCheck +cublasLtE5m2E4m3Fp32Fp16E4m3MatmulAlgoInit +cublasLtE5m2E4m3Fp32Fp16E5m2MatmulAlgoCapGetAttribute +cublasLtE5m2E4m3Fp32Fp16E5m2MatmulAlgoCheck +cublasLtE5m2E4m3Fp32Fp16E5m2MatmulAlgoInit +cublasLtE5m2E4m3Fp32Fp16Fp16MatmulAlgoCapGetAttribute +cublasLtE5m2E4m3Fp32Fp16Fp16MatmulAlgoCheck +cublasLtE5m2E4m3Fp32Fp16Fp16MatmulAlgoInit +cublasLtE5m2E4m3Fp32Fp32Fp32MatmulAlgoCapGetAttribute +cublasLtE5m2E4m3Fp32Fp32Fp32MatmulAlgoCheck +cublasLtE5m2E4m3Fp32Fp32Fp32MatmulAlgoInit +cublasLtGetCudartVersion +cublasLtGetProperty +cublasLtGetStatusName +cublasLtGetStatusString +cublasLtGetVersion +cublasLtHHHMatmul +cublasLtHHHMatmulAlgoCapGetAttribute +cublasLtHHHMatmulAlgoCheck +cublasLtHHHMatmulAlgoGetHeuristic +cublasLtHHHMatmulAlgoGetIds +cublasLtHHHMatmulAlgoInit +cublasLtHSHMatmul +cublasLtHSHMatmulAlgoCapGetAttribute +cublasLtHSHMatmulAlgoCheck +cublasLtHSHMatmulAlgoGetHeuristic +cublasLtHSHMatmulAlgoGetIds +cublasLtHSHMatmulAlgoInit +cublasLtHSSMatmul +cublasLtHSSMatmulAlgoCapGetAttribute +cublasLtHSSMatmulAlgoCheck +cublasLtHSSMatmulAlgoGetHeuristic +cublasLtHSSMatmulAlgoGetIds +cublasLtHSSMatmulAlgoInit +cublasLtHeuristicLutSerializeEntry +cublasLtHeuristicsCacheGetCapacity +cublasLtHeuristicsCacheSetCapacity +cublasLtKCCMatmul +cublasLtKCCMatmulAlgoCapGetAttribute +cublasLtKCCMatmulAlgoCheck +cublasLtKCCMatmulAlgoGetHeuristic +cublasLtKCCMatmulAlgoGetIds +cublasLtKCCMatmulAlgoInit +cublasLtKCKMatmul +cublasLtKCKMatmulAlgoCapGetAttribute +cublasLtKCKMatmulAlgoCheck +cublasLtKCKMatmulAlgoGetHeuristic +cublasLtKCKMatmulAlgoGetIds +cublasLtKCKMatmulAlgoInit +cublasLtLegacyGemmACC +cublasLtLegacyGemmBII +cublasLtLegacyGemmBSS 
+cublasLtLegacyGemmCCC +cublasLtLegacyGemmDDD +cublasLtLegacyGemmHHH +cublasLtLegacyGemmHSH +cublasLtLegacyGemmHSS +cublasLtLegacyGemmSSS +cublasLtLegacyGemmTSS +cublasLtLegacyGemmTST +cublasLtLegacyGemmUtilizationCCC +cublasLtLegacyGemmUtilizationDDD +cublasLtLegacyGemmUtilizationZZZ +cublasLtLegacyGemmZZZ +cublasLtLoggerForceDisable +cublasLtLoggerOpenFile +cublasLtLoggerSetCallback +cublasLtLoggerSetFile +cublasLtLoggerSetLevel +cublasLtLoggerSetMask +cublasLtMatmul +cublasLtMatmulAlgoCapGetAttribute +cublasLtMatmulAlgoCheck +cublasLtMatmulAlgoConfigGetAttribute +cublasLtMatmulAlgoConfigGetAttributeRange +cublasLtMatmulAlgoConfigSetAttribute +cublasLtMatmulAlgoGetHeuristic +cublasLtMatmulAlgoGetIds +cublasLtMatmulAlgoInit +cublasLtMatmulDescCreate +cublasLtMatmulDescDestroy +cublasLtMatmulDescGetAttribute +cublasLtMatmulDescInit_internal +cublasLtMatmulDescSetAttribute +cublasLtMatmulPreferenceCreate +cublasLtMatmulPreferenceDestroy +cublasLtMatmulPreferenceGetAttribute +cublasLtMatmulPreferenceInit_internal +cublasLtMatmulPreferenceSetAttribute +cublasLtMatrixLayoutCreate +cublasLtMatrixLayoutDestroy +cublasLtMatrixLayoutGetAttribute +cublasLtMatrixLayoutInit_internal +cublasLtMatrixLayoutSetAttribute +cublasLtMatrixTransform +cublasLtMatrixTransformDescCreate +cublasLtMatrixTransformDescDestroy +cublasLtMatrixTransformDescGetAttribute +cublasLtMatrixTransformDescInit_internal +cublasLtMatrixTransformDescSetAttribute +cublasLtSSSMatmul +cublasLtSSSMatmulAlgoCapGetAttribute +cublasLtSSSMatmulAlgoCheck +cublasLtSSSMatmulAlgoGetHeuristic +cublasLtSSSMatmulAlgoGetIds +cublasLtSSSMatmulAlgoInit +cublasLtShutdownCtx +cublasLtTSSMatmul +cublasLtTSSMatmulAlgoCapGetAttribute +cublasLtTSSMatmulAlgoCheck +cublasLtTSSMatmulAlgoGetHeuristic +cublasLtTSSMatmulAlgoGetIds +cublasLtTSSMatmulAlgoInit +cublasLtTSTMatmul +cublasLtTSTMatmulAlgoCapGetAttribute +cublasLtTSTMatmulAlgoCheck +cublasLtTSTMatmulAlgoGetHeuristic +cublasLtTSTMatmulAlgoGetIds +cublasLtTSTMatmulAlgoInit 
+cublasLtVCCMatmul +cublasLtVCCMatmulAlgoCapGetAttribute +cublasLtVCCMatmulAlgoCheck +cublasLtVCCMatmulAlgoGetHeuristic +cublasLtVCCMatmulAlgoGetIds +cublasLtVCCMatmulAlgoInit +cublasLtVCVMatmul +cublasLtVCVMatmulAlgoCapGetAttribute +cublasLtVCVMatmulAlgoCheck +cublasLtVCVMatmulAlgoGetHeuristic +cublasLtVCVMatmulAlgoGetIds +cublasLtVCVMatmulAlgoInit +cublasLtZZZMatmul +cublasLtZZZMatmulAlgoCapGetAttribute +cublasLtZZZMatmulAlgoCheck +cublasLtZZZMatmulAlgoGetHeuristic +cublasLtZZZMatmulAlgoGetIds +cublasLtZZZMatmulAlgoInit +cublasLt_for_cublas_BII +cublasLt_for_cublas_BSS +cublasLt_for_cublas_CCC +cublasLt_for_cublas_DDD +cublasLt_for_cublas_HHH +cublasLt_for_cublas_HSH +cublasLt_for_cublas_HSS +cublasLt_for_cublas_SSS +cublasLt_for_cublas_TSS +cublasLt_for_cublas_TST +cublasLt_for_cublas_ZZZ diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cublasLt_11_0.inc b/third_party/xla/third_party/tsl/tsl/cuda/cublasLt_11_0.inc deleted file mode 100644 index 5645753c56bcf4..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cublasLt_11_0.inc +++ /dev/null @@ -1,390 +0,0 @@ -// Auto-generated, do not edit. 
- -extern "C" { - -cublasStatus_t CUBLASWINAPI cublasLtCreate(cublasLtHandle_t *lightHandle) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasLtHandle_t *); - static auto func_ptr = LoadSymbol("cublasLtCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(lightHandle); -} - -cublasStatus_t CUBLASWINAPI cublasLtDestroy(cublasLtHandle_t lightHandle) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasLtHandle_t); - static auto func_ptr = LoadSymbol("cublasLtDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(lightHandle); -} - -size_t CUBLASWINAPI cublasLtGetVersion(void) { - using FuncPtr = size_t(CUBLASWINAPI *)(); - static auto func_ptr = LoadSymbol("cublasLtGetVersion"); - if (!func_ptr) return 0; - return func_ptr(); -} - -size_t CUBLASWINAPI cublasLtGetCudartVersion(void) { - using FuncPtr = size_t(CUBLASWINAPI *)(); - static auto func_ptr = LoadSymbol("cublasLtGetCudartVersion"); - if (!func_ptr) return 0; - return func_ptr(); -} - -cublasStatus_t CUBLASWINAPI cublasLtGetProperty(libraryPropertyType type, - int *value) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(libraryPropertyType, int *); - static auto func_ptr = LoadSymbol("cublasLtGetProperty"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type, value); -} - -cublasStatus_t CUBLASWINAPI cublasLtMatmul( - cublasLtHandle_t lightHandle, cublasLtMatmulDesc_t computeDesc, - const void *alpha, /* host or device pointer */ - const void *A, cublasLtMatrixLayout_t Adesc, const void *B, - cublasLtMatrixLayout_t Bdesc, const void *beta, /* host or device pointer */ - const void *C, cublasLtMatrixLayout_t Cdesc, void *D, - cublasLtMatrixLayout_t Ddesc, const cublasLtMatmulAlgo_t *algo, - void *workspace, size_t workspaceSizeInBytes, cudaStream_t stream) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasLtHandle_t, cublasLtMatmulDesc_t, const void *, const void *, - cublasLtMatrixLayout_t, const void *, 
cublasLtMatrixLayout_t, - const void *, const void *, cublasLtMatrixLayout_t, void *, - cublasLtMatrixLayout_t, const cublasLtMatmulAlgo_t *, void *, size_t, - cudaStream_t); - static auto func_ptr = LoadSymbol("cublasLtMatmul"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(lightHandle, computeDesc, alpha, A, Adesc, B, Bdesc, beta, C, - Cdesc, D, Ddesc, algo, workspace, workspaceSizeInBytes, - stream); -} - -cublasStatus_t CUBLASWINAPI cublasLtMatrixTransform( - cublasLtHandle_t lightHandle, cublasLtMatrixTransformDesc_t transformDesc, - const void *alpha, /* host or device pointer */ - const void *A, cublasLtMatrixLayout_t Adesc, - const void *beta, /* host or device pointer */ - const void *B, cublasLtMatrixLayout_t Bdesc, void *C, - cublasLtMatrixLayout_t Cdesc, cudaStream_t stream) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasLtHandle_t, cublasLtMatrixTransformDesc_t, const void *, - const void *, cublasLtMatrixLayout_t, const void *, const void *, - cublasLtMatrixLayout_t, void *, cublasLtMatrixLayout_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cublasLtMatrixTransform"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(lightHandle, transformDesc, alpha, A, Adesc, beta, B, Bdesc, - C, Cdesc, stream); -} - -cublasStatus_t CUBLASWINAPI cublasLtMatrixLayoutInit_internal( // - cublasLtMatrixLayout_t matLayout, size_t size, cudaDataType type, - uint64_t rows, uint64_t cols, int64_t ld) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( // - cublasLtMatrixLayout_t, size_t, cudaDataType, uint64_t, uint64_t, - int64_t); - static auto func_ptr = - LoadSymbol("cublasLtMatrixLayoutInit_internal"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(matLayout, size, type, rows, cols, ld); -} - -cublasStatus_t CUBLASWINAPI cublasLtMatrixLayoutCreate( // - cublasLtMatrixLayout_t *matLayout, cudaDataType type, uint64_t rows, - uint64_t cols, int64_t ld) { - using FuncPtr = 
cublasStatus_t(CUBLASWINAPI *)( // - cublasLtMatrixLayout_t *, cudaDataType, uint64_t, uint64_t, int64_t); - static auto func_ptr = LoadSymbol("cublasLtMatrixLayoutCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(matLayout, type, rows, cols, ld); -} - -cublasStatus_t CUBLASWINAPI -cublasLtMatrixLayoutDestroy(cublasLtMatrixLayout_t matLayout) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasLtMatrixLayout_t); - static auto func_ptr = LoadSymbol("cublasLtMatrixLayoutDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(matLayout); -} - -cublasStatus_t CUBLASWINAPI cublasLtMatrixLayoutSetAttribute( // - cublasLtMatrixLayout_t matLayout, cublasLtMatrixLayoutAttribute_t attr, - const void *buf, size_t sizeInBytes) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( // - cublasLtMatrixLayout_t, cublasLtMatrixLayoutAttribute_t, const void *, - size_t); - static auto func_ptr = - LoadSymbol("cublasLtMatrixLayoutSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(matLayout, attr, buf, sizeInBytes); -} - -cublasStatus_t CUBLASWINAPI cublasLtMatrixLayoutGetAttribute( // - cublasLtMatrixLayout_t matLayout, cublasLtMatrixLayoutAttribute_t attr, - void *buf, size_t sizeInBytes, size_t *sizeWritten) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( // - cublasLtMatrixLayout_t, cublasLtMatrixLayoutAttribute_t, void *, size_t, - size_t *); - static auto func_ptr = - LoadSymbol("cublasLtMatrixLayoutGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(matLayout, attr, buf, sizeInBytes, sizeWritten); -} - -cublasStatus_t CUBLASWINAPI cublasLtMatmulDescInit_internal( // - cublasLtMatmulDesc_t matmulDesc, size_t size, - cublasComputeType_t computeType, cudaDataType_t scaleType) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( // - cublasLtMatmulDesc_t, size_t, cublasComputeType_t, cudaDataType_t); - static auto func_ptr = 
LoadSymbol("cublasLtMatmulDescInit_internal"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(matmulDesc, size, computeType, scaleType); -} - -cublasStatus_t CUBLASWINAPI cublasLtMatmulDescCreate( - cublasLtMatmulDesc_t *matmulDesc, cublasComputeType_t computeType, - cudaDataType_t scaleType) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasLtMatmulDesc_t *, cublasComputeType_t, cudaDataType_t); - static auto func_ptr = LoadSymbol("cublasLtMatmulDescCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(matmulDesc, computeType, scaleType); -} - -cublasStatus_t CUBLASWINAPI -cublasLtMatmulDescDestroy(cublasLtMatmulDesc_t matmulDesc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasLtMatmulDesc_t); - static auto func_ptr = LoadSymbol("cublasLtMatmulDescDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(matmulDesc); -} - -cublasStatus_t CUBLASWINAPI cublasLtMatmulDescSetAttribute( // - cublasLtMatmulDesc_t matmulDesc, cublasLtMatmulDescAttributes_t attr, - const void *buf, size_t sizeInBytes) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( // - cublasLtMatmulDesc_t, cublasLtMatmulDescAttributes_t, const void *, - size_t); - static auto func_ptr = LoadSymbol("cublasLtMatmulDescSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(matmulDesc, attr, buf, sizeInBytes); -} - -cublasStatus_t CUBLASWINAPI cublasLtMatmulDescGetAttribute( // - cublasLtMatmulDesc_t matmulDesc, cublasLtMatmulDescAttributes_t attr, - void *buf, size_t sizeInBytes, size_t *sizeWritten) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( // - cublasLtMatmulDesc_t, cublasLtMatmulDescAttributes_t, void *, size_t, - size_t *); - static auto func_ptr = LoadSymbol("cublasLtMatmulDescGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(matmulDesc, attr, buf, sizeInBytes, sizeWritten); -} - -cublasStatus_t CUBLASWINAPI 
cublasLtMatrixTransformDescInit_internal( - cublasLtMatrixTransformDesc_t transformDesc, size_t size, - cudaDataType scaleType) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasLtMatrixTransformDesc_t, - size_t, cudaDataType); - static auto func_ptr = - LoadSymbol("cublasLtMatrixTransformDescInit_internal"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(transformDesc, size, scaleType); -} - -cublasStatus_t CUBLASWINAPI cublasLtMatrixTransformDescCreate( - cublasLtMatrixTransformDesc_t *transformDesc, cudaDataType scaleType) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasLtMatrixTransformDesc_t *, cudaDataType); - static auto func_ptr = - LoadSymbol("cublasLtMatrixTransformDescCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(transformDesc, scaleType); -} - -cublasStatus_t CUBLASWINAPI cublasLtMatrixTransformDescDestroy( - cublasLtMatrixTransformDesc_t transformDesc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasLtMatrixTransformDesc_t); - static auto func_ptr = - LoadSymbol("cublasLtMatrixTransformDescDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(transformDesc); -} - -cublasStatus_t CUBLASWINAPI cublasLtMatrixTransformDescSetAttribute( // - cublasLtMatrixTransformDesc_t transformDesc, - cublasLtMatrixTransformDescAttributes_t attr, const void *buf, - size_t sizeInBytes) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( // - cublasLtMatrixTransformDesc_t, cublasLtMatrixTransformDescAttributes_t, - const void *, size_t); - static auto func_ptr = - LoadSymbol("cublasLtMatrixTransformDescSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(transformDesc, attr, buf, sizeInBytes); -} - -cublasStatus_t CUBLASWINAPI cublasLtMatrixTransformDescGetAttribute( // - cublasLtMatrixTransformDesc_t transformDesc, - cublasLtMatrixTransformDescAttributes_t attr, void *buf, size_t sizeInBytes, - size_t *sizeWritten) { - using FuncPtr = 
cublasStatus_t(CUBLASWINAPI *)( // - cublasLtMatrixTransformDesc_t, cublasLtMatrixTransformDescAttributes_t, - void *, size_t, size_t *); - static auto func_ptr = - LoadSymbol("cublasLtMatrixTransformDescGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(transformDesc, attr, buf, sizeInBytes, sizeWritten); -} - -cublasStatus_t CUBLASWINAPI cublasLtMatmulPreferenceInit_internal( - cublasLtMatmulPreference_t pref, size_t size) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasLtMatmulPreference_t, size_t); - static auto func_ptr = - LoadSymbol("cublasLtMatmulPreferenceInit_internal"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pref, size); -} - -cublasStatus_t CUBLASWINAPI -cublasLtMatmulPreferenceCreate(cublasLtMatmulPreference_t *pref) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasLtMatmulPreference_t *); - static auto func_ptr = LoadSymbol("cublasLtMatmulPreferenceCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pref); -} - -cublasStatus_t CUBLASWINAPI -cublasLtMatmulPreferenceDestroy(cublasLtMatmulPreference_t pref) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasLtMatmulPreference_t); - static auto func_ptr = LoadSymbol("cublasLtMatmulPreferenceDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pref); -} - -cublasStatus_t CUBLASWINAPI cublasLtMatmulPreferenceSetAttribute( // - cublasLtMatmulPreference_t pref, cublasLtMatmulPreferenceAttributes_t attr, - const void *buf, size_t sizeInBytes) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( // - cublasLtMatmulPreference_t, cublasLtMatmulPreferenceAttributes_t, - const void *, size_t); - static auto func_ptr = - LoadSymbol("cublasLtMatmulPreferenceSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pref, attr, buf, sizeInBytes); -} - -cublasStatus_t CUBLASWINAPI cublasLtMatmulPreferenceGetAttribute( // - cublasLtMatmulPreference_t pref, 
cublasLtMatmulPreferenceAttributes_t attr, - void *buf, size_t sizeInBytes, size_t *sizeWritten) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( // - cublasLtMatmulPreference_t, cublasLtMatmulPreferenceAttributes_t, void *, - size_t, size_t *); - static auto func_ptr = - LoadSymbol("cublasLtMatmulPreferenceGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pref, attr, buf, sizeInBytes, sizeWritten); -} - -cublasStatus_t CUBLASWINAPI cublasLtMatmulAlgoGetHeuristic( - cublasLtHandle_t lightHandle, cublasLtMatmulDesc_t operationDesc, - cublasLtMatrixLayout_t Adesc, cublasLtMatrixLayout_t Bdesc, - cublasLtMatrixLayout_t Cdesc, cublasLtMatrixLayout_t Ddesc, - cublasLtMatmulPreference_t preference, int requestedAlgoCount, - cublasLtMatmulHeuristicResult_t heuristicResultsArray[], - int *returnAlgoCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasLtHandle_t, cublasLtMatmulDesc_t, cublasLtMatrixLayout_t, - cublasLtMatrixLayout_t, cublasLtMatrixLayout_t, cublasLtMatrixLayout_t, - cublasLtMatmulPreference_t, int, cublasLtMatmulHeuristicResult_t[], - int *); - static auto func_ptr = LoadSymbol("cublasLtMatmulAlgoGetHeuristic"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(lightHandle, operationDesc, Adesc, Bdesc, Cdesc, Ddesc, - preference, requestedAlgoCount, heuristicResultsArray, - returnAlgoCount); -} - -cublasStatus_t CUBLASWINAPI cublasLtMatmulAlgoGetIds( - cublasLtHandle_t lightHandle, cublasComputeType_t computeType, - cudaDataType_t scaleType, cudaDataType_t Atype, cudaDataType_t Btype, - cudaDataType_t Ctype, cudaDataType_t Dtype, int requestedAlgoCount, - int algoIdsArray[], int *returnAlgoCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasLtHandle_t, cublasComputeType_t, cudaDataType_t, cudaDataType_t, - cudaDataType_t, cudaDataType_t, cudaDataType_t, int, int[], int *); - static auto func_ptr = LoadSymbol("cublasLtMatmulAlgoGetIds"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(lightHandle, computeType, scaleType, Atype, Btype, Ctype, - Dtype, requestedAlgoCount, algoIdsArray, returnAlgoCount); -} - -cublasStatus_t CUBLASWINAPI cublasLtMatmulAlgoInit( - cublasLtHandle_t lightHandle, cublasComputeType_t computeType, - cudaDataType_t scaleType, cudaDataType_t Atype, cudaDataType_t Btype, - cudaDataType_t Ctype, cudaDataType_t Dtype, int algoId, - cublasLtMatmulAlgo_t *algo) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasLtHandle_t, cublasComputeType_t, cudaDataType_t, cudaDataType_t, - cudaDataType_t, cudaDataType_t, cudaDataType_t, int, - cublasLtMatmulAlgo_t *); - static auto func_ptr = LoadSymbol("cublasLtMatmulAlgoInit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(lightHandle, computeType, scaleType, Atype, Btype, Ctype, - Dtype, algoId, algo); -} - -cublasStatus_t CUBLASWINAPI cublasLtMatmulAlgoCheck( // - cublasLtHandle_t lightHandle, cublasLtMatmulDesc_t operationDesc, - cublasLtMatrixLayout_t Adesc, cublasLtMatrixLayout_t Bdesc, - cublasLtMatrixLayout_t Cdesc, cublasLtMatrixLayout_t Ddesc, - const cublasLtMatmulAlgo_t *algo, ///< may point to result->algo - cublasLtMatmulHeuristicResult_t *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( // - cublasLtHandle_t, cublasLtMatmulDesc_t, cublasLtMatrixLayout_t, - cublasLtMatrixLayout_t, cublasLtMatrixLayout_t, cublasLtMatrixLayout_t, - const cublasLtMatmulAlgo_t *, ///< may point to result->algo - cublasLtMatmulHeuristicResult_t *); - static auto func_ptr = LoadSymbol("cublasLtMatmulAlgoCheck"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(lightHandle, operationDesc, Adesc, Bdesc, Cdesc, Ddesc, algo, - result); -} - -cublasStatus_t CUBLASWINAPI cublasLtMatmulAlgoCapGetAttribute( - const cublasLtMatmulAlgo_t *algo, cublasLtMatmulAlgoCapAttributes_t attr, - void *buf, size_t sizeInBytes, size_t *sizeWritten) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - const 
cublasLtMatmulAlgo_t *, cublasLtMatmulAlgoCapAttributes_t, void *, - size_t, size_t *); - static auto func_ptr = - LoadSymbol("cublasLtMatmulAlgoCapGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algo, attr, buf, sizeInBytes, sizeWritten); -} - -cublasStatus_t CUBLASWINAPI cublasLtMatmulAlgoConfigSetAttribute( - cublasLtMatmulAlgo_t *algo, cublasLtMatmulAlgoConfigAttributes_t attr, - const void *buf, size_t sizeInBytes) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasLtMatmulAlgo_t *, cublasLtMatmulAlgoConfigAttributes_t, - const void *, size_t); - static auto func_ptr = - LoadSymbol("cublasLtMatmulAlgoConfigSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algo, attr, buf, sizeInBytes); -} - -cublasStatus_t CUBLASWINAPI cublasLtMatmulAlgoConfigGetAttribute( - const cublasLtMatmulAlgo_t *algo, cublasLtMatmulAlgoConfigAttributes_t attr, - void *buf, size_t sizeInBytes, size_t *sizeWritten) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - const cublasLtMatmulAlgo_t *, cublasLtMatmulAlgoConfigAttributes_t, - void *, size_t, size_t *); - static auto func_ptr = - LoadSymbol("cublasLtMatmulAlgoConfigGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algo, attr, buf, sizeInBytes, sizeWritten); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cublasLt_stub.cc b/third_party/xla/third_party/tsl/tsl/cuda/cublasLt_stub.cc index f7e62f704d2ded..df4e73bebc126c 100644 --- a/third_party/xla/third_party/tsl/tsl/cuda/cublasLt_stub.cc +++ b/third_party/xla/third_party/tsl/tsl/cuda/cublasLt_stub.cc @@ -22,38 +22,48 @@ limitations under the License. namespace { // Returns DSO handle or null if loading the DSO fails. 
void* GetDsoHandle() { -#ifdef PLATFORM_GOOGLE - return nullptr; -#else static auto handle = []() -> void* { - auto handle_or = - tsl::internal::DsoLoader::GetCublasLtDsoHandle(); + auto handle_or = tsl::internal::DsoLoader::GetCublasLtDsoHandle(); if (!handle_or.ok()) return nullptr; return handle_or.value(); }(); return handle; -#endif } -template -T LoadSymbol(const char* symbol_name) { +void* LoadSymbol(const char* symbol_name) { void* symbol = nullptr; if (auto handle = GetDsoHandle()) { tsl::Env::Default() ->GetSymbolFromLibrary(handle, symbol_name, &symbol) .IgnoreError(); } - return reinterpret_cast(symbol); + return symbol; } -void LogFatalSymbolNotFound(const char* symbol_name) { - LOG(FATAL) << symbol_name << " symbol not found."; -} +const char* kSymbols[] = { +#include "tsl/cuda/cublasLt.inc" +}; + +constexpr size_t kNumSymbols = sizeof(kSymbols) / sizeof(const char*); -cublasStatus_t GetSymbolNotFoundError() { return CUBLAS_STATUS_INTERNAL_ERROR; } } // namespace -// We only use cublasLt from CUDA 11.0 onward. -#if CUDA_VERSION >= 11000 -#include "tsl/cuda/cublasLt_11_0.inc" -#endif +extern "C" { + +static cublasStatus_t GetSymbolNotFoundError() { + return CUBLAS_STATUS_INTERNAL_ERROR; +} + +extern void* _cublasLt_tramp_table[]; + +void _cublasLt_tramp_resolve(int i) { + CHECK_LE(0, i); + CHECK_LT(i, kNumSymbols); + void* p = LoadSymbol(kSymbols[i]); + if (!p) { + p = reinterpret_cast(&GetSymbolNotFoundError); + } + _cublasLt_tramp_table[i] = p; +} + +} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cublas_10_0.inc b/third_party/xla/third_party/tsl/tsl/cuda/cublas_10_0.inc deleted file mode 100644 index c24fd44c4f2613..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cublas_10_0.inc +++ /dev/null @@ -1,4898 +0,0 @@ -// Auto-generated, do not edit. 
- -extern "C" { - -cublasStatus_t CUBLASWINAPI cublasCreate_v2(cublasHandle_t *handle) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t *); - static auto func_ptr = LoadSymbol("cublasCreate_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cublasStatus_t CUBLASWINAPI cublasDestroy_v2(cublasHandle_t handle) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t); - static auto func_ptr = LoadSymbol("cublasDestroy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cublasStatus_t CUBLASWINAPI cublasGetVersion_v2(cublasHandle_t handle, - int *version) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int *); - static auto func_ptr = LoadSymbol("cublasGetVersion_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, version); -} - -cublasStatus_t CUBLASWINAPI cublasGetProperty(libraryPropertyType type, - int *value) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(libraryPropertyType, int *); - static auto func_ptr = LoadSymbol("cublasGetProperty"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type, value); -} - -cublasStatus_t CUBLASWINAPI cublasSetStream_v2(cublasHandle_t handle, - cudaStream_t streamId) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cublasSetStream_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cublasStatus_t CUBLASWINAPI cublasGetStream_v2(cublasHandle_t handle, - cudaStream_t *streamId) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cudaStream_t *); - static auto func_ptr = LoadSymbol("cublasGetStream_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cublasStatus_t CUBLASWINAPI cublasGetPointerMode_v2(cublasHandle_t handle, - cublasPointerMode_t *mode) { - using FuncPtr = - 
cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasPointerMode_t *); - static auto func_ptr = LoadSymbol("cublasGetPointerMode_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cublasStatus_t CUBLASWINAPI cublasSetPointerMode_v2(cublasHandle_t handle, - cublasPointerMode_t mode) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasPointerMode_t); - static auto func_ptr = LoadSymbol("cublasSetPointerMode_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cublasStatus_t CUBLASWINAPI cublasGetAtomicsMode(cublasHandle_t handle, - cublasAtomicsMode_t *mode) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasAtomicsMode_t *); - static auto func_ptr = LoadSymbol("cublasGetAtomicsMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cublasStatus_t CUBLASWINAPI cublasSetAtomicsMode(cublasHandle_t handle, - cublasAtomicsMode_t mode) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasAtomicsMode_t); - static auto func_ptr = LoadSymbol("cublasSetAtomicsMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cublasStatus_t CUBLASWINAPI cublasGetMathMode(cublasHandle_t handle, - cublasMath_t *mode) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasMath_t *); - static auto func_ptr = LoadSymbol("cublasGetMathMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cublasStatus_t CUBLASWINAPI cublasSetMathMode(cublasHandle_t handle, - cublasMath_t mode) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasMath_t); - static auto func_ptr = LoadSymbol("cublasSetMathMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cublasStatus_t CUBLASWINAPI cublasLoggerConfigure(int logIsOn, int logToStdOut, - int logToStdErr, - 
const char *logFileName) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, int, const char *); - static auto func_ptr = LoadSymbol("cublasLoggerConfigure"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(logIsOn, logToStdOut, logToStdErr, logFileName); -} - -cublasStatus_t CUBLASWINAPI -cublasSetLoggerCallback(cublasLogCallback userCallback) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasLogCallback); - static auto func_ptr = LoadSymbol("cublasSetLoggerCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(userCallback); -} - -cublasStatus_t CUBLASWINAPI -cublasGetLoggerCallback(cublasLogCallback *userCallback) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasLogCallback *); - static auto func_ptr = LoadSymbol("cublasGetLoggerCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(userCallback); -} - -cublasStatus_t CUBLASWINAPI cublasSetVector(int n, int elemSize, const void *x, - int incx, void *devicePtr, - int incy) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(int, int, const void *, int, void *, int); - static auto func_ptr = LoadSymbol("cublasSetVector"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(n, elemSize, x, incx, devicePtr, incy); -} - -cublasStatus_t CUBLASWINAPI cublasGetVector(int n, int elemSize, const void *x, - int incx, void *y, int incy) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(int, int, const void *, int, void *, int); - static auto func_ptr = LoadSymbol("cublasGetVector"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(n, elemSize, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasSetMatrix(int rows, int cols, int elemSize, - const void *A, int lda, void *B, - int ldb) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, int, const void *, - int, void *, int); - static auto func_ptr = LoadSymbol("cublasSetMatrix"); - if (!func_ptr) return GetSymbolNotFoundError(); 
- return func_ptr(rows, cols, elemSize, A, lda, B, ldb); -} - -cublasStatus_t CUBLASWINAPI cublasGetMatrix(int rows, int cols, int elemSize, - const void *A, int lda, void *B, - int ldb) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, int, const void *, - int, void *, int); - static auto func_ptr = LoadSymbol("cublasGetMatrix"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rows, cols, elemSize, A, lda, B, ldb); -} - -cublasStatus_t CUBLASWINAPI cublasSetVectorAsync(int n, int elemSize, - const void *hostPtr, int incx, - void *devicePtr, int incy, - cudaStream_t stream) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, const void *, int, - void *, int, cudaStream_t); - static auto func_ptr = LoadSymbol("cublasSetVectorAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(n, elemSize, hostPtr, incx, devicePtr, incy, stream); -} - -cublasStatus_t CUBLASWINAPI cublasGetVectorAsync(int n, int elemSize, - const void *devicePtr, - int incx, void *hostPtr, - int incy, - cudaStream_t stream) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, const void *, int, - void *, int, cudaStream_t); - static auto func_ptr = LoadSymbol("cublasGetVectorAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(n, elemSize, devicePtr, incx, hostPtr, incy, stream); -} - -cublasStatus_t CUBLASWINAPI cublasSetMatrixAsync(int rows, int cols, - int elemSize, const void *A, - int lda, void *B, int ldb, - cudaStream_t stream) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - int, int, int, const void *, int, void *, int, cudaStream_t); - static auto func_ptr = LoadSymbol("cublasSetMatrixAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rows, cols, elemSize, A, lda, B, ldb, stream); -} - -cublasStatus_t CUBLASWINAPI cublasGetMatrixAsync(int rows, int cols, - int elemSize, const void *A, - int lda, void *B, int ldb, - cudaStream_t stream) { - using FuncPtr = 
cublasStatus_t(CUBLASWINAPI *)( - int, int, int, const void *, int, void *, int, cudaStream_t); - static auto func_ptr = LoadSymbol("cublasGetMatrixAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rows, cols, elemSize, A, lda, B, ldb, stream); -} - -void CUBLASWINAPI cublasXerbla(const char *srName, int info) { - using FuncPtr = void(CUBLASWINAPI *)(const char *, int); - static auto func_ptr = LoadSymbol("cublasXerbla"); - if (!func_ptr) LogFatalSymbolNotFound("cublasXerbla"); - return func_ptr(srName, info); -} - -cublasStatus_t CUBLASWINAPI cublasNrm2Ex(cublasHandle_t handle, int n, - const void *x, cudaDataType xType, - int incx, void *result, - cudaDataType resultType, - cudaDataType executionType) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const void *, cudaDataType, int, void *, - cudaDataType, cudaDataType); - static auto func_ptr = LoadSymbol("cublasNrm2Ex"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, xType, incx, result, resultType, executionType); -} - -cublasStatus_t CUBLASWINAPI cublasSnrm2_v2(cublasHandle_t handle, int n, - const float *x, int incx, - float *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const float *, int, float *); - static auto func_ptr = LoadSymbol("cublasSnrm2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasDnrm2_v2(cublasHandle_t handle, int n, - const double *x, int incx, - double *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const double *, int, double *); - static auto func_ptr = LoadSymbol("cublasDnrm2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasScnrm2_v2(cublasHandle_t handle, int n, - const cuComplex *x, int incx, - float *result) { - using FuncPtr = 
cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuComplex *, int, float *); - static auto func_ptr = LoadSymbol("cublasScnrm2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasDznrm2_v2(cublasHandle_t handle, int n, - const cuDoubleComplex *x, int incx, - double *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *, int, double *); - static auto func_ptr = LoadSymbol("cublasDznrm2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasDotEx(cublasHandle_t handle, int n, - const void *x, cudaDataType xType, - int incx, const void *y, - cudaDataType yType, int incy, - void *result, cudaDataType resultType, - cudaDataType executionType) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const void *, cudaDataType, int, const void *, - cudaDataType, int, void *, cudaDataType, cudaDataType); - static auto func_ptr = LoadSymbol("cublasDotEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, xType, incx, y, yType, incy, result, resultType, - executionType); -} - -cublasStatus_t CUBLASWINAPI cublasDotcEx(cublasHandle_t handle, int n, - const void *x, cudaDataType xType, - int incx, const void *y, - cudaDataType yType, int incy, - void *result, cudaDataType resultType, - cudaDataType executionType) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const void *, cudaDataType, int, const void *, - cudaDataType, int, void *, cudaDataType, cudaDataType); - static auto func_ptr = LoadSymbol("cublasDotcEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, xType, incx, y, yType, incy, result, resultType, - executionType); -} - -cublasStatus_t CUBLASWINAPI cublasSdot_v2(cublasHandle_t handle, int n, - const float *x, int 
incx, - const float *y, int incy, - float *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const float *, int, const float *, int, float *); - static auto func_ptr = LoadSymbol("cublasSdot_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, result); -} - -cublasStatus_t CUBLASWINAPI cublasDdot_v2(cublasHandle_t handle, int n, - const double *x, int incx, - const double *y, int incy, - double *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const double *, int, const double *, int, double *); - static auto func_ptr = LoadSymbol("cublasDdot_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, result); -} - -cublasStatus_t CUBLASWINAPI cublasCdotu_v2(cublasHandle_t handle, int n, - const cuComplex *x, int incx, - const cuComplex *y, int incy, - cuComplex *result) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *, - int, const cuComplex *, int, cuComplex *); - static auto func_ptr = LoadSymbol("cublasCdotu_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, result); -} - -cublasStatus_t CUBLASWINAPI cublasCdotc_v2(cublasHandle_t handle, int n, - const cuComplex *x, int incx, - const cuComplex *y, int incy, - cuComplex *result) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *, - int, const cuComplex *, int, cuComplex *); - static auto func_ptr = LoadSymbol("cublasCdotc_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, result); -} - -cublasStatus_t CUBLASWINAPI cublasZdotu_v2(cublasHandle_t handle, int n, - const cuDoubleComplex *x, int incx, - const cuDoubleComplex *y, int incy, - cuDoubleComplex *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *, int, - const 
cuDoubleComplex *, int, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZdotu_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, result); -} - -cublasStatus_t CUBLASWINAPI cublasZdotc_v2(cublasHandle_t handle, int n, - const cuDoubleComplex *x, int incx, - const cuDoubleComplex *y, int incy, - cuDoubleComplex *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZdotc_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, result); -} - -cublasStatus_t CUBLASWINAPI -cublasScalEx(cublasHandle_t handle, int n, - const void *alpha, /* host or device pointer */ - cudaDataType alphaType, void *x, cudaDataType xType, int incx, - cudaDataType executionType) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const void *, cudaDataType, void *, cudaDataType, - int, cudaDataType); - static auto func_ptr = LoadSymbol("cublasScalEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, alphaType, x, xType, incx, executionType); -} - -cublasStatus_t CUBLASWINAPI -cublasSscal_v2(cublasHandle_t handle, int n, - const float *alpha, /* host or device pointer */ - float *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasSscal_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx); -} - -cublasStatus_t CUBLASWINAPI -cublasDscal_v2(cublasHandle_t handle, int n, - const double *alpha, /* host or device pointer */ - double *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDscal_v2"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx); -} - -cublasStatus_t CUBLASWINAPI -cublasCscal_v2(cublasHandle_t handle, int n, - const cuComplex *alpha, /* host or device pointer */ - cuComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCscal_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx); -} - -cublasStatus_t CUBLASWINAPI -cublasCsscal_v2(cublasHandle_t handle, int n, - const float *alpha, /* host or device pointer */ - cuComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const float *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsscal_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx); -} - -cublasStatus_t CUBLASWINAPI -cublasZscal_v2(cublasHandle_t handle, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - cuDoubleComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZscal_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx); -} - -cublasStatus_t CUBLASWINAPI -cublasZdscal_v2(cublasHandle_t handle, int n, - const double *alpha, /* host or device pointer */ - cuDoubleComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const double *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZdscal_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasAxpyEx( - cublasHandle_t handle, int n, - const void *alpha, /* host or device pointer */ - cudaDataType alphaType, const void *x, cudaDataType xType, int 
incx, - void *y, cudaDataType yType, int incy, cudaDataType executiontype) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const void *, cudaDataType, const void *, - cudaDataType, int, void *, cudaDataType, int, cudaDataType); - static auto func_ptr = LoadSymbol("cublasAxpyEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, alphaType, x, xType, incx, y, yType, incy, - executiontype); -} - -cublasStatus_t CUBLASWINAPI -cublasSaxpy_v2(cublasHandle_t handle, int n, - const float *alpha, /* host or device pointer */ - const float *x, int incx, float *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const float *, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSaxpy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasDaxpy_v2(cublasHandle_t handle, int n, - const double *alpha, /* host or device pointer */ - const double *x, int incx, double *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const double *, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDaxpy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasCaxpy_v2(cublasHandle_t handle, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *x, int incx, cuComplex *y, int incy) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCaxpy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasZaxpy_v2( - cublasHandle_t handle, int n, - const cuDoubleComplex *alpha, /* host 
or device pointer */ - const cuDoubleComplex *x, int incx, cuDoubleComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *, const cuDoubleComplex *, - int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZaxpy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasScopy_v2(cublasHandle_t handle, int n, - const float *x, int incx, float *y, - int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasScopy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasDcopy_v2(cublasHandle_t handle, int n, - const double *x, int incx, double *y, - int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDcopy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasCcopy_v2(cublasHandle_t handle, int n, - const cuComplex *x, int incx, - cuComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCcopy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasZcopy_v2(cublasHandle_t handle, int n, - const cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZcopy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, n, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasSswap_v2(cublasHandle_t handle, int n, - float *x, int incx, float *y, - int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, float *, - int, float *, int); - static auto func_ptr = LoadSymbol("cublasSswap_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasDswap_v2(cublasHandle_t handle, int n, - double *x, int incx, double *y, - int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, double *, - int, double *, int); - static auto func_ptr = LoadSymbol("cublasDswap_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasCswap_v2(cublasHandle_t handle, int n, - cuComplex *x, int incx, cuComplex *y, - int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCswap_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasZswap_v2(cublasHandle_t handle, int n, - cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZswap_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasIsamax_v2(cublasHandle_t handle, int n, - const float *x, int incx, - int *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const float *, int, int *); - static auto func_ptr = LoadSymbol("cublasIsamax_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - 
-cublasStatus_t CUBLASWINAPI cublasIdamax_v2(cublasHandle_t handle, int n, - const double *x, int incx, - int *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const double *, int, int *); - static auto func_ptr = LoadSymbol("cublasIdamax_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasIcamax_v2(cublasHandle_t handle, int n, - const cuComplex *x, int incx, - int *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cublasIcamax_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasIzamax_v2(cublasHandle_t handle, int n, - const cuDoubleComplex *x, int incx, - int *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cublasIzamax_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasIsamin_v2(cublasHandle_t handle, int n, - const float *x, int incx, - int *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const float *, int, int *); - static auto func_ptr = LoadSymbol("cublasIsamin_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasIdamin_v2(cublasHandle_t handle, int n, - const double *x, int incx, - int *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const double *, int, int *); - static auto func_ptr = LoadSymbol("cublasIdamin_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasIcamin_v2(cublasHandle_t handle, int n, - 
const cuComplex *x, int incx, - int *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cublasIcamin_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasIzamin_v2(cublasHandle_t handle, int n, - const cuDoubleComplex *x, int incx, - int *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cublasIzamin_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasSasum_v2(cublasHandle_t handle, int n, - const float *x, int incx, - float *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const float *, int, float *); - static auto func_ptr = LoadSymbol("cublasSasum_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasDasum_v2(cublasHandle_t handle, int n, - const double *x, int incx, - double *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const double *, int, double *); - static auto func_ptr = LoadSymbol("cublasDasum_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasScasum_v2(cublasHandle_t handle, int n, - const cuComplex *x, int incx, - float *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuComplex *, int, float *); - static auto func_ptr = LoadSymbol("cublasScasum_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasDzasum_v2(cublasHandle_t handle, int n, - const cuDoubleComplex *x, int incx, - double *result) { - 
using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *, int, double *); - static auto func_ptr = LoadSymbol("cublasDzasum_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI -cublasSrot_v2(cublasHandle_t handle, int n, float *x, int incx, float *y, - int incy, const float *c, /* host or device pointer */ - const float *s) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, float *, int, float *, - int, const float *, const float *); - static auto func_ptr = LoadSymbol("cublasSrot_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, c, s); -} - -cublasStatus_t CUBLASWINAPI -cublasDrot_v2(cublasHandle_t handle, int n, double *x, int incx, double *y, - int incy, const double *c, /* host or device pointer */ - const double *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, double *, int, double *, int, const double *, - const double *); - static auto func_ptr = LoadSymbol("cublasDrot_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, c, s); -} - -cublasStatus_t CUBLASWINAPI cublasCrot_v2( - cublasHandle_t handle, int n, cuComplex *x, int incx, cuComplex *y, - int incy, const float *c, /* host or device pointer */ - const cuComplex *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, cuComplex *, int, cuComplex *, int, const float *, - const cuComplex *); - static auto func_ptr = LoadSymbol("cublasCrot_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, c, s); -} - -cublasStatus_t CUBLASWINAPI cublasCsrot_v2( - cublasHandle_t handle, int n, cuComplex *x, int incx, cuComplex *y, - int incy, const float *c, /* host or device pointer */ - const float *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, 
int, cuComplex *, int, cuComplex *, int, const float *, - const float *); - static auto func_ptr = LoadSymbol("cublasCsrot_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, c, s); -} - -cublasStatus_t CUBLASWINAPI cublasZrot_v2( - cublasHandle_t handle, int n, cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy, const double *c, /* host or device pointer */ - const cuDoubleComplex *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int, - const double *, const cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZrot_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, c, s); -} - -cublasStatus_t CUBLASWINAPI cublasZdrot_v2( - cublasHandle_t handle, int n, cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy, const double *c, /* host or device pointer */ - const double *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int, - const double *, const double *); - static auto func_ptr = LoadSymbol("cublasZdrot_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, c, s); -} - -cublasStatus_t CUBLASWINAPI -cublasSrotg_v2(cublasHandle_t handle, float *a, /* host or device pointer */ - float *b, /* host or device pointer */ - float *c, /* host or device pointer */ - float *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, float *, - float *, float *, float *); - static auto func_ptr = LoadSymbol("cublasSrotg_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, a, b, c, s); -} - -cublasStatus_t CUBLASWINAPI -cublasDrotg_v2(cublasHandle_t handle, double *a, /* host or device pointer */ - double *b, /* host or device pointer */ - double *c, /* host or device pointer */ - double *s) { - using FuncPtr = 
cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, double *, - double *, double *, double *); - static auto func_ptr = LoadSymbol("cublasDrotg_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, a, b, c, s); -} - -cublasStatus_t CUBLASWINAPI -cublasCrotg_v2(cublasHandle_t handle, cuComplex *a, /* host or device pointer */ - cuComplex *b, /* host or device pointer */ - float *c, /* host or device pointer */ - cuComplex *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cuComplex *, cuComplex *, float *, cuComplex *); - static auto func_ptr = LoadSymbol("cublasCrotg_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, a, b, c, s); -} - -cublasStatus_t CUBLASWINAPI cublasZrotg_v2( - cublasHandle_t handle, cuDoubleComplex *a, /* host or device pointer */ - cuDoubleComplex *b, /* host or device pointer */ - double *c, /* host or device pointer */ - cuDoubleComplex *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cuDoubleComplex *, cuDoubleComplex *, double *, - cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZrotg_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, a, b, c, s); -} - -cublasStatus_t CUBLASWINAPI cublasSrotm_v2(cublasHandle_t handle, int n, - float *x, int incx, float *y, - int incy, const float *param) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, float *, int, float *, int, const float *); - static auto func_ptr = LoadSymbol("cublasSrotm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, param); -} - -cublasStatus_t CUBLASWINAPI cublasDrotm_v2(cublasHandle_t handle, int n, - double *x, int incx, double *y, - int incy, const double *param) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, double *, int, double *, int, const double *); - static auto func_ptr = LoadSymbol("cublasDrotm_v2"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, param); -} - -cublasStatus_t CUBLASWINAPI -cublasSrotmg_v2(cublasHandle_t handle, float *d1, /* host or device pointer */ - float *d2, /* host or device pointer */ - float *x1, /* host or device pointer */ - const float *y1, /* host or device pointer */ - float *param) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, float *, float *, float *, const float *, float *); - static auto func_ptr = LoadSymbol("cublasSrotmg_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, d1, d2, x1, y1, param); -} - -cublasStatus_t CUBLASWINAPI -cublasDrotmg_v2(cublasHandle_t handle, double *d1, /* host or device pointer */ - double *d2, /* host or device pointer */ - double *x1, /* host or device pointer */ - const double *y1, /* host or device pointer */ - double *param) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, double *, double *, double *, const double *, double *); - static auto func_ptr = LoadSymbol("cublasDrotmg_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, d1, d2, x1, y1, param); -} - -cublasStatus_t CUBLASWINAPI -cublasSgemv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *x, int incx, - const float *beta, /* host or device pointer */ - float *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const float *, const float *, - int, const float *, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasSgemv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasDgemv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - const double *alpha, /* host or device pointer */ 
- const double *A, int lda, const double *x, int incx, - const double *beta, /* host or device pointer */ - double *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const double *, - const double *, int, const double *, int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDgemv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasCgemv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *x, int incx, - const cuComplex *beta, /* host or device pointer */ - cuComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgemv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasZgemv_v2( - cublasHandle_t handle, cublasOperation_t trans, int m, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZgemv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI 
-cublasSgbmv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - int kl, int ku, const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *x, int incx, - const float *beta, /* host or device pointer */ - float *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, int, int, const float *, - const float *, int, const float *, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasSgbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, - incy); -} - -cublasStatus_t CUBLASWINAPI -cublasDgbmv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - int kl, int ku, const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *x, int incx, - const double *beta, /* host or device pointer */ - double *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, int, int, const double *, - const double *, int, const double *, int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDgbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, - incy); -} - -cublasStatus_t CUBLASWINAPI cublasCgbmv_v2( - cublasHandle_t handle, cublasOperation_t trans, int m, int n, int kl, - int ku, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *x, int incx, - const cuComplex *beta, /* host or device pointer */ - cuComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, int, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgbmv_v2"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, - incy); -} - -cublasStatus_t CUBLASWINAPI cublasZgbmv_v2( - cublasHandle_t handle, cublasOperation_t trans, int m, int n, int kl, - int ku, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cublasZgbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, - incy); -} - -cublasStatus_t CUBLASWINAPI cublasStrmv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - cublasDiagType_t diag, int n, const float *A, int lda, float *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStrmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasDtrmv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, - const double *A, int lda, double *x, - int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtrmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); 
-} - -cublasStatus_t CUBLASWINAPI cublasCtrmv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, - const cuComplex *A, int lda, - cuComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtrmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasZtrmv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtrmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasStbmv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, int k, - const float *A, int lda, float *x, - int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, int, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasDtbmv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, int k, - const double *A, int lda, double *x, - int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, 
cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, int, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasCtbmv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, int k, - const cuComplex *A, int lda, - cuComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasZtbmv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, int k, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, int, const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasStpmv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - cublasDiagType_t diag, int n, const float *AP, float *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasStpmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); -} - -cublasStatus_t 
CUBLASWINAPI cublasDtpmv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - cublasDiagType_t diag, int n, const double *AP, double *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDtpmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasCtpmv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - cublasDiagType_t diag, int n, const cuComplex *AP, cuComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtpmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasZtpmv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, - const cuDoubleComplex *AP, - cuDoubleComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtpmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasStrsv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - cublasDiagType_t diag, int n, const float *A, int lda, float *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const float *, int, float *, int); - static auto func_ptr = 
LoadSymbol("cublasStrsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasDtrsv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, - const double *A, int lda, double *x, - int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtrsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasCtrsv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, - const cuComplex *A, int lda, - cuComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtrsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasZtrsv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtrsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasStpsv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - cublasDiagType_t diag, int n, const 
float *AP, float *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasStpsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasDtpsv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - cublasDiagType_t diag, int n, const double *AP, double *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDtpsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasCtpsv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - cublasDiagType_t diag, int n, const cuComplex *AP, cuComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtpsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasZtpsv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, - const cuDoubleComplex *AP, - cuDoubleComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtpsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); -} - 
-cublasStatus_t CUBLASWINAPI cublasStbsv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, int k, - const float *A, int lda, float *x, - int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, int, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStbsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasDtbsv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, int k, - const double *A, int lda, double *x, - int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, int, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtbsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasCtbsv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, int k, - const cuComplex *A, int lda, - cuComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtbsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasZtbsv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, int k, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, 
cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, int, const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtbsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI -cublasSsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *x, int incx, - const float *beta, /* host or device pointer */ - float *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int, - const float *, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasSsymv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasDsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *x, int incx, - const double *beta, /* host or device pointer */ - double *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, const double *, - int, const double *, int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDsymv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasCsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *x, int incx, - const cuComplex *beta, /* host or device pointer */ - cuComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuComplex *, - 
const cuComplex *, int, const cuComplex *, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsymv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasZsymv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZsymv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasChemv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *x, int incx, - const cuComplex *beta, /* host or device pointer */ - cuComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasChemv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasZhemv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx, - const cuDoubleComplex *beta, /* host or device pointer */ - 
cuDoubleComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZhemv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasSsbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, int k, - const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *x, int incx, - const float *beta, /* host or device pointer */ - float *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, int, const float *, const float *, - int, const float *, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasSsbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasDsbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, int k, - const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *x, int incx, - const double *beta, /* host or device pointer */ - double *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, int, const double *, - const double *, int, const double *, int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDsbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasChbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, int k, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *x, int incx, - const 
cuComplex *beta, /* host or device pointer */ - cuComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasChbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasZhbmv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, int n, int k, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZhbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasSspmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const float *alpha, /* host or device pointer */ - const float *AP, const float *x, int incx, - const float *beta, /* host or device pointer */ - float *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, const float *, - const float *, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasSspmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, AP, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasDspmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const double *alpha, /* host or device pointer */ - 
const double *AP, const double *x, int incx, - const double *beta, /* host or device pointer */ - double *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, const double *, - const double *, int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDspmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, AP, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasChpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *AP, const cuComplex *x, int incx, - const cuComplex *beta, /* host or device pointer */ - cuComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuComplex *, - const cuComplex *, const cuComplex *, int, const cuComplex *, cuComplex *, - int); - static auto func_ptr = LoadSymbol("cublasChpmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, AP, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasZhpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *AP, const cuDoubleComplex *x, int incx, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZhpmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, AP, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasSger_v2( - cublasHandle_t handle, int m, int n, - const float *alpha, /* host or device pointer 
*/ - const float *x, int incx, const float *y, int incy, float *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, int, const float *, const float *, int, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSger_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI cublasDger_v2( - cublasHandle_t handle, int m, int n, - const double *alpha, /* host or device pointer */ - const double *x, int incx, const double *y, int incy, double *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, int, const double *, const double *, int, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDger_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasCgeru_v2(cublasHandle_t handle, int m, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *x, int incx, const cuComplex *y, int incy, - cuComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, int, const cuComplex *, const cuComplex *, int, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgeru_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasCgerc_v2(cublasHandle_t handle, int m, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *x, int incx, const cuComplex *y, int incy, - cuComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, int, const cuComplex *, const cuComplex *, int, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgerc_v2"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasZgeru_v2(cublasHandle_t handle, int m, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, - int incy, cuDoubleComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZgeru_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasZgerc_v2(cublasHandle_t handle, int m, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, - int incy, cuDoubleComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZgerc_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasSsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const float *alpha, /* host or device pointer */ - const float *x, int incx, float *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int, - float *, int); - static auto func_ptr = LoadSymbol("cublasSsyr_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasDsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const double *alpha, /* host or device pointer */ - const 
double *x, int incx, double *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, const double *, - int, double *, int); - static auto func_ptr = LoadSymbol("cublasDsyr_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasCsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *x, int incx, cuComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuComplex *, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsyr_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasZsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *x, int incx, cuDoubleComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZsyr_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasCher_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const float *alpha, /* host or device pointer */ - const cuComplex *x, int incx, cuComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, const cuComplex *, - int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCher_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, A, lda); -} - 
-cublasStatus_t CUBLASWINAPI -cublasZher_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const double *alpha, /* host or device pointer */ - const cuDoubleComplex *x, int incx, cuDoubleComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, - const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZher_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasSspr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const float *alpha, /* host or device pointer */ - const float *x, int incx, float *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int, - float *); - static auto func_ptr = LoadSymbol("cublasSspr_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, AP); -} - -cublasStatus_t CUBLASWINAPI -cublasDspr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const double *alpha, /* host or device pointer */ - const double *x, int incx, double *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, const double *, - int, double *); - static auto func_ptr = LoadSymbol("cublasDspr_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, AP); -} - -cublasStatus_t CUBLASWINAPI -cublasChpr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const float *alpha, /* host or device pointer */ - const cuComplex *x, int incx, cuComplex *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, const cuComplex *, - int, cuComplex *); - static auto func_ptr = LoadSymbol("cublasChpr_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, uplo, n, alpha, x, incx, AP); -} - -cublasStatus_t CUBLASWINAPI -cublasZhpr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const double *alpha, /* host or device pointer */ - const cuDoubleComplex *x, int incx, cuDoubleComplex *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, - const cuDoubleComplex *, int, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZhpr_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, AP); -} - -cublasStatus_t CUBLASWINAPI cublasSsyr2_v2( - cublasHandle_t handle, cublasFillMode_t uplo, int n, - const float *alpha, /* host or device pointer */ - const float *x, int incx, const float *y, int incy, float *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSsyr2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI cublasDsyr2_v2( - cublasHandle_t handle, cublasFillMode_t uplo, int n, - const double *alpha, /* host or device pointer */ - const double *x, int incx, const double *y, int incy, double *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, const double *, - int, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDsyr2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasCsyr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *x, int incx, const cuComplex *y, int incy, - cuComplex *A, int lda) { - using FuncPtr = 
cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsyr2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasZsyr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, - int incy, cuDoubleComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZsyr2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasCher2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *x, int incx, const cuComplex *y, int incy, - cuComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCher2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasZher2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, - int incy, cuDoubleComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, - const cuDoubleComplex *, 
int, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZher2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasSspr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const float *alpha, /* host or device pointer */ - const float *x, int incx, const float *y, int incy, float *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int, - const float *, int, float *); - static auto func_ptr = LoadSymbol("cublasSspr2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, AP); -} - -cublasStatus_t CUBLASWINAPI cublasDspr2_v2( - cublasHandle_t handle, cublasFillMode_t uplo, int n, - const double *alpha, /* host or device pointer */ - const double *x, int incx, const double *y, int incy, double *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, const double *, - int, const double *, int, double *); - static auto func_ptr = LoadSymbol("cublasDspr2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, AP); -} - -cublasStatus_t CUBLASWINAPI cublasChpr2_v2( - cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *x, int incx, const cuComplex *y, int incy, cuComplex *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, int, cuComplex *); - static auto func_ptr = LoadSymbol("cublasChpr2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, AP); -} - -cublasStatus_t CUBLASWINAPI -cublasZhpr2_v2(cublasHandle_t handle, 
cublasFillMode_t uplo, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, - int incy, cuDoubleComplex *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZhpr2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, AP); -} - -cublasStatus_t CUBLASWINAPI cublasSgemm_v2( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *B, int ldb, - const float *beta, /* host or device pointer */ - float *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const float *, const float *, int, const float *, int, const float *, - float *, int); - static auto func_ptr = LoadSymbol("cublasSgemm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasDgemm_v2( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *B, int ldb, - const double *beta, /* host or device pointer */ - double *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const double *, const double *, int, const double *, int, const double *, - double *, int); - static auto func_ptr = LoadSymbol("cublasDgemm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, 
lda, B, ldb, beta, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCgemm_v2( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, int, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgemm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCgemm3m( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, int, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgemm3m"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCgemm3mEx( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const cuComplex *alpha, const void *A, - cudaDataType Atype, int lda, const void *B, cudaDataType Btype, int ldb, - const cuComplex *beta, void *C, cudaDataType Ctype, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, 
cublasOperation_t, int, int, int, - const cuComplex *, const void *, cudaDataType, int, const void *, - cudaDataType, int, const cuComplex *, void *, cudaDataType, int); - static auto func_ptr = LoadSymbol("cublasCgemm3mEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B, - Btype, ldb, beta, C, Ctype, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZgemm_v2( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cublasZgemm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI -cublasZgemm3m(cublasHandle_t handle, cublasOperation_t transa, - cublasOperation_t transb, int m, int n, int k, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, - int ldb, const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cublasZgemm3m"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, 
transb, m, n, k, alpha, A, lda, B, ldb, beta, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasSgemmEx( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const float *alpha, /* host or device pointer */ - const void *A, cudaDataType Atype, int lda, const void *B, - cudaDataType Btype, int ldb, const float *beta, /* host or device pointer */ - void *C, cudaDataType Ctype, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const float *, const void *, cudaDataType, int, const void *, - cudaDataType, int, const float *, void *, cudaDataType, int); - static auto func_ptr = LoadSymbol("cublasSgemmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B, - Btype, ldb, beta, C, Ctype, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasGemmEx( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const void *alpha, /* host or device pointer */ - const void *A, cudaDataType Atype, int lda, const void *B, - cudaDataType Btype, int ldb, const void *beta, /* host or device pointer */ - void *C, cudaDataType Ctype, int ldc, cudaDataType computeType, - cublasGemmAlgo_t algo) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const void *, const void *, cudaDataType, int, const void *, cudaDataType, - int, const void *, void *, cudaDataType, int, cudaDataType, - cublasGemmAlgo_t); - static auto func_ptr = LoadSymbol("cublasGemmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B, - Btype, ldb, beta, C, Ctype, ldc, computeType, algo); -} - -cublasStatus_t CUBLASWINAPI cublasCgemmEx( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const 
cuComplex *alpha, const void *A, - cudaDataType Atype, int lda, const void *B, cudaDataType Btype, int ldb, - const cuComplex *beta, void *C, cudaDataType Ctype, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuComplex *, const void *, cudaDataType, int, const void *, - cudaDataType, int, const cuComplex *, void *, cudaDataType, int); - static auto func_ptr = LoadSymbol("cublasCgemmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B, - Btype, ldb, beta, C, Ctype, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasUint8gemmBias( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - cublasOperation_t transc, int m, int n, int k, const unsigned char *A, - int A_bias, int lda, const unsigned char *B, int B_bias, int ldb, - unsigned char *C, int C_bias, int ldc, int C_mult, int C_shift) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, cublasOperation_t, - int, int, int, const unsigned char *, int, int, const unsigned char *, - int, int, unsigned char *, int, int, int, int); - static auto func_ptr = LoadSymbol("cublasUint8gemmBias"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, transc, m, n, k, A, A_bias, lda, B, - B_bias, ldb, C, C_bias, ldc, C_mult, C_shift); -} - -cublasStatus_t CUBLASWINAPI cublasSsyrk_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *beta, /* host or device pointer */ - float *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const float *, const float *, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasSsyrk_v2"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasDsyrk_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *beta, /* host or device pointer */ - double *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const double *, const double *, int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDsyrk_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCsyrk_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, cuComplex *, - int); - static auto func_ptr = LoadSymbol("cublasCsyrk_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZsyrk_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, 
cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZsyrk_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCsyrkEx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuComplex *alpha, /* host or device pointer */ - const void *A, cudaDataType Atype, int lda, - const cuComplex *beta, /* host or device pointer */ - void *C, cudaDataType Ctype, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuComplex *, const void *, cudaDataType, int, const cuComplex *, - void *, cudaDataType, int); - static auto func_ptr = LoadSymbol("cublasCsyrkEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C, - Ctype, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCsyrk3mEx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuComplex *alpha, const void *A, cudaDataType Atype, - int lda, const cuComplex *beta, void *C, cudaDataType Ctype, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuComplex *, const void *, cudaDataType, int, const cuComplex *, - void *, cudaDataType, int); - static auto func_ptr = LoadSymbol("cublasCsyrk3mEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C, - Ctype, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCherk_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const float *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const float *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - 
cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const float *, const cuComplex *, int, const float *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCherk_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZherk_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const double *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, - const double *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const double *, const cuDoubleComplex *, int, const double *, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZherk_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCherkEx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const float *alpha, /* host or device pointer */ - const void *A, cudaDataType Atype, int lda, - const float *beta, /* host or device pointer */ - void *C, cudaDataType Ctype, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const float *, const void *, cudaDataType, int, const float *, void *, - cudaDataType, int); - static auto func_ptr = LoadSymbol("cublasCherkEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C, - Ctype, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCherk3mEx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const float *alpha, const void *A, cudaDataType Atype, - int lda, const float *beta, void *C, cudaDataType Ctype, 
int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const float *, const void *, cudaDataType, int, const float *, void *, - cudaDataType, int); - static auto func_ptr = LoadSymbol("cublasCherk3mEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C, - Ctype, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasSsyr2k_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *B, int ldb, - const float *beta, /* host or device pointer */ - float *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const float *, const float *, int, const float *, int, const float *, - float *, int); - static auto func_ptr = LoadSymbol("cublasSsyr2k_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasDsyr2k_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *B, int ldb, - const double *beta, /* host or device pointer */ - double *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const double *, const double *, int, const double *, int, const double *, - double *, int); - static auto func_ptr = LoadSymbol("cublasDsyr2k_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCsyr2k_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const 
cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, int, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsyr2k_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZsyr2k_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cublasZsyr2k_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCher2k_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, - const float *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, int, - const float *, cuComplex *, int); - static auto func_ptr = 
LoadSymbol("cublasCher2k_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZher2k_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - const double *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const double *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZher2k_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasSsyrkx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *B, int ldb, - const float *beta, /* host or device pointer */ - float *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const float *, const float *, int, const float *, int, const float *, - float *, int); - static auto func_ptr = LoadSymbol("cublasSsyrkx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasDsyrkx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *B, int ldb, - const double *beta, /* host or device pointer */ - double *C, int ldc) { - using FuncPtr = 
cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const double *, const double *, int, const double *, int, const double *, - double *, int); - static auto func_ptr = LoadSymbol("cublasDsyrkx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCsyrkx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, int, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsyrkx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZsyrkx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cublasZsyrkx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCherkx( - cublasHandle_t handle, 
cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, - const float *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, int, - const float *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCherkx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZherkx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - const double *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const double *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZherkx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasSsymm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, - int n, const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *B, int ldb, - const float *beta, /* host or device pointer */ - float *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, - const float *, const float *, int, const float *, int, const float *, - float *, int); - static auto func_ptr = 
LoadSymbol("cublasSsymm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasDsymm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, - int n, const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *B, int ldb, - const double *beta, /* host or device pointer */ - double *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, - const double *, const double *, int, const double *, int, const double *, - double *, int); - static auto func_ptr = LoadSymbol("cublasDsymm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCsymm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, - int n, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, int, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsymm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZsymm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, - int n, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = 
cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cublasZsymm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasChemm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, - int n, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, int, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasChemm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZhemm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, - int n, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cublasZhemm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t 
CUBLASWINAPI cublasStrsm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const float *alpha, /* host or device pointer */ - const float *A, int lda, float *B, int ldb) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const float *, const float *, int, float *, - int); - static auto func_ptr = LoadSymbol("cublasStrsm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb); -} - -cublasStatus_t CUBLASWINAPI cublasDtrsm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const double *alpha, /* host or device pointer */ - const double *A, int lda, double *B, int ldb) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const double *, const double *, int, double *, - int); - static auto func_ptr = LoadSymbol("cublasDtrsm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb); -} - -cublasStatus_t CUBLASWINAPI cublasCtrsm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, cuComplex *B, int ldb) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const cuComplex *, const cuComplex *, int, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtrsm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, 
diag, m, n, alpha, A, lda, B, ldb); -} - -cublasStatus_t CUBLASWINAPI cublasZtrsm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, cuDoubleComplex *B, int ldb) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtrsm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb); -} - -cublasStatus_t CUBLASWINAPI cublasStrmm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *B, int ldb, float *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const float *, const float *, int, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStrmm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasDtrmm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *B, int ldb, double *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const double *, const double *, 
int, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtrmm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCtrmm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, cuComplex *C, - int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const cuComplex *, const cuComplex *, int, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtrmm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZtrmm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtrmm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasSgemmBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const float *alpha, /* host 
or device pointer */ - const float *const Aarray[], int lda, const float *const Barray[], int ldb, - const float *beta, /* host or device pointer */ - float *const Carray[], int ldc, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const float *, const float *const[], int, const float *const[], int, - const float *, float *const[], int, int); - static auto func_ptr = LoadSymbol("cublasSgemmBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray, - ldb, beta, Carray, ldc, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasDgemmBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const double *alpha, /* host or device pointer */ - const double *const Aarray[], int lda, const double *const Barray[], - int ldb, const double *beta, /* host or device pointer */ - double *const Carray[], int ldc, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const double *, const double *const[], int, const double *const[], int, - const double *, double *const[], int, int); - static auto func_ptr = LoadSymbol("cublasDgemmBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray, - ldb, beta, Carray, ldc, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasCgemmBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *const Aarray[], int lda, const cuComplex *const Barray[], - int ldb, const cuComplex *beta, /* host or device pointer */ - cuComplex *const Carray[], int ldc, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, 
cublasOperation_t, cublasOperation_t, int, int, int, - const cuComplex *, const cuComplex *const[], int, - const cuComplex *const[], int, const cuComplex *, cuComplex *const[], int, - int); - static auto func_ptr = LoadSymbol("cublasCgemmBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray, - ldb, beta, Carray, ldc, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasCgemm3mBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *const Aarray[], int lda, const cuComplex *const Barray[], - int ldb, const cuComplex *beta, /* host or device pointer */ - cuComplex *const Carray[], int ldc, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuComplex *, const cuComplex *const[], int, - const cuComplex *const[], int, const cuComplex *, cuComplex *const[], int, - int); - static auto func_ptr = LoadSymbol("cublasCgemm3mBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray, - ldb, beta, Carray, ldc, batchCount); -} - -cublasStatus_t CUBLASWINAPI -cublasZgemmBatched(cublasHandle_t handle, cublasOperation_t transa, - cublasOperation_t transb, int m, int n, int k, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *const Aarray[], int lda, - const cuDoubleComplex *const Barray[], int ldb, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *const Carray[], int ldc, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuDoubleComplex *, const cuDoubleComplex *const[], int, - const cuDoubleComplex *const[], int, const cuDoubleComplex *, - 
cuDoubleComplex *const[], int, int); - static auto func_ptr = LoadSymbol("cublasZgemmBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray, - ldb, beta, Carray, ldc, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const void *alpha, /* host or device pointer */ - const void *const Aarray[], cudaDataType Atype, int lda, - const void *const Barray[], cudaDataType Btype, int ldb, - const void *beta, /* host or device pointer */ - void *const Carray[], cudaDataType Ctype, int ldc, int batchCount, - cudaDataType computeType, cublasGemmAlgo_t algo) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const void *, const void *const[], cudaDataType, int, const void *const[], - cudaDataType, int, const void *, void *const[], cudaDataType, int, int, - cudaDataType, cublasGemmAlgo_t); - static auto func_ptr = LoadSymbol("cublasGemmBatchedEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, Atype, lda, - Barray, Btype, ldb, beta, Carray, Ctype, ldc, batchCount, - computeType, algo); -} - -cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const void *alpha, /* host or device pointer */ - const void *A, cudaDataType Atype, int lda, - long long int strideA, /* purposely signed */ - const void *B, cudaDataType Btype, int ldb, long long int strideB, - const void *beta, /* host or device pointer */ - void *C, cudaDataType Ctype, int ldc, long long int strideC, int batchCount, - cudaDataType computeType, cublasGemmAlgo_t algo) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, 
int, int, - const void *, const void *, cudaDataType, int, long long, const void *, - cudaDataType, int, long long, const void *, void *, cudaDataType, int, - long long, int, cudaDataType, cublasGemmAlgo_t); - static auto func_ptr = LoadSymbol("cublasGemmStridedBatchedEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, - strideA, B, Btype, ldb, strideB, beta, C, Ctype, ldc, strideC, - batchCount, computeType, algo); -} - -cublasStatus_t CUBLASWINAPI cublasSgemmStridedBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const float *alpha, /* host or device pointer */ - const float *A, int lda, long long int strideA, /* purposely signed */ - const float *B, int ldb, long long int strideB, - const float *beta, /* host or device pointer */ - float *C, int ldc, long long int strideC, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const float *, const float *, int, long long, const float *, int, - long long, const float *, float *, int, long long, int); - static auto func_ptr = LoadSymbol("cublasSgemmStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, - ldb, strideB, beta, C, ldc, strideC, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasDgemmStridedBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const double *alpha, /* host or device pointer */ - const double *A, int lda, long long int strideA, /* purposely signed */ - const double *B, int ldb, long long int strideB, - const double *beta, /* host or device pointer */ - double *C, int ldc, long long int strideC, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, 
int, int, - const double *, const double *, int, long long, const double *, int, - long long, const double *, double *, int, long long, int); - static auto func_ptr = LoadSymbol("cublasDgemmStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, - ldb, strideB, beta, C, ldc, strideC, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasCgemmStridedBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, long long int strideA, /* purposely signed */ - const cuComplex *B, int ldb, long long int strideB, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc, long long int strideC, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuComplex *, const cuComplex *, int, long long, const cuComplex *, - int, long long, const cuComplex *, cuComplex *, int, long long, int); - static auto func_ptr = LoadSymbol("cublasCgemmStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, - ldb, strideB, beta, C, ldc, strideC, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasCgemm3mStridedBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, long long int strideA, /* purposely signed */ - const cuComplex *B, int ldb, long long int strideB, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc, long long int strideC, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuComplex *, const cuComplex *, int, 
long long, const cuComplex *, - int, long long, const cuComplex *, cuComplex *, int, long long, int); - static auto func_ptr = LoadSymbol("cublasCgemm3mStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, - ldb, strideB, beta, C, ldc, strideC, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasZgemmStridedBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, - long long int strideA, /* purposely signed */ - const cuDoubleComplex *B, int ldb, long long int strideB, - const cuDoubleComplex *beta, /* host or device poi */ - cuDoubleComplex *C, int ldc, long long int strideC, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, long long, - const cuDoubleComplex *, int, long long, const cuDoubleComplex *, - cuDoubleComplex *, int, long long, int); - static auto func_ptr = LoadSymbol("cublasZgemmStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, - ldb, strideB, beta, C, ldc, strideC, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasSgeam( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *beta, /* host or device pointer */ - const float *B, int ldb, float *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, - const float *, const float *, int, const float *, const float *, int, - float *, int); - static auto func_ptr = LoadSymbol("cublasSgeam"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasDgeam( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *beta, /* host or device pointer */ - const double *B, int ldb, double *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, - const double *, const double *, int, const double *, const double *, int, - double *, int); - static auto func_ptr = LoadSymbol("cublasDgeam"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCgeam( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, - const cuComplex *beta, /* host or device pointer */ - const cuComplex *B, int ldb, cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgeam"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZgeam( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, - const cuDoubleComplex *beta, /* host or device pointer */ - const cuDoubleComplex *B, int ldb, cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - 
cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cublasZgeam"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasSgetrfBatched( - cublasHandle_t handle, int n, float *const A[], /*Device pointer*/ - int lda, int *P, /*Device Pointer*/ - int *info, /*Device Pointer*/ - int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, float *const[], int, int *, int *, int); - static auto func_ptr = LoadSymbol("cublasSgetrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, P, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasDgetrfBatched( - cublasHandle_t handle, int n, double *const A[], /*Device pointer*/ - int lda, int *P, /*Device Pointer*/ - int *info, /*Device Pointer*/ - int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, double *const[], int, int *, int *, int); - static auto func_ptr = LoadSymbol("cublasDgetrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, P, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasCgetrfBatched( - cublasHandle_t handle, int n, cuComplex *const A[], /*Device pointer*/ - int lda, int *P, /*Device Pointer*/ - int *info, /*Device Pointer*/ - int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, cuComplex *const[], int, int *, int *, int); - static auto func_ptr = LoadSymbol("cublasCgetrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, P, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasZgetrfBatched( - cublasHandle_t handle, int n, cuDoubleComplex *const 
A[], /*Device pointer*/ - int lda, int *P, /*Device Pointer*/ - int *info, /*Device Pointer*/ - int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, cuDoubleComplex *const[], int, int *, int *, int); - static auto func_ptr = LoadSymbol("cublasZgetrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, P, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasSgetriBatched( - cublasHandle_t handle, int n, const float *const A[], /*Device pointer*/ - int lda, const int *P, /*Device pointer*/ - float *const C[], /*Device pointer*/ - int ldc, int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const float *const[], int, const int *, - float *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasSgetriBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, P, C, ldc, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasDgetriBatched( - cublasHandle_t handle, int n, const double *const A[], /*Device pointer*/ - int lda, const int *P, /*Device pointer*/ - double *const C[], /*Device pointer*/ - int ldc, int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const double *const[], int, const int *, - double *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasDgetriBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, P, C, ldc, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasCgetriBatched( - cublasHandle_t handle, int n, const cuComplex *const A[], /*Device pointer*/ - int lda, const int *P, /*Device pointer*/ - cuComplex *const C[], /*Device pointer*/ - int ldc, int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuComplex *const[], int, const int *, - cuComplex *const[], int, int *, int); - static auto 
func_ptr = LoadSymbol("cublasCgetriBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, P, C, ldc, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasZgetriBatched(cublasHandle_t handle, int n, - const cuDoubleComplex *const A[], /*Device pointer*/ - int lda, const int *P, /*Device pointer*/ - cuDoubleComplex *const C[], /*Device pointer*/ - int ldc, int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *const[], int, const int *, - cuDoubleComplex *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasZgetriBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, P, C, ldc, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasSgetrsBatched( - cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, - const float *const Aarray[], int lda, const int *devIpiv, - float *const Barray[], int ldb, int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const float *const[], int, - const int *, float *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasSgetrsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb, - info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasDgetrsBatched( - cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, - const double *const Aarray[], int lda, const int *devIpiv, - double *const Barray[], int ldb, int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const double *const[], int, - const int *, double *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasDgetrsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, 
Barray, ldb, - info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasCgetrsBatched( - cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, - const cuComplex *const Aarray[], int lda, const int *devIpiv, - cuComplex *const Barray[], int ldb, int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const cuComplex *const[], - int, const int *, cuComplex *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasCgetrsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb, - info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasZgetrsBatched( - cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, - const cuDoubleComplex *const Aarray[], int lda, const int *devIpiv, - cuDoubleComplex *const Barray[], int ldb, int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, - const cuDoubleComplex *const[], int, const int *, - cuDoubleComplex *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasZgetrsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb, - info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasStrsmBatched( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const float *alpha, /*Host or Device Pointer*/ - const float *const A[], int lda, float *const B[], int ldb, - int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const float *, const float *const[], int, - float *const[], int, int); - static auto func_ptr = LoadSymbol("cublasStrsmBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, - batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasDtrsmBatched( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const double *alpha, /*Host or Device Pointer*/ - const double *const A[], int lda, double *const B[], int ldb, - int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const double *, const double *const[], int, - double *const[], int, int); - static auto func_ptr = LoadSymbol("cublasDtrsmBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, - batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasCtrsmBatched( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const cuComplex *alpha, /*Host or Device Pointer*/ - const cuComplex *const A[], int lda, cuComplex *const B[], int ldb, - int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const cuComplex *, const cuComplex *const[], - int, cuComplex *const[], int, int); - static auto func_ptr = LoadSymbol("cublasCtrsmBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, - batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasZtrsmBatched( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const cuDoubleComplex *alpha, /*Host or Device Pointer*/ - const cuDoubleComplex *const A[], int lda, cuDoubleComplex *const B[], - int ldb, int batchCount) { - using FuncPtr = 
cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *const[], int, cuDoubleComplex *const[], int, int); - static auto func_ptr = LoadSymbol("cublasZtrsmBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, - batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasSmatinvBatched( - cublasHandle_t handle, int n, const float *const A[], /*Device pointer*/ - int lda, float *const Ainv[], /*Device pointer*/ - int lda_inv, int *info, /*Device Pointer*/ - int batchSize) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const float *const[], - int, float *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasSmatinvBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, Ainv, lda_inv, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasDmatinvBatched( - cublasHandle_t handle, int n, const double *const A[], /*Device pointer*/ - int lda, double *const Ainv[], /*Device pointer*/ - int lda_inv, int *info, /*Device Pointer*/ - int batchSize) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const double *const[], - int, double *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasDmatinvBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, Ainv, lda_inv, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasCmatinvBatched( - cublasHandle_t handle, int n, const cuComplex *const A[], /*Device pointer*/ - int lda, cuComplex *const Ainv[], /*Device pointer*/ - int lda_inv, int *info, /*Device Pointer*/ - int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuComplex *const[], int, cuComplex *const[], - int, int *, int); - static auto func_ptr = 
LoadSymbol("cublasCmatinvBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, Ainv, lda_inv, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasZmatinvBatched(cublasHandle_t handle, int n, - const cuDoubleComplex *const A[], /*Device pointer*/ - int lda, cuDoubleComplex *const Ainv[], /*Device pointer*/ - int lda_inv, int *info, /*Device Pointer*/ - int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *const[], int, - cuDoubleComplex *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasZmatinvBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, Ainv, lda_inv, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasSgeqrfBatched(cublasHandle_t handle, int m, int n, - float *const Aarray[], /*Device pointer*/ - int lda, float *const TauArray[], /*Device pointer*/ - int *info, int batchSize) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, int, float *const[], - int, float *const[], int *, int); - static auto func_ptr = LoadSymbol("cublasSgeqrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, Aarray, lda, TauArray, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasDgeqrfBatched(cublasHandle_t handle, int m, int n, - double *const Aarray[], /*Device pointer*/ - int lda, double *const TauArray[], /*Device pointer*/ - int *info, int batchSize) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, int, double *const[], - int, double *const[], int *, int); - static auto func_ptr = LoadSymbol("cublasDgeqrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, Aarray, lda, TauArray, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasCgeqrfBatched(cublasHandle_t handle, int m, int n, - cuComplex *const Aarray[], /*Device pointer*/ - int lda, cuComplex *const 
TauArray[], /*Device pointer*/ - int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, int, cuComplex *const[], int, cuComplex *const[], - int *, int); - static auto func_ptr = LoadSymbol("cublasCgeqrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, Aarray, lda, TauArray, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasZgeqrfBatched( - cublasHandle_t handle, int m, int n, - cuDoubleComplex *const Aarray[], /*Device pointer*/ - int lda, cuDoubleComplex *const TauArray[], /*Device pointer*/ - int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, int, cuDoubleComplex *const[], int, - cuDoubleComplex *const[], int *, int); - static auto func_ptr = LoadSymbol("cublasZgeqrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, Aarray, lda, TauArray, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasSgelsBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - int nrhs, float *const Aarray[], /*Device pointer*/ - int lda, float *const Carray[], /*Device pointer*/ - int ldc, int *info, int *devInfoArray, /*Device pointer*/ - int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, int, float *const[], int, - float *const[], int, int *, int *, int); - static auto func_ptr = LoadSymbol("cublasSgelsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info, - devInfoArray, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasDgelsBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - int nrhs, double *const Aarray[], /*Device pointer*/ - int lda, double *const Carray[], /*Device pointer*/ - int ldc, int *info, int *devInfoArray, /*Device pointer*/ - int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - 
cublasHandle_t, cublasOperation_t, int, int, int, double *const[], int, - double *const[], int, int *, int *, int); - static auto func_ptr = LoadSymbol("cublasDgelsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info, - devInfoArray, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasCgelsBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - int nrhs, cuComplex *const Aarray[], /*Device pointer*/ - int lda, cuComplex *const Carray[], /*Device pointer*/ - int ldc, int *info, int *devInfoArray, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, int, cuComplex *const[], int, - cuComplex *const[], int, int *, int *, int); - static auto func_ptr = LoadSymbol("cublasCgelsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info, - devInfoArray, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasZgelsBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - int nrhs, cuDoubleComplex *const Aarray[], /*Device pointer*/ - int lda, cuDoubleComplex *const Carray[], /*Device pointer*/ - int ldc, int *info, int *devInfoArray, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, int, - cuDoubleComplex *const[], int, cuDoubleComplex *const[], int, int *, - int *, int); - static auto func_ptr = LoadSymbol("cublasZgelsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info, - devInfoArray, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasSdgmm(cublasHandle_t handle, - cublasSideMode_t mode, int m, int n, - const float *A, int lda, const float *x, - int incx, float *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, int, int, const 
float *, int, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSdgmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, m, n, A, lda, x, incx, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasDdgmm(cublasHandle_t handle, - cublasSideMode_t mode, int m, int n, - const double *A, int lda, - const double *x, int incx, double *C, - int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, int, int, const double *, int, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDdgmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, m, n, A, lda, x, incx, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCdgmm(cublasHandle_t handle, - cublasSideMode_t mode, int m, int n, - const cuComplex *A, int lda, - const cuComplex *x, int incx, - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, int, int, const cuComplex *, int, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCdgmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, m, n, A, lda, x, incx, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZdgmm(cublasHandle_t handle, - cublasSideMode_t mode, int m, int n, - const cuDoubleComplex *A, int lda, - const cuDoubleComplex *x, int incx, - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, int, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZdgmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, m, n, A, lda, x, incx, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasStpttr(cublasHandle_t handle, - cublasFillMode_t uplo, int n, - const float *AP, float *A, int lda) { - using FuncPtr = 
cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasStpttr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, AP, A, lda); -} - -cublasStatus_t CUBLASWINAPI cublasDtpttr(cublasHandle_t handle, - cublasFillMode_t uplo, int n, - const double *AP, double *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDtpttr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, AP, A, lda); -} - -cublasStatus_t CUBLASWINAPI cublasCtpttr(cublasHandle_t handle, - cublasFillMode_t uplo, int n, - const cuComplex *AP, cuComplex *A, - int lda) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtpttr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, AP, A, lda); -} - -cublasStatus_t CUBLASWINAPI cublasZtpttr(cublasHandle_t handle, - cublasFillMode_t uplo, int n, - const cuDoubleComplex *AP, - cuDoubleComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtpttr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, AP, A, lda); -} - -cublasStatus_t CUBLASWINAPI cublasStrttp(cublasHandle_t handle, - cublasFillMode_t uplo, int n, - const float *A, int lda, float *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, int, float *); - static auto func_ptr = LoadSymbol("cublasStrttp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, AP); -} - -cublasStatus_t 
CUBLASWINAPI cublasDtrttp(cublasHandle_t handle, - cublasFillMode_t uplo, int n, - const double *A, int lda, double *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, int, double *); - static auto func_ptr = LoadSymbol("cublasDtrttp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, AP); -} - -cublasStatus_t CUBLASWINAPI cublasCtrttp(cublasHandle_t handle, - cublasFillMode_t uplo, int n, - const cuComplex *A, int lda, - cuComplex *AP) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, - const cuComplex *, int, cuComplex *); - static auto func_ptr = LoadSymbol("cublasCtrttp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, AP); -} - -cublasStatus_t CUBLASWINAPI cublasZtrttp(cublasHandle_t handle, - cublasFillMode_t uplo, int n, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, int, - cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZtrttp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, AP); -} - -cublasStatus CUBLASWINAPI cublasInit(void) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(); - static auto func_ptr = LoadSymbol("cublasInit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -cublasStatus CUBLASWINAPI cublasShutdown(void) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(); - static auto func_ptr = LoadSymbol("cublasShutdown"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -cublasStatus CUBLASWINAPI cublasGetError(void) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(); - static auto func_ptr = LoadSymbol("cublasGetError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -cublasStatus 
CUBLASWINAPI cublasGetVersion(int *version) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int *); - static auto func_ptr = LoadSymbol("cublasGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(version); -} - -cublasStatus CUBLASWINAPI cublasAlloc(int n, int elemSize, void **devicePtr) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, void **); - static auto func_ptr = LoadSymbol("cublasAlloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(n, elemSize, devicePtr); -} - -cublasStatus CUBLASWINAPI cublasFree(void *devicePtr) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(void *); - static auto func_ptr = LoadSymbol("cublasFree"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devicePtr); -} - -cublasStatus CUBLASWINAPI cublasSetKernelStream(cudaStream_t stream) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cublasSetKernelStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -float CUBLASWINAPI cublasSnrm2(int n, const float *x, int incx) { - using FuncPtr = float(CUBLASWINAPI *)(int, const float *, int); - static auto func_ptr = LoadSymbol("cublasSnrm2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSnrm2"); - return func_ptr(n, x, incx); -} - -double CUBLASWINAPI cublasDnrm2(int n, const double *x, int incx) { - using FuncPtr = double(CUBLASWINAPI *)(int, const double *, int); - static auto func_ptr = LoadSymbol("cublasDnrm2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDnrm2"); - return func_ptr(n, x, incx); -} - -float CUBLASWINAPI cublasScnrm2(int n, const cuComplex *x, int incx) { - using FuncPtr = float(CUBLASWINAPI *)(int, const cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasScnrm2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasScnrm2"); - return func_ptr(n, x, incx); -} - -double CUBLASWINAPI cublasDznrm2(int n, const cuDoubleComplex *x, int incx) { - 
using FuncPtr = double(CUBLASWINAPI *)(int, const cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasDznrm2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDznrm2"); - return func_ptr(n, x, incx); -} - -float CUBLASWINAPI cublasSdot(int n, const float *x, int incx, const float *y, - int incy) { - using FuncPtr = - float(CUBLASWINAPI *)(int, const float *, int, const float *, int); - static auto func_ptr = LoadSymbol("cublasSdot"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSdot"); - return func_ptr(n, x, incx, y, incy); -} - -double CUBLASWINAPI cublasDdot(int n, const double *x, int incx, - const double *y, int incy) { - using FuncPtr = - double(CUBLASWINAPI *)(int, const double *, int, const double *, int); - static auto func_ptr = LoadSymbol("cublasDdot"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDdot"); - return func_ptr(n, x, incx, y, incy); -} - -cuComplex CUBLASWINAPI cublasCdotu(int n, const cuComplex *x, int incx, - const cuComplex *y, int incy) { - using FuncPtr = cuComplex(CUBLASWINAPI *)(int, const cuComplex *, int, - const cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCdotu"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCdotu"); - return func_ptr(n, x, incx, y, incy); -} - -cuComplex CUBLASWINAPI cublasCdotc(int n, const cuComplex *x, int incx, - const cuComplex *y, int incy) { - using FuncPtr = cuComplex(CUBLASWINAPI *)(int, const cuComplex *, int, - const cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCdotc"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCdotc"); - return func_ptr(n, x, incx, y, incy); -} - -cuDoubleComplex CUBLASWINAPI cublasZdotu(int n, const cuDoubleComplex *x, - int incx, const cuDoubleComplex *y, - int incy) { - using FuncPtr = cuDoubleComplex(CUBLASWINAPI *)( - int, const cuDoubleComplex *, int, const cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZdotu"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZdotu"); - return func_ptr(n, x, incx, y, 
incy); -} - -cuDoubleComplex CUBLASWINAPI cublasZdotc(int n, const cuDoubleComplex *x, - int incx, const cuDoubleComplex *y, - int incy) { - using FuncPtr = cuDoubleComplex(CUBLASWINAPI *)( - int, const cuDoubleComplex *, int, const cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZdotc"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZdotc"); - return func_ptr(n, x, incx, y, incy); -} - -void CUBLASWINAPI cublasSscal(int n, float alpha, float *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSscal"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSscal"); - return func_ptr(n, alpha, x, incx); -} - -void CUBLASWINAPI cublasDscal(int n, double alpha, double *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDscal"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDscal"); - return func_ptr(n, alpha, x, incx); -} - -void CUBLASWINAPI cublasCscal(int n, cuComplex alpha, cuComplex *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCscal"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCscal"); - return func_ptr(n, alpha, x, incx); -} - -void CUBLASWINAPI cublasZscal(int n, cuDoubleComplex alpha, cuDoubleComplex *x, - int incx) { - using FuncPtr = - void(CUBLASWINAPI *)(int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZscal"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZscal"); - return func_ptr(n, alpha, x, incx); -} - -void CUBLASWINAPI cublasCsscal(int n, float alpha, cuComplex *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(int, float, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsscal"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCsscal"); - return func_ptr(n, alpha, x, incx); -} - -void CUBLASWINAPI cublasZdscal(int n, double alpha, 
cuDoubleComplex *x, - int incx) { - using FuncPtr = void(CUBLASWINAPI *)(int, double, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZdscal"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZdscal"); - return func_ptr(n, alpha, x, incx); -} - -void CUBLASWINAPI cublasSaxpy(int n, float alpha, const float *x, int incx, - float *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(int, float, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSaxpy"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSaxpy"); - return func_ptr(n, alpha, x, incx, y, incy); -} - -void CUBLASWINAPI cublasDaxpy(int n, double alpha, const double *x, int incx, - double *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(int, double, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDaxpy"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDaxpy"); - return func_ptr(n, alpha, x, incx, y, incy); -} - -void CUBLASWINAPI cublasCaxpy(int n, cuComplex alpha, const cuComplex *x, - int incx, cuComplex *y, int incy) { - using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex, const cuComplex *, int, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCaxpy"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCaxpy"); - return func_ptr(n, alpha, x, incx, y, incy); -} - -void CUBLASWINAPI cublasZaxpy(int n, cuDoubleComplex alpha, - const cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(int, cuDoubleComplex, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZaxpy"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZaxpy"); - return func_ptr(n, alpha, x, incx, y, incy); -} - -void CUBLASWINAPI cublasScopy(int n, const float *x, int incx, float *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)(int, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasScopy"); - if (!func_ptr) 
LogFatalSymbolNotFound("cublasScopy"); - return func_ptr(n, x, incx, y, incy); -} - -void CUBLASWINAPI cublasDcopy(int n, const double *x, int incx, double *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)(int, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDcopy"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDcopy"); - return func_ptr(n, x, incx, y, incy); -} - -void CUBLASWINAPI cublasCcopy(int n, const cuComplex *x, int incx, cuComplex *y, - int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCcopy"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCcopy"); - return func_ptr(n, x, incx, y, incy); -} - -void CUBLASWINAPI cublasZcopy(int n, const cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy) { - using FuncPtr = void(CUBLASWINAPI *)(int, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZcopy"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZcopy"); - return func_ptr(n, x, incx, y, incy); -} - -void CUBLASWINAPI cublasSswap(int n, float *x, int incx, float *y, int incy) { - using FuncPtr = void(CUBLASWINAPI *)(int, float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSswap"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSswap"); - return func_ptr(n, x, incx, y, incy); -} - -void CUBLASWINAPI cublasDswap(int n, double *x, int incx, double *y, int incy) { - using FuncPtr = void(CUBLASWINAPI *)(int, double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDswap"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDswap"); - return func_ptr(n, x, incx, y, incy); -} - -void CUBLASWINAPI cublasCswap(int n, cuComplex *x, int incx, cuComplex *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCswap"); - if (!func_ptr) 
LogFatalSymbolNotFound("cublasCswap"); - return func_ptr(n, x, incx, y, incy); -} - -void CUBLASWINAPI cublasZswap(int n, cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(int, cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZswap"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZswap"); - return func_ptr(n, x, incx, y, incy); -} - -int CUBLASWINAPI cublasIsamax(int n, const float *x, int incx) { - using FuncPtr = int(CUBLASWINAPI *)(int, const float *, int); - static auto func_ptr = LoadSymbol("cublasIsamax"); - if (!func_ptr) LogFatalSymbolNotFound("cublasIsamax"); - return func_ptr(n, x, incx); -} - -int CUBLASWINAPI cublasIdamax(int n, const double *x, int incx) { - using FuncPtr = int(CUBLASWINAPI *)(int, const double *, int); - static auto func_ptr = LoadSymbol("cublasIdamax"); - if (!func_ptr) LogFatalSymbolNotFound("cublasIdamax"); - return func_ptr(n, x, incx); -} - -int CUBLASWINAPI cublasIcamax(int n, const cuComplex *x, int incx) { - using FuncPtr = int(CUBLASWINAPI *)(int, const cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasIcamax"); - if (!func_ptr) LogFatalSymbolNotFound("cublasIcamax"); - return func_ptr(n, x, incx); -} - -int CUBLASWINAPI cublasIzamax(int n, const cuDoubleComplex *x, int incx) { - using FuncPtr = int(CUBLASWINAPI *)(int, const cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasIzamax"); - if (!func_ptr) LogFatalSymbolNotFound("cublasIzamax"); - return func_ptr(n, x, incx); -} - -int CUBLASWINAPI cublasIsamin(int n, const float *x, int incx) { - using FuncPtr = int(CUBLASWINAPI *)(int, const float *, int); - static auto func_ptr = LoadSymbol("cublasIsamin"); - if (!func_ptr) LogFatalSymbolNotFound("cublasIsamin"); - return func_ptr(n, x, incx); -} - -int CUBLASWINAPI cublasIdamin(int n, const double *x, int incx) { - using FuncPtr = int(CUBLASWINAPI *)(int, const double *, int); - static auto func_ptr 
= LoadSymbol("cublasIdamin"); - if (!func_ptr) LogFatalSymbolNotFound("cublasIdamin"); - return func_ptr(n, x, incx); -} - -int CUBLASWINAPI cublasIcamin(int n, const cuComplex *x, int incx) { - using FuncPtr = int(CUBLASWINAPI *)(int, const cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasIcamin"); - if (!func_ptr) LogFatalSymbolNotFound("cublasIcamin"); - return func_ptr(n, x, incx); -} - -int CUBLASWINAPI cublasIzamin(int n, const cuDoubleComplex *x, int incx) { - using FuncPtr = int(CUBLASWINAPI *)(int, const cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasIzamin"); - if (!func_ptr) LogFatalSymbolNotFound("cublasIzamin"); - return func_ptr(n, x, incx); -} - -float CUBLASWINAPI cublasSasum(int n, const float *x, int incx) { - using FuncPtr = float(CUBLASWINAPI *)(int, const float *, int); - static auto func_ptr = LoadSymbol("cublasSasum"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSasum"); - return func_ptr(n, x, incx); -} - -double CUBLASWINAPI cublasDasum(int n, const double *x, int incx) { - using FuncPtr = double(CUBLASWINAPI *)(int, const double *, int); - static auto func_ptr = LoadSymbol("cublasDasum"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDasum"); - return func_ptr(n, x, incx); -} - -float CUBLASWINAPI cublasScasum(int n, const cuComplex *x, int incx) { - using FuncPtr = float(CUBLASWINAPI *)(int, const cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasScasum"); - if (!func_ptr) LogFatalSymbolNotFound("cublasScasum"); - return func_ptr(n, x, incx); -} - -double CUBLASWINAPI cublasDzasum(int n, const cuDoubleComplex *x, int incx) { - using FuncPtr = double(CUBLASWINAPI *)(int, const cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasDzasum"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDzasum"); - return func_ptr(n, x, incx); -} - -void CUBLASWINAPI cublasSrot(int n, float *x, int incx, float *y, int incy, - float sc, float ss) { - using FuncPtr = - void(CUBLASWINAPI *)(int, 
float *, int, float *, int, float, float); - static auto func_ptr = LoadSymbol("cublasSrot"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSrot"); - return func_ptr(n, x, incx, y, incy, sc, ss); -} - -void CUBLASWINAPI cublasDrot(int n, double *x, int incx, double *y, int incy, - double sc, double ss) { - using FuncPtr = - void(CUBLASWINAPI *)(int, double *, int, double *, int, double, double); - static auto func_ptr = LoadSymbol("cublasDrot"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDrot"); - return func_ptr(n, x, incx, y, incy, sc, ss); -} - -void CUBLASWINAPI cublasCrot(int n, cuComplex *x, int incx, cuComplex *y, - int incy, float c, cuComplex s) { - using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex *, int, cuComplex *, int, - float, cuComplex); - static auto func_ptr = LoadSymbol("cublasCrot"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCrot"); - return func_ptr(n, x, incx, y, incy, c, s); -} - -void CUBLASWINAPI cublasZrot(int n, cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy, double sc, - cuDoubleComplex cs) { - using FuncPtr = - void(CUBLASWINAPI *)(int, cuDoubleComplex *, int, cuDoubleComplex *, int, - double, cuDoubleComplex); - static auto func_ptr = LoadSymbol("cublasZrot"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZrot"); - return func_ptr(n, x, incx, y, incy, sc, cs); -} - -void CUBLASWINAPI cublasCsrot(int n, cuComplex *x, int incx, cuComplex *y, - int incy, float c, float s) { - using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex *, int, cuComplex *, int, - float, float); - static auto func_ptr = LoadSymbol("cublasCsrot"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCsrot"); - return func_ptr(n, x, incx, y, incy, c, s); -} - -void CUBLASWINAPI cublasZdrot(int n, cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy, double c, - double s) { - using FuncPtr = void(CUBLASWINAPI *)(int, cuDoubleComplex *, int, - cuDoubleComplex *, int, double, double); - static auto func_ptr = LoadSymbol("cublasZdrot"); - if 
(!func_ptr) LogFatalSymbolNotFound("cublasZdrot"); - return func_ptr(n, x, incx, y, incy, c, s); -} - -void CUBLASWINAPI cublasSrotg(float *sa, float *sb, float *sc, float *ss) { - using FuncPtr = void(CUBLASWINAPI *)(float *, float *, float *, float *); - static auto func_ptr = LoadSymbol("cublasSrotg"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSrotg"); - return func_ptr(sa, sb, sc, ss); -} - -void CUBLASWINAPI cublasDrotg(double *sa, double *sb, double *sc, double *ss) { - using FuncPtr = void(CUBLASWINAPI *)(double *, double *, double *, double *); - static auto func_ptr = LoadSymbol("cublasDrotg"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDrotg"); - return func_ptr(sa, sb, sc, ss); -} - -void CUBLASWINAPI cublasCrotg(cuComplex *ca, cuComplex cb, float *sc, - cuComplex *cs) { - using FuncPtr = - void(CUBLASWINAPI *)(cuComplex *, cuComplex, float *, cuComplex *); - static auto func_ptr = LoadSymbol("cublasCrotg"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCrotg"); - return func_ptr(ca, cb, sc, cs); -} - -void CUBLASWINAPI cublasZrotg(cuDoubleComplex *ca, cuDoubleComplex cb, - double *sc, cuDoubleComplex *cs) { - using FuncPtr = void(CUBLASWINAPI *)(cuDoubleComplex *, cuDoubleComplex, - double *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZrotg"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZrotg"); - return func_ptr(ca, cb, sc, cs); -} - -void CUBLASWINAPI cublasSrotm(int n, float *x, int incx, float *y, int incy, - const float *sparam) { - using FuncPtr = - void(CUBLASWINAPI *)(int, float *, int, float *, int, const float *); - static auto func_ptr = LoadSymbol("cublasSrotm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSrotm"); - return func_ptr(n, x, incx, y, incy, sparam); -} - -void CUBLASWINAPI cublasDrotm(int n, double *x, int incx, double *y, int incy, - const double *sparam) { - using FuncPtr = - void(CUBLASWINAPI *)(int, double *, int, double *, int, const double *); - static auto func_ptr = 
LoadSymbol("cublasDrotm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDrotm"); - return func_ptr(n, x, incx, y, incy, sparam); -} - -void CUBLASWINAPI cublasSrotmg(float *sd1, float *sd2, float *sx1, - const float *sy1, float *sparam) { - using FuncPtr = - void(CUBLASWINAPI *)(float *, float *, float *, const float *, float *); - static auto func_ptr = LoadSymbol("cublasSrotmg"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSrotmg"); - return func_ptr(sd1, sd2, sx1, sy1, sparam); -} - -void CUBLASWINAPI cublasDrotmg(double *sd1, double *sd2, double *sx1, - const double *sy1, double *sparam) { - using FuncPtr = void(CUBLASWINAPI *)(double *, double *, double *, - const double *, double *); - static auto func_ptr = LoadSymbol("cublasDrotmg"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDrotmg"); - return func_ptr(sd1, sd2, sx1, sy1, sparam); -} - -void CUBLASWINAPI cublasSgemv(char trans, int m, int n, float alpha, - const float *A, int lda, const float *x, int incx, - float beta, float *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, int, float, const float *, int, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSgemv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSgemv"); - return func_ptr(trans, m, n, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasDgemv(char trans, int m, int n, double alpha, - const double *A, int lda, const double *x, - int incx, double beta, double *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, int, double, const double *, int, - const double *, int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDgemv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDgemv"); - return func_ptr(trans, m, n, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasCgemv(char trans, int m, int n, cuComplex alpha, - const cuComplex *A, int lda, const cuComplex *x, - int incx, cuComplex beta, cuComplex *y, - int incy) { 
- using FuncPtr = - void(CUBLASWINAPI *)(char, int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgemv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCgemv"); - return func_ptr(trans, m, n, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasZgemv(char trans, int m, int n, cuDoubleComplex alpha, - const cuDoubleComplex *A, int lda, - const cuDoubleComplex *x, int incx, - cuDoubleComplex beta, cuDoubleComplex *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZgemv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZgemv"); - return func_ptr(trans, m, n, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasSgbmv(char trans, int m, int n, int kl, int ku, - float alpha, const float *A, int lda, - const float *x, int incx, float beta, float *y, - int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, int, int, int, float, const float *, int, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSgbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSgbmv"); - return func_ptr(trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasDgbmv(char trans, int m, int n, int kl, int ku, - double alpha, const double *A, int lda, - const double *x, int incx, double beta, double *y, - int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, int, int, int, double, const double *, - int, const double *, int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDgbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDgbmv"); - return func_ptr(trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasCgbmv(char trans, int m, int n, int kl, 
int ku, - cuComplex alpha, const cuComplex *A, int lda, - const cuComplex *x, int incx, cuComplex beta, - cuComplex *y, int incy) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, int, int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCgbmv"); - return func_ptr(trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasZgbmv(char trans, int m, int n, int kl, int ku, - cuDoubleComplex alpha, const cuDoubleComplex *A, - int lda, const cuDoubleComplex *x, int incx, - cuDoubleComplex beta, cuDoubleComplex *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, int, int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZgbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZgbmv"); - return func_ptr(trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasStrmv(char uplo, char trans, char diag, int n, - const float *A, int lda, float *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const float *, - int, float *, int); - static auto func_ptr = LoadSymbol("cublasStrmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasStrmv"); - return func_ptr(uplo, trans, diag, n, A, lda, x, incx); -} - -void CUBLASWINAPI cublasDtrmv(char uplo, char trans, char diag, int n, - const double *A, int lda, double *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const double *, - int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtrmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDtrmv"); - return func_ptr(uplo, trans, diag, n, A, lda, x, incx); -} - -void CUBLASWINAPI cublasCtrmv(char uplo, char trans, char diag, int n, - const cuComplex *A, int lda, cuComplex *x, - 
int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const cuComplex *, - int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtrmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCtrmv"); - return func_ptr(uplo, trans, diag, n, A, lda, x, incx); -} - -void CUBLASWINAPI cublasZtrmv(char uplo, char trans, char diag, int n, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *x, int incx) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, char, int, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtrmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZtrmv"); - return func_ptr(uplo, trans, diag, n, A, lda, x, incx); -} - -void CUBLASWINAPI cublasStbmv(char uplo, char trans, char diag, int n, int k, - const float *A, int lda, float *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, int, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasStbmv"); - return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); -} - -void CUBLASWINAPI cublasDtbmv(char uplo, char trans, char diag, int n, int k, - const double *A, int lda, double *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, int, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDtbmv"); - return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); -} - -void CUBLASWINAPI cublasCtbmv(char uplo, char trans, char diag, int n, int k, - const cuComplex *A, int lda, cuComplex *x, - int incx) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, char, int, int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCtbmv"); - return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); -} - -void CUBLASWINAPI 
cublasZtbmv(char uplo, char trans, char diag, int n, int k, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *x, int incx) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, char, int, int, const cuDoubleComplex *, - int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZtbmv"); - return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); -} - -void CUBLASWINAPI cublasStpmv(char uplo, char trans, char diag, int n, - const float *AP, float *x, int incx) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, char, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasStpmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasStpmv"); - return func_ptr(uplo, trans, diag, n, AP, x, incx); -} - -void CUBLASWINAPI cublasDtpmv(char uplo, char trans, char diag, int n, - const double *AP, double *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const double *, - double *, int); - static auto func_ptr = LoadSymbol("cublasDtpmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDtpmv"); - return func_ptr(uplo, trans, diag, n, AP, x, incx); -} - -void CUBLASWINAPI cublasCtpmv(char uplo, char trans, char diag, int n, - const cuComplex *AP, cuComplex *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtpmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCtpmv"); - return func_ptr(uplo, trans, diag, n, AP, x, incx); -} - -void CUBLASWINAPI cublasZtpmv(char uplo, char trans, char diag, int n, - const cuDoubleComplex *AP, cuDoubleComplex *x, - int incx) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, char, int, const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtpmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZtpmv"); - return func_ptr(uplo, trans, diag, n, AP, x, incx); -} - -void 
CUBLASWINAPI cublasStrsv(char uplo, char trans, char diag, int n, - const float *A, int lda, float *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const float *, - int, float *, int); - static auto func_ptr = LoadSymbol("cublasStrsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasStrsv"); - return func_ptr(uplo, trans, diag, n, A, lda, x, incx); -} - -void CUBLASWINAPI cublasDtrsv(char uplo, char trans, char diag, int n, - const double *A, int lda, double *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const double *, - int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtrsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDtrsv"); - return func_ptr(uplo, trans, diag, n, A, lda, x, incx); -} - -void CUBLASWINAPI cublasCtrsv(char uplo, char trans, char diag, int n, - const cuComplex *A, int lda, cuComplex *x, - int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const cuComplex *, - int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtrsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCtrsv"); - return func_ptr(uplo, trans, diag, n, A, lda, x, incx); -} - -void CUBLASWINAPI cublasZtrsv(char uplo, char trans, char diag, int n, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *x, int incx) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, char, int, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtrsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZtrsv"); - return func_ptr(uplo, trans, diag, n, A, lda, x, incx); -} - -void CUBLASWINAPI cublasStpsv(char uplo, char trans, char diag, int n, - const float *AP, float *x, int incx) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, char, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasStpsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasStpsv"); - return func_ptr(uplo, trans, diag, n, AP, x, incx); -} - 
-void CUBLASWINAPI cublasDtpsv(char uplo, char trans, char diag, int n, - const double *AP, double *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const double *, - double *, int); - static auto func_ptr = LoadSymbol("cublasDtpsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDtpsv"); - return func_ptr(uplo, trans, diag, n, AP, x, incx); -} - -void CUBLASWINAPI cublasCtpsv(char uplo, char trans, char diag, int n, - const cuComplex *AP, cuComplex *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtpsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCtpsv"); - return func_ptr(uplo, trans, diag, n, AP, x, incx); -} - -void CUBLASWINAPI cublasZtpsv(char uplo, char trans, char diag, int n, - const cuDoubleComplex *AP, cuDoubleComplex *x, - int incx) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, char, int, const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtpsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZtpsv"); - return func_ptr(uplo, trans, diag, n, AP, x, incx); -} - -void CUBLASWINAPI cublasStbsv(char uplo, char trans, char diag, int n, int k, - const float *A, int lda, float *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, int, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStbsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasStbsv"); - return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); -} - -void CUBLASWINAPI cublasDtbsv(char uplo, char trans, char diag, int n, int k, - const double *A, int lda, double *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, int, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtbsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDtbsv"); - return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); -} 
- -void CUBLASWINAPI cublasCtbsv(char uplo, char trans, char diag, int n, int k, - const cuComplex *A, int lda, cuComplex *x, - int incx) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, char, int, int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtbsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCtbsv"); - return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); -} - -void CUBLASWINAPI cublasZtbsv(char uplo, char trans, char diag, int n, int k, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *x, int incx) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, char, int, int, const cuDoubleComplex *, - int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtbsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZtbsv"); - return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); -} - -void CUBLASWINAPI cublasSsymv(char uplo, int n, float alpha, const float *A, - int lda, const float *x, int incx, float beta, - float *y, int incy) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const float *, int, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSsymv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSsymv"); - return func_ptr(uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasDsymv(char uplo, int n, double alpha, const double *A, - int lda, const double *x, int incx, double beta, - double *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, double, const double *, int, - const double *, int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDsymv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDsymv"); - return func_ptr(uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasChemv(char uplo, int n, cuComplex alpha, - const cuComplex *A, int lda, const cuComplex *x, - int incx, cuComplex beta, cuComplex *y, - int incy) { - using FuncPtr = - 
void(CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasChemv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasChemv"); - return func_ptr(uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasZhemv(char uplo, int n, cuDoubleComplex alpha, - const cuDoubleComplex *A, int lda, - const cuDoubleComplex *x, int incx, - cuDoubleComplex beta, cuDoubleComplex *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZhemv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZhemv"); - return func_ptr(uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasSsbmv(char uplo, int n, int k, float alpha, - const float *A, int lda, const float *x, int incx, - float beta, float *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, int, float, const float *, int, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSsbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSsbmv"); - return func_ptr(uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasDsbmv(char uplo, int n, int k, double alpha, - const double *A, int lda, const double *x, - int incx, double beta, double *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, int, double, const double *, int, - const double *, int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDsbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDsbmv"); - return func_ptr(uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasChbmv(char uplo, int n, int k, cuComplex alpha, - const cuComplex *A, int lda, const cuComplex *x, - int incx, cuComplex beta, cuComplex *y, - int incy) { - using 
FuncPtr = - void(CUBLASWINAPI *)(char, int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasChbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasChbmv"); - return func_ptr(uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasZhbmv(char uplo, int n, int k, cuDoubleComplex alpha, - const cuDoubleComplex *A, int lda, - const cuDoubleComplex *x, int incx, - cuDoubleComplex beta, cuDoubleComplex *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZhbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZhbmv"); - return func_ptr(uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasSspmv(char uplo, int n, float alpha, const float *AP, - const float *x, int incx, float beta, float *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const float *, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSspmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSspmv"); - return func_ptr(uplo, n, alpha, AP, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasDspmv(char uplo, int n, double alpha, const double *AP, - const double *x, int incx, double beta, double *y, - int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, double, const double *, const double *, - int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDspmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDspmv"); - return func_ptr(uplo, n, alpha, AP, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasChpmv(char uplo, int n, cuComplex alpha, - const cuComplex *AP, const cuComplex *x, int incx, - cuComplex beta, cuComplex *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, 
cuComplex, const cuComplex *, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasChpmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasChpmv"); - return func_ptr(uplo, n, alpha, AP, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasZhpmv(char uplo, int n, cuDoubleComplex alpha, - const cuDoubleComplex *AP, - const cuDoubleComplex *x, int incx, - cuDoubleComplex beta, cuDoubleComplex *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, cuDoubleComplex, const cuDoubleComplex *, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZhpmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZhpmv"); - return func_ptr(uplo, n, alpha, AP, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasSger(int m, int n, float alpha, const float *x, - int incx, const float *y, int incy, float *A, - int lda) { - using FuncPtr = void(CUBLASWINAPI *)(int, int, float, const float *, int, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSger"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSger"); - return func_ptr(m, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasDger(int m, int n, double alpha, const double *x, - int incx, const double *y, int incy, double *A, - int lda) { - using FuncPtr = void(CUBLASWINAPI *)(int, int, double, const double *, int, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDger"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDger"); - return func_ptr(m, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasCgeru(int m, int n, cuComplex alpha, const cuComplex *x, - int incx, const cuComplex *y, int incy, - cuComplex *A, int lda) { - using FuncPtr = - void(CUBLASWINAPI *)(int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgeru"); - if (!func_ptr) 
LogFatalSymbolNotFound("cublasCgeru"); - return func_ptr(m, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasCgerc(int m, int n, cuComplex alpha, const cuComplex *x, - int incx, const cuComplex *y, int incy, - cuComplex *A, int lda) { - using FuncPtr = - void(CUBLASWINAPI *)(int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgerc"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCgerc"); - return func_ptr(m, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasZgeru(int m, int n, cuDoubleComplex alpha, - const cuDoubleComplex *x, int incx, - const cuDoubleComplex *y, int incy, - cuDoubleComplex *A, int lda) { - using FuncPtr = void(CUBLASWINAPI *)( - int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZgeru"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZgeru"); - return func_ptr(m, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasZgerc(int m, int n, cuDoubleComplex alpha, - const cuDoubleComplex *x, int incx, - const cuDoubleComplex *y, int incy, - cuDoubleComplex *A, int lda) { - using FuncPtr = void(CUBLASWINAPI *)( - int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZgerc"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZgerc"); - return func_ptr(m, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasSsyr(char uplo, int n, float alpha, const float *x, - int incx, float *A, int lda) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, float, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSsyr"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSsyr"); - return func_ptr(uplo, n, alpha, x, incx, A, lda); -} - -void CUBLASWINAPI cublasDsyr(char uplo, int n, double alpha, const 
double *x, - int incx, double *A, int lda) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, double, const double *, int, - double *, int); - static auto func_ptr = LoadSymbol("cublasDsyr"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDsyr"); - return func_ptr(uplo, n, alpha, x, incx, A, lda); -} - -void CUBLASWINAPI cublasCher(char uplo, int n, float alpha, const cuComplex *x, - int incx, cuComplex *A, int lda) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const cuComplex *, int, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCher"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCher"); - return func_ptr(uplo, n, alpha, x, incx, A, lda); -} - -void CUBLASWINAPI cublasZher(char uplo, int n, double alpha, - const cuDoubleComplex *x, int incx, - cuDoubleComplex *A, int lda) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, double, const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZher"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZher"); - return func_ptr(uplo, n, alpha, x, incx, A, lda); -} - -void CUBLASWINAPI cublasSspr(char uplo, int n, float alpha, const float *x, - int incx, float *AP) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, float, const float *, int, float *); - static auto func_ptr = LoadSymbol("cublasSspr"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSspr"); - return func_ptr(uplo, n, alpha, x, incx, AP); -} - -void CUBLASWINAPI cublasDspr(char uplo, int n, double alpha, const double *x, - int incx, double *AP) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, double, const double *, int, double *); - static auto func_ptr = LoadSymbol("cublasDspr"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDspr"); - return func_ptr(uplo, n, alpha, x, incx, AP); -} - -void CUBLASWINAPI cublasChpr(char uplo, int n, float alpha, const cuComplex *x, - int incx, cuComplex *AP) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const cuComplex *, int, - 
cuComplex *); - static auto func_ptr = LoadSymbol("cublasChpr"); - if (!func_ptr) LogFatalSymbolNotFound("cublasChpr"); - return func_ptr(uplo, n, alpha, x, incx, AP); -} - -void CUBLASWINAPI cublasZhpr(char uplo, int n, double alpha, - const cuDoubleComplex *x, int incx, - cuDoubleComplex *AP) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, double, const cuDoubleComplex *, int, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZhpr"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZhpr"); - return func_ptr(uplo, n, alpha, x, incx, AP); -} - -void CUBLASWINAPI cublasSsyr2(char uplo, int n, float alpha, const float *x, - int incx, const float *y, int incy, float *A, - int lda) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const float *, int, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSsyr2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSsyr2"); - return func_ptr(uplo, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasDsyr2(char uplo, int n, double alpha, const double *x, - int incx, const double *y, int incy, double *A, - int lda) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, double, const double *, int, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDsyr2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDsyr2"); - return func_ptr(uplo, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasCher2(char uplo, int n, cuComplex alpha, - const cuComplex *x, int incx, const cuComplex *y, - int incy, cuComplex *A, int lda) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCher2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCher2"); - return func_ptr(uplo, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasZher2(char uplo, int n, cuDoubleComplex alpha, - const cuDoubleComplex *x, int 
incx, - const cuDoubleComplex *y, int incy, - cuDoubleComplex *A, int lda) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZher2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZher2"); - return func_ptr(uplo, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasSspr2(char uplo, int n, float alpha, const float *x, - int incx, const float *y, int incy, float *AP) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const float *, int, - const float *, int, float *); - static auto func_ptr = LoadSymbol("cublasSspr2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSspr2"); - return func_ptr(uplo, n, alpha, x, incx, y, incy, AP); -} - -void CUBLASWINAPI cublasDspr2(char uplo, int n, double alpha, const double *x, - int incx, const double *y, int incy, double *AP) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, double, const double *, int, - const double *, int, double *); - static auto func_ptr = LoadSymbol("cublasDspr2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDspr2"); - return func_ptr(uplo, n, alpha, x, incx, y, incy, AP); -} - -void CUBLASWINAPI cublasChpr2(char uplo, int n, cuComplex alpha, - const cuComplex *x, int incx, const cuComplex *y, - int incy, cuComplex *AP) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex *); - static auto func_ptr = LoadSymbol("cublasChpr2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasChpr2"); - return func_ptr(uplo, n, alpha, x, incx, y, incy, AP); -} - -void CUBLASWINAPI cublasZhpr2(char uplo, int n, cuDoubleComplex alpha, - const cuDoubleComplex *x, int incx, - const cuDoubleComplex *y, int incy, - cuDoubleComplex *AP) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex 
*); - static auto func_ptr = LoadSymbol("cublasZhpr2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZhpr2"); - return func_ptr(uplo, n, alpha, x, incx, y, incy, AP); -} - -void CUBLASWINAPI cublasSgemm(char transa, char transb, int m, int n, int k, - float alpha, const float *A, int lda, - const float *B, int ldb, float beta, float *C, - int ldc) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, int, int, int, float, const float *, int, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSgemm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSgemm"); - return func_ptr(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasDgemm(char transa, char transb, int m, int n, int k, - double alpha, const double *A, int lda, - const double *B, int ldb, double beta, double *C, - int ldc) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, int, int, int, double, const double *, - int, const double *, int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDgemm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDgemm"); - return func_ptr(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasCgemm(char transa, char transb, int m, int n, int k, - cuComplex alpha, const cuComplex *A, int lda, - const cuComplex *B, int ldb, cuComplex beta, - cuComplex *C, int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgemm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCgemm"); - return func_ptr(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasZgemm(char transa, char transb, int m, int n, int k, - cuDoubleComplex alpha, const cuDoubleComplex *A, - int lda, const cuDoubleComplex *B, int ldb, - cuDoubleComplex beta, cuDoubleComplex *C, - int ldc) { - 
using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZgemm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZgemm"); - return func_ptr(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasSsyrk(char uplo, char trans, int n, int k, float alpha, - const float *A, int lda, float beta, float *C, - int ldc) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, int, int, float, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSsyrk"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSsyrk"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -void CUBLASWINAPI cublasDsyrk(char uplo, char trans, int n, int k, double alpha, - const double *A, int lda, double beta, double *C, - int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, double, const double *, int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDsyrk"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDsyrk"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -void CUBLASWINAPI cublasCsyrk(char uplo, char trans, int n, int k, - cuComplex alpha, const cuComplex *A, int lda, - cuComplex beta, cuComplex *C, int ldc) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, int, int, cuComplex, const cuComplex *, - int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsyrk"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCsyrk"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -void CUBLASWINAPI cublasZsyrk(char uplo, char trans, int n, int k, - cuDoubleComplex alpha, const cuDoubleComplex *A, - int lda, cuDoubleComplex beta, cuDoubleComplex *C, - int ldc) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, int, int, cuDoubleComplex, - 
const cuDoubleComplex *, int, - cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZsyrk"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZsyrk"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -void CUBLASWINAPI cublasCherk(char uplo, char trans, int n, int k, float alpha, - const cuComplex *A, int lda, float beta, - cuComplex *C, int ldc) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, int, int, float, const cuComplex *, int, - float, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCherk"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCherk"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -void CUBLASWINAPI cublasZherk(char uplo, char trans, int n, int k, double alpha, - const cuDoubleComplex *A, int lda, double beta, - cuDoubleComplex *C, int ldc) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, int, int, double, - const cuDoubleComplex *, int, double, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZherk"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZherk"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -void CUBLASWINAPI cublasSsyr2k(char uplo, char trans, int n, int k, float alpha, - const float *A, int lda, const float *B, int ldb, - float beta, float *C, int ldc) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, int, int, float, const float *, int, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSsyr2k"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSsyr2k"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasDsyr2k(char uplo, char trans, int n, int k, - double alpha, const double *A, int lda, - const double *B, int ldb, double beta, double *C, - int ldc) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, int, int, double, const double *, int, - const double *, int, double, double *, int); - 
static auto func_ptr = LoadSymbol("cublasDsyr2k"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDsyr2k"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasCsyr2k(char uplo, char trans, int n, int k, - cuComplex alpha, const cuComplex *A, int lda, - const cuComplex *B, int ldb, cuComplex beta, - cuComplex *C, int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsyr2k"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCsyr2k"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasZsyr2k(char uplo, char trans, int n, int k, - cuDoubleComplex alpha, const cuDoubleComplex *A, - int lda, const cuDoubleComplex *B, int ldb, - cuDoubleComplex beta, cuDoubleComplex *C, - int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZsyr2k"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZsyr2k"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasCher2k(char uplo, char trans, int n, int k, - cuComplex alpha, const cuComplex *A, int lda, - const cuComplex *B, int ldb, float beta, - cuComplex *C, int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, float, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCher2k"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCher2k"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasZher2k(char uplo, char trans, int n, int k, - cuDoubleComplex alpha, const cuDoubleComplex *A, - int lda, const 
cuDoubleComplex *B, int ldb, - double beta, cuDoubleComplex *C, int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, double, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZher2k"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZher2k"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasSsymm(char side, char uplo, int m, int n, float alpha, - const float *A, int lda, const float *B, int ldb, - float beta, float *C, int ldc) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, int, int, float, const float *, int, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSsymm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSsymm"); - return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasDsymm(char side, char uplo, int m, int n, double alpha, - const double *A, int lda, const double *B, - int ldb, double beta, double *C, int ldc) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, int, int, double, const double *, int, - const double *, int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDsymm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDsymm"); - return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasCsymm(char side, char uplo, int m, int n, - cuComplex alpha, const cuComplex *A, int lda, - const cuComplex *B, int ldb, cuComplex beta, - cuComplex *C, int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsymm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCsymm"); - return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasZsymm(char side, 
char uplo, int m, int n, - cuDoubleComplex alpha, const cuDoubleComplex *A, - int lda, const cuDoubleComplex *B, int ldb, - cuDoubleComplex beta, cuDoubleComplex *C, - int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZsymm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZsymm"); - return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasChemm(char side, char uplo, int m, int n, - cuComplex alpha, const cuComplex *A, int lda, - const cuComplex *B, int ldb, cuComplex beta, - cuComplex *C, int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasChemm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasChemm"); - return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasZhemm(char side, char uplo, int m, int n, - cuDoubleComplex alpha, const cuDoubleComplex *A, - int lda, const cuDoubleComplex *B, int ldb, - cuDoubleComplex beta, cuDoubleComplex *C, - int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZhemm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZhemm"); - return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasStrsm(char side, char uplo, char transa, char diag, - int m, int n, float alpha, const float *A, - int lda, float *B, int ldb) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, float, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStrsm"); - 
if (!func_ptr) LogFatalSymbolNotFound("cublasStrsm"); - return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); -} - -void CUBLASWINAPI cublasDtrsm(char side, char uplo, char transa, char diag, - int m, int n, double alpha, const double *A, - int lda, double *B, int ldb) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, double, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtrsm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDtrsm"); - return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); -} - -void CUBLASWINAPI cublasCtrsm(char side, char uplo, char transa, char diag, - int m, int n, cuComplex alpha, const cuComplex *A, - int lda, cuComplex *B, int ldb) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, char, char, int, int, cuComplex, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtrsm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCtrsm"); - return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); -} - -void CUBLASWINAPI cublasZtrsm(char side, char uplo, char transa, char diag, - int m, int n, cuDoubleComplex alpha, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *B, int ldb) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, - cuDoubleComplex, const cuDoubleComplex *, - int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtrsm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZtrsm"); - return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); -} - -void CUBLASWINAPI cublasStrmm(char side, char uplo, char transa, char diag, - int m, int n, float alpha, const float *A, - int lda, float *B, int ldb) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, float, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStrmm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasStrmm"); - return 
func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); -} - -void CUBLASWINAPI cublasDtrmm(char side, char uplo, char transa, char diag, - int m, int n, double alpha, const double *A, - int lda, double *B, int ldb) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, double, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtrmm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDtrmm"); - return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); -} - -void CUBLASWINAPI cublasCtrmm(char side, char uplo, char transa, char diag, - int m, int n, cuComplex alpha, const cuComplex *A, - int lda, cuComplex *B, int ldb) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, char, char, int, int, cuComplex, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtrmm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCtrmm"); - return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); -} - -void CUBLASWINAPI cublasZtrmm(char side, char uplo, char transa, char diag, - int m, int n, cuDoubleComplex alpha, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *B, int ldb) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, - cuDoubleComplex, const cuDoubleComplex *, - int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtrmm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZtrmm"); - return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cublas_10_1.inc b/third_party/xla/third_party/tsl/tsl/cuda/cublas_10_1.inc deleted file mode 100644 index 067ba675288524..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cublas_10_1.inc +++ /dev/null @@ -1,5023 +0,0 @@ -// Auto-generated, do not edit. 
- -extern "C" { - -cublasStatus_t CUBLASWINAPI cublasCreate_v2(cublasHandle_t *handle) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t *); - static auto func_ptr = LoadSymbol("cublasCreate_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cublasStatus_t CUBLASWINAPI cublasDestroy_v2(cublasHandle_t handle) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t); - static auto func_ptr = LoadSymbol("cublasDestroy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cublasStatus_t CUBLASWINAPI cublasGetVersion_v2(cublasHandle_t handle, - int *version) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int *); - static auto func_ptr = LoadSymbol("cublasGetVersion_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, version); -} - -cublasStatus_t CUBLASWINAPI cublasGetProperty(libraryPropertyType type, - int *value) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(libraryPropertyType, int *); - static auto func_ptr = LoadSymbol("cublasGetProperty"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type, value); -} - -size_t CUBLASWINAPI cublasGetCudartVersion(void) { - using FuncPtr = size_t(CUBLASWINAPI *)(); - static auto func_ptr = LoadSymbol("cublasGetCudartVersion"); - if (!func_ptr) LogFatalSymbolNotFound("cublasGetCudartVersion"); - return func_ptr(); -} - -cublasStatus_t CUBLASWINAPI cublasSetStream_v2(cublasHandle_t handle, - cudaStream_t streamId) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cublasSetStream_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cublasStatus_t CUBLASWINAPI cublasGetStream_v2(cublasHandle_t handle, - cudaStream_t *streamId) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cudaStream_t *); - static auto func_ptr = 
LoadSymbol("cublasGetStream_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cublasStatus_t CUBLASWINAPI cublasGetPointerMode_v2(cublasHandle_t handle, - cublasPointerMode_t *mode) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasPointerMode_t *); - static auto func_ptr = LoadSymbol("cublasGetPointerMode_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cublasStatus_t CUBLASWINAPI cublasSetPointerMode_v2(cublasHandle_t handle, - cublasPointerMode_t mode) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasPointerMode_t); - static auto func_ptr = LoadSymbol("cublasSetPointerMode_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cublasStatus_t CUBLASWINAPI cublasGetAtomicsMode(cublasHandle_t handle, - cublasAtomicsMode_t *mode) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasAtomicsMode_t *); - static auto func_ptr = LoadSymbol("cublasGetAtomicsMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cublasStatus_t CUBLASWINAPI cublasSetAtomicsMode(cublasHandle_t handle, - cublasAtomicsMode_t mode) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasAtomicsMode_t); - static auto func_ptr = LoadSymbol("cublasSetAtomicsMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cublasStatus_t CUBLASWINAPI cublasGetMathMode(cublasHandle_t handle, - cublasMath_t *mode) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasMath_t *); - static auto func_ptr = LoadSymbol("cublasGetMathMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cublasStatus_t CUBLASWINAPI cublasSetMathMode(cublasHandle_t handle, - cublasMath_t mode) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, 
cublasMath_t); - static auto func_ptr = LoadSymbol("cublasSetMathMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cublasStatus_t CUBLASWINAPI cublasLoggerConfigure(int logIsOn, int logToStdOut, - int logToStdErr, - const char *logFileName) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, int, const char *); - static auto func_ptr = LoadSymbol("cublasLoggerConfigure"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(logIsOn, logToStdOut, logToStdErr, logFileName); -} - -cublasStatus_t CUBLASWINAPI -cublasSetLoggerCallback(cublasLogCallback userCallback) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasLogCallback); - static auto func_ptr = LoadSymbol("cublasSetLoggerCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(userCallback); -} - -cublasStatus_t CUBLASWINAPI -cublasGetLoggerCallback(cublasLogCallback *userCallback) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasLogCallback *); - static auto func_ptr = LoadSymbol("cublasGetLoggerCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(userCallback); -} - -cublasStatus_t CUBLASWINAPI cublasSetVector(int n, int elemSize, const void *x, - int incx, void *devicePtr, - int incy) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(int, int, const void *, int, void *, int); - static auto func_ptr = LoadSymbol("cublasSetVector"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(n, elemSize, x, incx, devicePtr, incy); -} - -cublasStatus_t CUBLASWINAPI cublasGetVector(int n, int elemSize, const void *x, - int incx, void *y, int incy) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(int, int, const void *, int, void *, int); - static auto func_ptr = LoadSymbol("cublasGetVector"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(n, elemSize, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasSetMatrix(int rows, int cols, int 
elemSize, - const void *A, int lda, void *B, - int ldb) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, int, const void *, - int, void *, int); - static auto func_ptr = LoadSymbol("cublasSetMatrix"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rows, cols, elemSize, A, lda, B, ldb); -} - -cublasStatus_t CUBLASWINAPI cublasGetMatrix(int rows, int cols, int elemSize, - const void *A, int lda, void *B, - int ldb) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, int, const void *, - int, void *, int); - static auto func_ptr = LoadSymbol("cublasGetMatrix"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rows, cols, elemSize, A, lda, B, ldb); -} - -cublasStatus_t CUBLASWINAPI cublasSetVectorAsync(int n, int elemSize, - const void *hostPtr, int incx, - void *devicePtr, int incy, - cudaStream_t stream) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, const void *, int, - void *, int, cudaStream_t); - static auto func_ptr = LoadSymbol("cublasSetVectorAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(n, elemSize, hostPtr, incx, devicePtr, incy, stream); -} - -cublasStatus_t CUBLASWINAPI cublasGetVectorAsync(int n, int elemSize, - const void *devicePtr, - int incx, void *hostPtr, - int incy, - cudaStream_t stream) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, const void *, int, - void *, int, cudaStream_t); - static auto func_ptr = LoadSymbol("cublasGetVectorAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(n, elemSize, devicePtr, incx, hostPtr, incy, stream); -} - -cublasStatus_t CUBLASWINAPI cublasSetMatrixAsync(int rows, int cols, - int elemSize, const void *A, - int lda, void *B, int ldb, - cudaStream_t stream) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - int, int, int, const void *, int, void *, int, cudaStream_t); - static auto func_ptr = LoadSymbol("cublasSetMatrixAsync"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(rows, cols, elemSize, A, lda, B, ldb, stream); -} - -cublasStatus_t CUBLASWINAPI cublasGetMatrixAsync(int rows, int cols, - int elemSize, const void *A, - int lda, void *B, int ldb, - cudaStream_t stream) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - int, int, int, const void *, int, void *, int, cudaStream_t); - static auto func_ptr = LoadSymbol("cublasGetMatrixAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rows, cols, elemSize, A, lda, B, ldb, stream); -} - -void CUBLASWINAPI cublasXerbla(const char *srName, int info) { - using FuncPtr = void(CUBLASWINAPI *)(const char *, int); - static auto func_ptr = LoadSymbol("cublasXerbla"); - if (!func_ptr) LogFatalSymbolNotFound("cublasXerbla"); - return func_ptr(srName, info); -} - -cublasStatus_t CUBLASWINAPI cublasNrm2Ex(cublasHandle_t handle, int n, - const void *x, cudaDataType xType, - int incx, void *result, - cudaDataType resultType, - cudaDataType executionType) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const void *, cudaDataType, int, void *, - cudaDataType, cudaDataType); - static auto func_ptr = LoadSymbol("cublasNrm2Ex"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, xType, incx, result, resultType, executionType); -} - -cublasStatus_t CUBLASWINAPI cublasSnrm2_v2(cublasHandle_t handle, int n, - const float *x, int incx, - float *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const float *, int, float *); - static auto func_ptr = LoadSymbol("cublasSnrm2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasDnrm2_v2(cublasHandle_t handle, int n, - const double *x, int incx, - double *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const double *, int, double *); - static auto func_ptr = 
LoadSymbol("cublasDnrm2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasScnrm2_v2(cublasHandle_t handle, int n, - const cuComplex *x, int incx, - float *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuComplex *, int, float *); - static auto func_ptr = LoadSymbol("cublasScnrm2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasDznrm2_v2(cublasHandle_t handle, int n, - const cuDoubleComplex *x, int incx, - double *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *, int, double *); - static auto func_ptr = LoadSymbol("cublasDznrm2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasDotEx(cublasHandle_t handle, int n, - const void *x, cudaDataType xType, - int incx, const void *y, - cudaDataType yType, int incy, - void *result, cudaDataType resultType, - cudaDataType executionType) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const void *, cudaDataType, int, const void *, - cudaDataType, int, void *, cudaDataType, cudaDataType); - static auto func_ptr = LoadSymbol("cublasDotEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, xType, incx, y, yType, incy, result, resultType, - executionType); -} - -cublasStatus_t CUBLASWINAPI cublasDotcEx(cublasHandle_t handle, int n, - const void *x, cudaDataType xType, - int incx, const void *y, - cudaDataType yType, int incy, - void *result, cudaDataType resultType, - cudaDataType executionType) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const void *, cudaDataType, int, const void *, - cudaDataType, int, void *, cudaDataType, cudaDataType); - static auto func_ptr = 
LoadSymbol("cublasDotcEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, xType, incx, y, yType, incy, result, resultType, - executionType); -} - -cublasStatus_t CUBLASWINAPI cublasSdot_v2(cublasHandle_t handle, int n, - const float *x, int incx, - const float *y, int incy, - float *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const float *, int, const float *, int, float *); - static auto func_ptr = LoadSymbol("cublasSdot_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, result); -} - -cublasStatus_t CUBLASWINAPI cublasDdot_v2(cublasHandle_t handle, int n, - const double *x, int incx, - const double *y, int incy, - double *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const double *, int, const double *, int, double *); - static auto func_ptr = LoadSymbol("cublasDdot_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, result); -} - -cublasStatus_t CUBLASWINAPI cublasCdotu_v2(cublasHandle_t handle, int n, - const cuComplex *x, int incx, - const cuComplex *y, int incy, - cuComplex *result) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *, - int, const cuComplex *, int, cuComplex *); - static auto func_ptr = LoadSymbol("cublasCdotu_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, result); -} - -cublasStatus_t CUBLASWINAPI cublasCdotc_v2(cublasHandle_t handle, int n, - const cuComplex *x, int incx, - const cuComplex *y, int incy, - cuComplex *result) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *, - int, const cuComplex *, int, cuComplex *); - static auto func_ptr = LoadSymbol("cublasCdotc_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, result); -} - 
-cublasStatus_t CUBLASWINAPI cublasZdotu_v2(cublasHandle_t handle, int n, - const cuDoubleComplex *x, int incx, - const cuDoubleComplex *y, int incy, - cuDoubleComplex *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZdotu_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, result); -} - -cublasStatus_t CUBLASWINAPI cublasZdotc_v2(cublasHandle_t handle, int n, - const cuDoubleComplex *x, int incx, - const cuDoubleComplex *y, int incy, - cuDoubleComplex *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZdotc_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, result); -} - -cublasStatus_t CUBLASWINAPI -cublasScalEx(cublasHandle_t handle, int n, - const void *alpha, /* host or device pointer */ - cudaDataType alphaType, void *x, cudaDataType xType, int incx, - cudaDataType executionType) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const void *, cudaDataType, void *, cudaDataType, - int, cudaDataType); - static auto func_ptr = LoadSymbol("cublasScalEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, alphaType, x, xType, incx, executionType); -} - -cublasStatus_t CUBLASWINAPI -cublasSscal_v2(cublasHandle_t handle, int n, - const float *alpha, /* host or device pointer */ - float *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasSscal_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx); -} - -cublasStatus_t CUBLASWINAPI 
-cublasDscal_v2(cublasHandle_t handle, int n, - const double *alpha, /* host or device pointer */ - double *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDscal_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx); -} - -cublasStatus_t CUBLASWINAPI -cublasCscal_v2(cublasHandle_t handle, int n, - const cuComplex *alpha, /* host or device pointer */ - cuComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCscal_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx); -} - -cublasStatus_t CUBLASWINAPI -cublasCsscal_v2(cublasHandle_t handle, int n, - const float *alpha, /* host or device pointer */ - cuComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const float *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsscal_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx); -} - -cublasStatus_t CUBLASWINAPI -cublasZscal_v2(cublasHandle_t handle, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - cuDoubleComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZscal_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx); -} - -cublasStatus_t CUBLASWINAPI -cublasZdscal_v2(cublasHandle_t handle, int n, - const double *alpha, /* host or device pointer */ - cuDoubleComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const double *, cuDoubleComplex *, int); - static auto func_ptr = 
LoadSymbol("cublasZdscal_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasAxpyEx( - cublasHandle_t handle, int n, - const void *alpha, /* host or device pointer */ - cudaDataType alphaType, const void *x, cudaDataType xType, int incx, - void *y, cudaDataType yType, int incy, cudaDataType executiontype) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const void *, cudaDataType, const void *, - cudaDataType, int, void *, cudaDataType, int, cudaDataType); - static auto func_ptr = LoadSymbol("cublasAxpyEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, alphaType, x, xType, incx, y, yType, incy, - executiontype); -} - -cublasStatus_t CUBLASWINAPI -cublasSaxpy_v2(cublasHandle_t handle, int n, - const float *alpha, /* host or device pointer */ - const float *x, int incx, float *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const float *, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSaxpy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasDaxpy_v2(cublasHandle_t handle, int n, - const double *alpha, /* host or device pointer */ - const double *x, int incx, double *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const double *, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDaxpy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasCaxpy_v2(cublasHandle_t handle, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *x, int incx, cuComplex *y, int incy) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *, - 
const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCaxpy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasZaxpy_v2( - cublasHandle_t handle, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *x, int incx, cuDoubleComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *, const cuDoubleComplex *, - int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZaxpy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasCopyEx(cublasHandle_t handle, int n, - const void *x, cudaDataType xType, - int incx, void *y, cudaDataType yType, - int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const void *, cudaDataType, int, void *, - cudaDataType, int); - static auto func_ptr = LoadSymbol("cublasCopyEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, xType, incx, y, yType, incy); -} - -cublasStatus_t CUBLASWINAPI cublasScopy_v2(cublasHandle_t handle, int n, - const float *x, int incx, float *y, - int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasScopy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasDcopy_v2(cublasHandle_t handle, int n, - const double *x, int incx, double *y, - int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDcopy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy); -} - 
-cublasStatus_t CUBLASWINAPI cublasCcopy_v2(cublasHandle_t handle, int n, - const cuComplex *x, int incx, - cuComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCcopy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasZcopy_v2(cublasHandle_t handle, int n, - const cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZcopy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasSswap_v2(cublasHandle_t handle, int n, - float *x, int incx, float *y, - int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, float *, - int, float *, int); - static auto func_ptr = LoadSymbol("cublasSswap_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasDswap_v2(cublasHandle_t handle, int n, - double *x, int incx, double *y, - int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, double *, - int, double *, int); - static auto func_ptr = LoadSymbol("cublasDswap_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasCswap_v2(cublasHandle_t handle, int n, - cuComplex *x, int incx, cuComplex *y, - int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCswap_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy); -} - 
-cublasStatus_t CUBLASWINAPI cublasZswap_v2(cublasHandle_t handle, int n, - cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZswap_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasSwapEx(cublasHandle_t handle, int n, void *x, - cudaDataType xType, int incx, void *y, - cudaDataType yType, int incy) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, void *, cudaDataType, - int, void *, cudaDataType, int); - static auto func_ptr = LoadSymbol("cublasSwapEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, xType, incx, y, yType, incy); -} - -cublasStatus_t CUBLASWINAPI cublasIsamax_v2(cublasHandle_t handle, int n, - const float *x, int incx, - int *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const float *, int, int *); - static auto func_ptr = LoadSymbol("cublasIsamax_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasIdamax_v2(cublasHandle_t handle, int n, - const double *x, int incx, - int *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const double *, int, int *); - static auto func_ptr = LoadSymbol("cublasIdamax_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasIcamax_v2(cublasHandle_t handle, int n, - const cuComplex *x, int incx, - int *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cublasIcamax_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, 
incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasIzamax_v2(cublasHandle_t handle, int n, - const cuDoubleComplex *x, int incx, - int *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cublasIzamax_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasIamaxEx( - cublasHandle_t handle, int n, const void *x, cudaDataType xType, int incx, - int *result /* host or device pointer */ -) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const void *, cudaDataType, int, int *); - static auto func_ptr = LoadSymbol("cublasIamaxEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, xType, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasIsamin_v2(cublasHandle_t handle, int n, - const float *x, int incx, - int *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const float *, int, int *); - static auto func_ptr = LoadSymbol("cublasIsamin_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasIdamin_v2(cublasHandle_t handle, int n, - const double *x, int incx, - int *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const double *, int, int *); - static auto func_ptr = LoadSymbol("cublasIdamin_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasIcamin_v2(cublasHandle_t handle, int n, - const cuComplex *x, int incx, - int *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cublasIcamin_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, 
result); -} - -cublasStatus_t CUBLASWINAPI cublasIzamin_v2(cublasHandle_t handle, int n, - const cuDoubleComplex *x, int incx, - int *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cublasIzamin_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasIaminEx( - cublasHandle_t handle, int n, const void *x, cudaDataType xType, int incx, - int *result /* host or device pointer */ -) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const void *, cudaDataType, int, int *); - static auto func_ptr = LoadSymbol("cublasIaminEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, xType, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasAsumEx( - cublasHandle_t handle, int n, const void *x, cudaDataType xType, int incx, - void *result, cudaDataType resultType, /* host or device pointer */ - cudaDataType executiontype) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const void *, cudaDataType, int, void *, - cudaDataType, cudaDataType); - static auto func_ptr = LoadSymbol("cublasAsumEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, xType, incx, result, resultType, executiontype); -} - -cublasStatus_t CUBLASWINAPI cublasSasum_v2(cublasHandle_t handle, int n, - const float *x, int incx, - float *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const float *, int, float *); - static auto func_ptr = LoadSymbol("cublasSasum_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasDasum_v2(cublasHandle_t handle, int n, - const double *x, int incx, - double *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const 
double *, int, double *); - static auto func_ptr = LoadSymbol("cublasDasum_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasScasum_v2(cublasHandle_t handle, int n, - const cuComplex *x, int incx, - float *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuComplex *, int, float *); - static auto func_ptr = LoadSymbol("cublasScasum_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasDzasum_v2(cublasHandle_t handle, int n, - const cuDoubleComplex *x, int incx, - double *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *, int, double *); - static auto func_ptr = LoadSymbol("cublasDzasum_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI -cublasSrot_v2(cublasHandle_t handle, int n, float *x, int incx, float *y, - int incy, const float *c, /* host or device pointer */ - const float *s) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, float *, int, float *, - int, const float *, const float *); - static auto func_ptr = LoadSymbol("cublasSrot_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, c, s); -} - -cublasStatus_t CUBLASWINAPI -cublasDrot_v2(cublasHandle_t handle, int n, double *x, int incx, double *y, - int incy, const double *c, /* host or device pointer */ - const double *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, double *, int, double *, int, const double *, - const double *); - static auto func_ptr = LoadSymbol("cublasDrot_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, c, s); -} - -cublasStatus_t CUBLASWINAPI cublasCrot_v2( - 
cublasHandle_t handle, int n, cuComplex *x, int incx, cuComplex *y, - int incy, const float *c, /* host or device pointer */ - const cuComplex *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, cuComplex *, int, cuComplex *, int, const float *, - const cuComplex *); - static auto func_ptr = LoadSymbol("cublasCrot_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, c, s); -} - -cublasStatus_t CUBLASWINAPI cublasCsrot_v2( - cublasHandle_t handle, int n, cuComplex *x, int incx, cuComplex *y, - int incy, const float *c, /* host or device pointer */ - const float *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, cuComplex *, int, cuComplex *, int, const float *, - const float *); - static auto func_ptr = LoadSymbol("cublasCsrot_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, c, s); -} - -cublasStatus_t CUBLASWINAPI cublasZrot_v2( - cublasHandle_t handle, int n, cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy, const double *c, /* host or device pointer */ - const cuDoubleComplex *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int, - const double *, const cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZrot_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, c, s); -} - -cublasStatus_t CUBLASWINAPI cublasZdrot_v2( - cublasHandle_t handle, int n, cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy, const double *c, /* host or device pointer */ - const double *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int, - const double *, const double *); - static auto func_ptr = LoadSymbol("cublasZdrot_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, 
incx, y, incy, c, s); -} - -cublasStatus_t CUBLASWINAPI -cublasRotEx(cublasHandle_t handle, int n, void *x, cudaDataType xType, int incx, - void *y, cudaDataType yType, int incy, - const void *c, /* host or device pointer */ - const void *s, cudaDataType csType, cudaDataType executiontype) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, void *, cudaDataType, int, void *, cudaDataType, int, - const void *, const void *, cudaDataType, cudaDataType); - static auto func_ptr = LoadSymbol("cublasRotEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, xType, incx, y, yType, incy, c, s, csType, - executiontype); -} - -cublasStatus_t CUBLASWINAPI -cublasSrotg_v2(cublasHandle_t handle, float *a, /* host or device pointer */ - float *b, /* host or device pointer */ - float *c, /* host or device pointer */ - float *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, float *, - float *, float *, float *); - static auto func_ptr = LoadSymbol("cublasSrotg_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, a, b, c, s); -} - -cublasStatus_t CUBLASWINAPI -cublasDrotg_v2(cublasHandle_t handle, double *a, /* host or device pointer */ - double *b, /* host or device pointer */ - double *c, /* host or device pointer */ - double *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, double *, - double *, double *, double *); - static auto func_ptr = LoadSymbol("cublasDrotg_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, a, b, c, s); -} - -cublasStatus_t CUBLASWINAPI -cublasCrotg_v2(cublasHandle_t handle, cuComplex *a, /* host or device pointer */ - cuComplex *b, /* host or device pointer */ - float *c, /* host or device pointer */ - cuComplex *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cuComplex *, cuComplex *, float *, cuComplex *); - static auto func_ptr = LoadSymbol("cublasCrotg_v2"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, a, b, c, s); -} - -cublasStatus_t CUBLASWINAPI cublasZrotg_v2( - cublasHandle_t handle, cuDoubleComplex *a, /* host or device pointer */ - cuDoubleComplex *b, /* host or device pointer */ - double *c, /* host or device pointer */ - cuDoubleComplex *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cuDoubleComplex *, cuDoubleComplex *, double *, - cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZrotg_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, a, b, c, s); -} - -cublasStatus_t CUBLASWINAPI cublasRotgEx(cublasHandle_t handle, - void *a, /* host or device pointer */ - void *b, /* host or device pointer */ - cudaDataType abType, - void *c, /* host or device pointer */ - void *s, /* host or device pointer */ - cudaDataType csType, - cudaDataType executiontype) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, void *, void *, - cudaDataType, void *, void *, - cudaDataType, cudaDataType); - static auto func_ptr = LoadSymbol("cublasRotgEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, a, b, abType, c, s, csType, executiontype); -} - -cublasStatus_t CUBLASWINAPI cublasSrotm_v2(cublasHandle_t handle, int n, - float *x, int incx, float *y, - int incy, const float *param) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, float *, int, float *, int, const float *); - static auto func_ptr = LoadSymbol("cublasSrotm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, param); -} - -cublasStatus_t CUBLASWINAPI cublasDrotm_v2(cublasHandle_t handle, int n, - double *x, int incx, double *y, - int incy, const double *param) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, double *, int, double *, int, const double *); - static auto func_ptr = LoadSymbol("cublasDrotm_v2"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, param); -} - -cublasStatus_t CUBLASWINAPI -cublasRotmEx(cublasHandle_t handle, int n, void *x, cudaDataType xType, - int incx, void *y, cudaDataType yType, int incy, - const void *param, /* host or device pointer */ - cudaDataType paramType, cudaDataType executiontype) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, void *, cudaDataType, int, void *, cudaDataType, int, - const void *, cudaDataType, cudaDataType); - static auto func_ptr = LoadSymbol("cublasRotmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, xType, incx, y, yType, incy, param, paramType, - executiontype); -} - -cublasStatus_t CUBLASWINAPI -cublasSrotmg_v2(cublasHandle_t handle, float *d1, /* host or device pointer */ - float *d2, /* host or device pointer */ - float *x1, /* host or device pointer */ - const float *y1, /* host or device pointer */ - float *param) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, float *, float *, float *, const float *, float *); - static auto func_ptr = LoadSymbol("cublasSrotmg_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, d1, d2, x1, y1, param); -} - -cublasStatus_t CUBLASWINAPI -cublasDrotmg_v2(cublasHandle_t handle, double *d1, /* host or device pointer */ - double *d2, /* host or device pointer */ - double *x1, /* host or device pointer */ - const double *y1, /* host or device pointer */ - double *param) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, double *, double *, double *, const double *, double *); - static auto func_ptr = LoadSymbol("cublasDrotmg_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, d1, d2, x1, y1, param); -} - -cublasStatus_t CUBLASWINAPI -cublasRotmgEx(cublasHandle_t handle, void *d1, /* host or device pointer */ - cudaDataType d1Type, void *d2, /* host or device pointer */ - 
cudaDataType d2Type, void *x1, /* host or device pointer */ - cudaDataType x1Type, const void *y1, /* host or device pointer */ - cudaDataType y1Type, void *param, /* host or device pointer */ - cudaDataType paramType, cudaDataType executiontype) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, void *, cudaDataType, void *, cudaDataType, void *, - cudaDataType, const void *, cudaDataType, void *, cudaDataType, - cudaDataType); - static auto func_ptr = LoadSymbol("cublasRotmgEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, d1, d1Type, d2, d2Type, x1, x1Type, y1, y1Type, param, - paramType, executiontype); -} - -cublasStatus_t CUBLASWINAPI -cublasSgemv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *x, int incx, - const float *beta, /* host or device pointer */ - float *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const float *, const float *, - int, const float *, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasSgemv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasDgemv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *x, int incx, - const double *beta, /* host or device pointer */ - double *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const double *, - const double *, int, const double *, int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDgemv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy); -} - 
-cublasStatus_t CUBLASWINAPI -cublasCgemv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *x, int incx, - const cuComplex *beta, /* host or device pointer */ - cuComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgemv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasZgemv_v2( - cublasHandle_t handle, cublasOperation_t trans, int m, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZgemv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasSgbmv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - int kl, int ku, const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *x, int incx, - const float *beta, /* host or device pointer */ - float *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, int, int, const float *, - const float *, int, const float *, int, const float *, float *, int); - static auto func_ptr = 
LoadSymbol("cublasSgbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, - incy); -} - -cublasStatus_t CUBLASWINAPI -cublasDgbmv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - int kl, int ku, const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *x, int incx, - const double *beta, /* host or device pointer */ - double *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, int, int, const double *, - const double *, int, const double *, int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDgbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, - incy); -} - -cublasStatus_t CUBLASWINAPI cublasCgbmv_v2( - cublasHandle_t handle, cublasOperation_t trans, int m, int n, int kl, - int ku, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *x, int incx, - const cuComplex *beta, /* host or device pointer */ - cuComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, int, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, - incy); -} - -cublasStatus_t CUBLASWINAPI cublasZgbmv_v2( - cublasHandle_t handle, cublasOperation_t trans, int m, int n, int kl, - int ku, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *y, int incy) { - using FuncPtr = 
cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cublasZgbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, - incy); -} - -cublasStatus_t CUBLASWINAPI cublasStrmv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - cublasDiagType_t diag, int n, const float *A, int lda, float *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStrmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasDtrmv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, - const double *A, int lda, double *x, - int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtrmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasCtrmv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, - const cuComplex *A, int lda, - cuComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtrmv_v2"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasZtrmv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtrmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasStbmv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, int k, - const float *A, int lda, float *x, - int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, int, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasDtbmv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, int k, - const double *A, int lda, double *x, - int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, int, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasCtbmv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, int k, - const cuComplex *A, int lda, - 
cuComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasZtbmv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, int k, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, int, const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasStpmv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - cublasDiagType_t diag, int n, const float *AP, float *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasStpmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasDtpmv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - cublasDiagType_t diag, int n, const double *AP, double *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDtpmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, 
diag, n, AP, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasCtpmv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - cublasDiagType_t diag, int n, const cuComplex *AP, cuComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtpmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasZtpmv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, - const cuDoubleComplex *AP, - cuDoubleComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtpmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasStrsv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - cublasDiagType_t diag, int n, const float *A, int lda, float *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStrsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasDtrsv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, - const double *A, int lda, double *x, - int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - 
int, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtrsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasCtrsv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, - const cuComplex *A, int lda, - cuComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtrsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasZtrsv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtrsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasStpsv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - cublasDiagType_t diag, int n, const float *AP, float *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasStpsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasDtpsv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, 
- cublasDiagType_t diag, int n, const double *AP, double *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDtpsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasCtpsv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - cublasDiagType_t diag, int n, const cuComplex *AP, cuComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtpsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasZtpsv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, - const cuDoubleComplex *AP, - cuDoubleComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtpsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasStbsv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, int k, - const float *A, int lda, float *x, - int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, int, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStbsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasDtbsv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, int k, - const double *A, int lda, double *x, - int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, int, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtbsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasCtbsv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, int k, - const cuComplex *A, int lda, - cuComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtbsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasZtbsv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, int k, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, int, const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtbsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI -cublasSsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *x, 
int incx, - const float *beta, /* host or device pointer */ - float *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int, - const float *, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasSsymv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasDsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *x, int incx, - const double *beta, /* host or device pointer */ - double *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, const double *, - int, const double *, int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDsymv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasCsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *x, int incx, - const cuComplex *beta, /* host or device pointer */ - cuComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsymv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasZsymv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex 
*x, int incx, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZsymv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasChemv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *x, int incx, - const cuComplex *beta, /* host or device pointer */ - cuComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasChemv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasZhemv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZhemv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI 
-cublasSsbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, int k, - const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *x, int incx, - const float *beta, /* host or device pointer */ - float *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, int, const float *, const float *, - int, const float *, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasSsbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasDsbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, int k, - const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *x, int incx, - const double *beta, /* host or device pointer */ - double *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, int, const double *, - const double *, int, const double *, int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDsbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasChbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, int k, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *x, int incx, - const cuComplex *beta, /* host or device pointer */ - cuComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasChbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); -} - 
-cublasStatus_t CUBLASWINAPI cublasZhbmv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, int n, int k, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZhbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasSspmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const float *alpha, /* host or device pointer */ - const float *AP, const float *x, int incx, - const float *beta, /* host or device pointer */ - float *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, const float *, - const float *, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasSspmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, AP, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasDspmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const double *alpha, /* host or device pointer */ - const double *AP, const double *x, int incx, - const double *beta, /* host or device pointer */ - double *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, const double *, - const double *, int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDspmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, AP, x, incx, beta, y, 
incy); -} - -cublasStatus_t CUBLASWINAPI -cublasChpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *AP, const cuComplex *x, int incx, - const cuComplex *beta, /* host or device pointer */ - cuComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuComplex *, - const cuComplex *, const cuComplex *, int, const cuComplex *, cuComplex *, - int); - static auto func_ptr = LoadSymbol("cublasChpmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, AP, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasZhpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *AP, const cuDoubleComplex *x, int incx, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZhpmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, AP, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasSger_v2( - cublasHandle_t handle, int m, int n, - const float *alpha, /* host or device pointer */ - const float *x, int incx, const float *y, int incy, float *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, int, const float *, const float *, int, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSger_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI cublasDger_v2( - cublasHandle_t 
handle, int m, int n, - const double *alpha, /* host or device pointer */ - const double *x, int incx, const double *y, int incy, double *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, int, const double *, const double *, int, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDger_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasCgeru_v2(cublasHandle_t handle, int m, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *x, int incx, const cuComplex *y, int incy, - cuComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, int, const cuComplex *, const cuComplex *, int, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgeru_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasCgerc_v2(cublasHandle_t handle, int m, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *x, int incx, const cuComplex *y, int incy, - cuComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, int, const cuComplex *, const cuComplex *, int, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgerc_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasZgeru_v2(cublasHandle_t handle, int m, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, - int incy, cuDoubleComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, 
int, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZgeru_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasZgerc_v2(cublasHandle_t handle, int m, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, - int incy, cuDoubleComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZgerc_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasSsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const float *alpha, /* host or device pointer */ - const float *x, int incx, float *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int, - float *, int); - static auto func_ptr = LoadSymbol("cublasSsyr_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasDsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const double *alpha, /* host or device pointer */ - const double *x, int incx, double *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, const double *, - int, double *, int); - static auto func_ptr = LoadSymbol("cublasDsyr_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasCsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuComplex 
*alpha, /* host or device pointer */ - const cuComplex *x, int incx, cuComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuComplex *, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsyr_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasZsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *x, int incx, cuDoubleComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZsyr_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasCher_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const float *alpha, /* host or device pointer */ - const cuComplex *x, int incx, cuComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, const cuComplex *, - int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCher_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasZher_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const double *alpha, /* host or device pointer */ - const cuDoubleComplex *x, int incx, cuDoubleComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, - const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZher_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(handle, uplo, n, alpha, x, incx, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasSspr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const float *alpha, /* host or device pointer */ - const float *x, int incx, float *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int, - float *); - static auto func_ptr = LoadSymbol("cublasSspr_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, AP); -} - -cublasStatus_t CUBLASWINAPI -cublasDspr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const double *alpha, /* host or device pointer */ - const double *x, int incx, double *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, const double *, - int, double *); - static auto func_ptr = LoadSymbol("cublasDspr_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, AP); -} - -cublasStatus_t CUBLASWINAPI -cublasChpr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const float *alpha, /* host or device pointer */ - const cuComplex *x, int incx, cuComplex *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, const cuComplex *, - int, cuComplex *); - static auto func_ptr = LoadSymbol("cublasChpr_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, AP); -} - -cublasStatus_t CUBLASWINAPI -cublasZhpr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const double *alpha, /* host or device pointer */ - const cuDoubleComplex *x, int incx, cuDoubleComplex *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, - const cuDoubleComplex *, int, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZhpr_v2"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, AP); -} - -cublasStatus_t CUBLASWINAPI cublasSsyr2_v2( - cublasHandle_t handle, cublasFillMode_t uplo, int n, - const float *alpha, /* host or device pointer */ - const float *x, int incx, const float *y, int incy, float *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSsyr2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI cublasDsyr2_v2( - cublasHandle_t handle, cublasFillMode_t uplo, int n, - const double *alpha, /* host or device pointer */ - const double *x, int incx, const double *y, int incy, double *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, const double *, - int, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDsyr2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasCsyr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *x, int incx, const cuComplex *y, int incy, - cuComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsyr2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasZsyr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuDoubleComplex *alpha, /* host or device 
pointer */ - const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, - int incy, cuDoubleComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZsyr2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasCher2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *x, int incx, const cuComplex *y, int incy, - cuComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCher2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasZher2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, - int incy, cuDoubleComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZher2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasSspr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const float *alpha, /* host or device pointer */ - const float *x, int incx, const float *y, int incy, float *AP) { - using FuncPtr = 
cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int, - const float *, int, float *); - static auto func_ptr = LoadSymbol("cublasSspr2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, AP); -} - -cublasStatus_t CUBLASWINAPI cublasDspr2_v2( - cublasHandle_t handle, cublasFillMode_t uplo, int n, - const double *alpha, /* host or device pointer */ - const double *x, int incx, const double *y, int incy, double *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, const double *, - int, const double *, int, double *); - static auto func_ptr = LoadSymbol("cublasDspr2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, AP); -} - -cublasStatus_t CUBLASWINAPI cublasChpr2_v2( - cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *x, int incx, const cuComplex *y, int incy, cuComplex *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, int, cuComplex *); - static auto func_ptr = LoadSymbol("cublasChpr2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, AP); -} - -cublasStatus_t CUBLASWINAPI -cublasZhpr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, - int incy, cuDoubleComplex *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZhpr2_v2"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, AP); -} - -cublasStatus_t CUBLASWINAPI cublasSgemm_v2( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *B, int ldb, - const float *beta, /* host or device pointer */ - float *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const float *, const float *, int, const float *, int, const float *, - float *, int); - static auto func_ptr = LoadSymbol("cublasSgemm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasDgemm_v2( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *B, int ldb, - const double *beta, /* host or device pointer */ - double *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const double *, const double *, int, const double *, int, const double *, - double *, int); - static auto func_ptr = LoadSymbol("cublasDgemm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCgemm_v2( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, 
cublasOperation_t, int, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, int, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgemm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCgemm3m( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, int, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgemm3m"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCgemm3mEx( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const cuComplex *alpha, const void *A, - cudaDataType Atype, int lda, const void *B, cudaDataType Btype, int ldb, - const cuComplex *beta, void *C, cudaDataType Ctype, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuComplex *, const void *, cudaDataType, int, const void *, - cudaDataType, int, const cuComplex *, void *, cudaDataType, int); - static auto func_ptr = LoadSymbol("cublasCgemm3mEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B, - Btype, ldb, beta, C, Ctype, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZgemm_v2( - cublasHandle_t handle, 
cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cublasZgemm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI -cublasZgemm3m(cublasHandle_t handle, cublasOperation_t transa, - cublasOperation_t transb, int m, int n, int k, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, - int ldb, const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cublasZgemm3m"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasSgemmEx( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const float *alpha, /* host or device pointer */ - const void *A, cudaDataType Atype, int lda, const void *B, - cudaDataType Btype, int ldb, const float *beta, /* host or device pointer */ - void *C, cudaDataType Ctype, int ldc) { - using FuncPtr = 
cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const float *, const void *, cudaDataType, int, const void *, - cudaDataType, int, const float *, void *, cudaDataType, int); - static auto func_ptr = LoadSymbol("cublasSgemmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B, - Btype, ldb, beta, C, Ctype, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasGemmEx( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const void *alpha, /* host or device pointer */ - const void *A, cudaDataType Atype, int lda, const void *B, - cudaDataType Btype, int ldb, const void *beta, /* host or device pointer */ - void *C, cudaDataType Ctype, int ldc, cudaDataType computeType, - cublasGemmAlgo_t algo) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const void *, const void *, cudaDataType, int, const void *, cudaDataType, - int, const void *, void *, cudaDataType, int, cudaDataType, - cublasGemmAlgo_t); - static auto func_ptr = LoadSymbol("cublasGemmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B, - Btype, ldb, beta, C, Ctype, ldc, computeType, algo); -} - -cublasStatus_t CUBLASWINAPI cublasCgemmEx( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const cuComplex *alpha, const void *A, - cudaDataType Atype, int lda, const void *B, cudaDataType Btype, int ldb, - const cuComplex *beta, void *C, cudaDataType Ctype, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuComplex *, const void *, cudaDataType, int, const void *, - cudaDataType, int, const cuComplex *, void *, cudaDataType, int); - static auto func_ptr = 
LoadSymbol("cublasCgemmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B, - Btype, ldb, beta, C, Ctype, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasUint8gemmBias( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - cublasOperation_t transc, int m, int n, int k, const unsigned char *A, - int A_bias, int lda, const unsigned char *B, int B_bias, int ldb, - unsigned char *C, int C_bias, int ldc, int C_mult, int C_shift) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, cublasOperation_t, - int, int, int, const unsigned char *, int, int, const unsigned char *, - int, int, unsigned char *, int, int, int, int); - static auto func_ptr = LoadSymbol("cublasUint8gemmBias"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, transc, m, n, k, A, A_bias, lda, B, - B_bias, ldb, C, C_bias, ldc, C_mult, C_shift); -} - -cublasStatus_t CUBLASWINAPI cublasSsyrk_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *beta, /* host or device pointer */ - float *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const float *, const float *, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasSsyrk_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasDsyrk_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *beta, /* host or device pointer */ - double *C, int ldc) { - using FuncPtr = 
cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const double *, const double *, int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDsyrk_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCsyrk_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, cuComplex *, - int); - static auto func_ptr = LoadSymbol("cublasCsyrk_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZsyrk_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZsyrk_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCsyrkEx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuComplex *alpha, /* host or device pointer */ - const void *A, cudaDataType Atype, int 
lda, - const cuComplex *beta, /* host or device pointer */ - void *C, cudaDataType Ctype, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuComplex *, const void *, cudaDataType, int, const cuComplex *, - void *, cudaDataType, int); - static auto func_ptr = LoadSymbol("cublasCsyrkEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C, - Ctype, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCsyrk3mEx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuComplex *alpha, const void *A, cudaDataType Atype, - int lda, const cuComplex *beta, void *C, cudaDataType Ctype, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuComplex *, const void *, cudaDataType, int, const cuComplex *, - void *, cudaDataType, int); - static auto func_ptr = LoadSymbol("cublasCsyrk3mEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C, - Ctype, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCherk_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const float *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const float *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const float *, const cuComplex *, int, const float *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCherk_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZherk_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, 
int k, const double *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, - const double *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const double *, const cuDoubleComplex *, int, const double *, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZherk_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCherkEx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const float *alpha, /* host or device pointer */ - const void *A, cudaDataType Atype, int lda, - const float *beta, /* host or device pointer */ - void *C, cudaDataType Ctype, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const float *, const void *, cudaDataType, int, const float *, void *, - cudaDataType, int); - static auto func_ptr = LoadSymbol("cublasCherkEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C, - Ctype, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCherk3mEx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const float *alpha, const void *A, cudaDataType Atype, - int lda, const float *beta, void *C, cudaDataType Ctype, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const float *, const void *, cudaDataType, int, const float *, void *, - cudaDataType, int); - static auto func_ptr = LoadSymbol("cublasCherk3mEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C, - Ctype, ldc); -} - -cublasStatus_t CUBLASWINAPI 
cublasSsyr2k_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *B, int ldb, - const float *beta, /* host or device pointer */ - float *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const float *, const float *, int, const float *, int, const float *, - float *, int); - static auto func_ptr = LoadSymbol("cublasSsyr2k_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasDsyr2k_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *B, int ldb, - const double *beta, /* host or device pointer */ - double *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const double *, const double *, int, const double *, int, const double *, - double *, int); - static auto func_ptr = LoadSymbol("cublasDsyr2k_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCsyr2k_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, int, - const cuComplex *, cuComplex *, int); - static auto func_ptr = 
LoadSymbol("cublasCsyr2k_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZsyr2k_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cublasZsyr2k_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCher2k_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, - const float *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, int, - const float *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCher2k_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZher2k_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - const 
double *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const double *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZher2k_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasSsyrkx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *B, int ldb, - const float *beta, /* host or device pointer */ - float *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const float *, const float *, int, const float *, int, const float *, - float *, int); - static auto func_ptr = LoadSymbol("cublasSsyrkx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasDsyrkx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *B, int ldb, - const double *beta, /* host or device pointer */ - double *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const double *, const double *, int, const double *, int, const double *, - double *, int); - static auto func_ptr = LoadSymbol("cublasDsyrkx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCsyrkx( - cublasHandle_t 
handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, int, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsyrkx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZsyrkx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cublasZsyrkx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCherkx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, - const float *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, int, - 
const float *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCherkx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZherkx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - const double *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const double *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZherkx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasSsymm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, - int n, const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *B, int ldb, - const float *beta, /* host or device pointer */ - float *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, - const float *, const float *, int, const float *, int, const float *, - float *, int); - static auto func_ptr = LoadSymbol("cublasSsymm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasDsymm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, - int n, const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *B, int ldb, - const double *beta, /* host or device pointer 
*/ - double *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, - const double *, const double *, int, const double *, int, const double *, - double *, int); - static auto func_ptr = LoadSymbol("cublasDsymm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCsymm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, - int n, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, int, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsymm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZsymm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, - int n, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cublasZsymm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t 
CUBLASWINAPI cublasChemm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, - int n, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, int, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasChemm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZhemm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, - int n, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cublasZhemm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasStrsm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const float *alpha, /* host or device pointer */ - const float *A, int lda, float *B, int ldb) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const float *, const float *, int, 
float *, - int); - static auto func_ptr = LoadSymbol("cublasStrsm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb); -} - -cublasStatus_t CUBLASWINAPI cublasDtrsm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const double *alpha, /* host or device pointer */ - const double *A, int lda, double *B, int ldb) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const double *, const double *, int, double *, - int); - static auto func_ptr = LoadSymbol("cublasDtrsm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb); -} - -cublasStatus_t CUBLASWINAPI cublasCtrsm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, cuComplex *B, int ldb) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const cuComplex *, const cuComplex *, int, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtrsm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb); -} - -cublasStatus_t CUBLASWINAPI cublasZtrsm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, cuDoubleComplex *B, int ldb) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, 
cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtrsm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb); -} - -cublasStatus_t CUBLASWINAPI cublasStrmm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *B, int ldb, float *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const float *, const float *, int, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStrmm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasDtrmm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *B, int ldb, double *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const double *, const double *, int, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtrmm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCtrmm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const 
cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, cuComplex *C, - int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const cuComplex *, const cuComplex *, int, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtrmm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZtrmm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtrmm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasSgemmBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const float *alpha, /* host or device pointer */ - const float *const Aarray[], int lda, const float *const Barray[], int ldb, - const float *beta, /* host or device pointer */ - float *const Carray[], int ldc, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const float *, const float *const[], int, const float *const[], int, - const float *, float *const[], int, 
int); - static auto func_ptr = LoadSymbol("cublasSgemmBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray, - ldb, beta, Carray, ldc, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasDgemmBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const double *alpha, /* host or device pointer */ - const double *const Aarray[], int lda, const double *const Barray[], - int ldb, const double *beta, /* host or device pointer */ - double *const Carray[], int ldc, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const double *, const double *const[], int, const double *const[], int, - const double *, double *const[], int, int); - static auto func_ptr = LoadSymbol("cublasDgemmBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray, - ldb, beta, Carray, ldc, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasCgemmBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *const Aarray[], int lda, const cuComplex *const Barray[], - int ldb, const cuComplex *beta, /* host or device pointer */ - cuComplex *const Carray[], int ldc, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuComplex *, const cuComplex *const[], int, - const cuComplex *const[], int, const cuComplex *, cuComplex *const[], int, - int); - static auto func_ptr = LoadSymbol("cublasCgemmBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray, - ldb, beta, Carray, ldc, batchCount); -} - -cublasStatus_t 
CUBLASWINAPI cublasCgemm3mBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *const Aarray[], int lda, const cuComplex *const Barray[], - int ldb, const cuComplex *beta, /* host or device pointer */ - cuComplex *const Carray[], int ldc, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuComplex *, const cuComplex *const[], int, - const cuComplex *const[], int, const cuComplex *, cuComplex *const[], int, - int); - static auto func_ptr = LoadSymbol("cublasCgemm3mBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray, - ldb, beta, Carray, ldc, batchCount); -} - -cublasStatus_t CUBLASWINAPI -cublasZgemmBatched(cublasHandle_t handle, cublasOperation_t transa, - cublasOperation_t transb, int m, int n, int k, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *const Aarray[], int lda, - const cuDoubleComplex *const Barray[], int ldb, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *const Carray[], int ldc, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuDoubleComplex *, const cuDoubleComplex *const[], int, - const cuDoubleComplex *const[], int, const cuDoubleComplex *, - cuDoubleComplex *const[], int, int); - static auto func_ptr = LoadSymbol("cublasZgemmBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray, - ldb, beta, Carray, ldc, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const void *alpha, /* 
host or device pointer */ - const void *const Aarray[], cudaDataType Atype, int lda, - const void *const Barray[], cudaDataType Btype, int ldb, - const void *beta, /* host or device pointer */ - void *const Carray[], cudaDataType Ctype, int ldc, int batchCount, - cudaDataType computeType, cublasGemmAlgo_t algo) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const void *, const void *const[], cudaDataType, int, const void *const[], - cudaDataType, int, const void *, void *const[], cudaDataType, int, int, - cudaDataType, cublasGemmAlgo_t); - static auto func_ptr = LoadSymbol("cublasGemmBatchedEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, Atype, lda, - Barray, Btype, ldb, beta, Carray, Ctype, ldc, batchCount, - computeType, algo); -} - -cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const void *alpha, /* host or device pointer */ - const void *A, cudaDataType Atype, int lda, - long long int strideA, /* purposely signed */ - const void *B, cudaDataType Btype, int ldb, long long int strideB, - const void *beta, /* host or device pointer */ - void *C, cudaDataType Ctype, int ldc, long long int strideC, int batchCount, - cudaDataType computeType, cublasGemmAlgo_t algo) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const void *, const void *, cudaDataType, int, long long, const void *, - cudaDataType, int, long long, const void *, void *, cudaDataType, int, - long long, int, cudaDataType, cublasGemmAlgo_t); - static auto func_ptr = LoadSymbol("cublasGemmStridedBatchedEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, - strideA, B, Btype, ldb, strideB, beta, C, 
Ctype, ldc, strideC, - batchCount, computeType, algo); -} - -cublasStatus_t CUBLASWINAPI cublasSgemmStridedBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const float *alpha, /* host or device pointer */ - const float *A, int lda, long long int strideA, /* purposely signed */ - const float *B, int ldb, long long int strideB, - const float *beta, /* host or device pointer */ - float *C, int ldc, long long int strideC, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const float *, const float *, int, long long, const float *, int, - long long, const float *, float *, int, long long, int); - static auto func_ptr = LoadSymbol("cublasSgemmStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, - ldb, strideB, beta, C, ldc, strideC, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasDgemmStridedBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const double *alpha, /* host or device pointer */ - const double *A, int lda, long long int strideA, /* purposely signed */ - const double *B, int ldb, long long int strideB, - const double *beta, /* host or device pointer */ - double *C, int ldc, long long int strideC, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const double *, const double *, int, long long, const double *, int, - long long, const double *, double *, int, long long, int); - static auto func_ptr = LoadSymbol("cublasDgemmStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, - ldb, strideB, beta, C, ldc, strideC, batchCount); -} - -cublasStatus_t CUBLASWINAPI 
cublasCgemmStridedBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, long long int strideA, /* purposely signed */ - const cuComplex *B, int ldb, long long int strideB, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc, long long int strideC, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuComplex *, const cuComplex *, int, long long, const cuComplex *, - int, long long, const cuComplex *, cuComplex *, int, long long, int); - static auto func_ptr = LoadSymbol("cublasCgemmStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, - ldb, strideB, beta, C, ldc, strideC, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasCgemm3mStridedBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, long long int strideA, /* purposely signed */ - const cuComplex *B, int ldb, long long int strideB, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc, long long int strideC, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuComplex *, const cuComplex *, int, long long, const cuComplex *, - int, long long, const cuComplex *, cuComplex *, int, long long, int); - static auto func_ptr = LoadSymbol("cublasCgemm3mStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, - ldb, strideB, beta, C, ldc, strideC, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasZgemmStridedBatched( - 
cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, - long long int strideA, /* purposely signed */ - const cuDoubleComplex *B, int ldb, long long int strideB, - const cuDoubleComplex *beta, /* host or device poi */ - cuDoubleComplex *C, int ldc, long long int strideC, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, long long, - const cuDoubleComplex *, int, long long, const cuDoubleComplex *, - cuDoubleComplex *, int, long long, int); - static auto func_ptr = LoadSymbol("cublasZgemmStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, - ldb, strideB, beta, C, ldc, strideC, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasSgeam( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *beta, /* host or device pointer */ - const float *B, int ldb, float *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, - const float *, const float *, int, const float *, const float *, int, - float *, int); - static auto func_ptr = LoadSymbol("cublasSgeam"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasDgeam( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *beta, /* host or device pointer */ - const double *B, int ldb, double *C, int ldc) 
{ - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, - const double *, const double *, int, const double *, const double *, int, - double *, int); - static auto func_ptr = LoadSymbol("cublasDgeam"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCgeam( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, - const cuComplex *beta, /* host or device pointer */ - const cuComplex *B, int ldb, cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgeam"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZgeam( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, - const cuDoubleComplex *beta, /* host or device pointer */ - const cuDoubleComplex *B, int ldb, cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cublasZgeam"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI 
cublasSgetrfBatched( - cublasHandle_t handle, int n, float *const A[], /*Device pointer*/ - int lda, int *P, /*Device Pointer*/ - int *info, /*Device Pointer*/ - int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, float *const[], int, int *, int *, int); - static auto func_ptr = LoadSymbol("cublasSgetrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, P, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasDgetrfBatched( - cublasHandle_t handle, int n, double *const A[], /*Device pointer*/ - int lda, int *P, /*Device Pointer*/ - int *info, /*Device Pointer*/ - int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, double *const[], int, int *, int *, int); - static auto func_ptr = LoadSymbol("cublasDgetrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, P, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasCgetrfBatched( - cublasHandle_t handle, int n, cuComplex *const A[], /*Device pointer*/ - int lda, int *P, /*Device Pointer*/ - int *info, /*Device Pointer*/ - int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, cuComplex *const[], int, int *, int *, int); - static auto func_ptr = LoadSymbol("cublasCgetrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, P, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasZgetrfBatched( - cublasHandle_t handle, int n, cuDoubleComplex *const A[], /*Device pointer*/ - int lda, int *P, /*Device Pointer*/ - int *info, /*Device Pointer*/ - int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, cuDoubleComplex *const[], int, int *, int *, int); - static auto func_ptr = LoadSymbol("cublasZgetrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, P, info, batchSize); -} - -cublasStatus_t 
CUBLASWINAPI cublasSgetriBatched( - cublasHandle_t handle, int n, const float *const A[], /*Device pointer*/ - int lda, const int *P, /*Device pointer*/ - float *const C[], /*Device pointer*/ - int ldc, int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const float *const[], int, const int *, - float *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasSgetriBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, P, C, ldc, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasDgetriBatched( - cublasHandle_t handle, int n, const double *const A[], /*Device pointer*/ - int lda, const int *P, /*Device pointer*/ - double *const C[], /*Device pointer*/ - int ldc, int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const double *const[], int, const int *, - double *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasDgetriBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, P, C, ldc, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasCgetriBatched( - cublasHandle_t handle, int n, const cuComplex *const A[], /*Device pointer*/ - int lda, const int *P, /*Device pointer*/ - cuComplex *const C[], /*Device pointer*/ - int ldc, int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuComplex *const[], int, const int *, - cuComplex *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasCgetriBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, P, C, ldc, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasZgetriBatched(cublasHandle_t handle, int n, - const cuDoubleComplex *const A[], /*Device pointer*/ - int lda, const int *P, /*Device pointer*/ - cuDoubleComplex *const C[], /*Device pointer*/ - int ldc, int *info, int batchSize) 
{ - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *const[], int, const int *, - cuDoubleComplex *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasZgetriBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, P, C, ldc, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasSgetrsBatched( - cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, - const float *const Aarray[], int lda, const int *devIpiv, - float *const Barray[], int ldb, int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const float *const[], int, - const int *, float *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasSgetrsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb, - info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasDgetrsBatched( - cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, - const double *const Aarray[], int lda, const int *devIpiv, - double *const Barray[], int ldb, int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const double *const[], int, - const int *, double *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasDgetrsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb, - info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasCgetrsBatched( - cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, - const cuComplex *const Aarray[], int lda, const int *devIpiv, - cuComplex *const Barray[], int ldb, int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const cuComplex *const[], - int, const int *, 
cuComplex *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasCgetrsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb, - info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasZgetrsBatched( - cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, - const cuDoubleComplex *const Aarray[], int lda, const int *devIpiv, - cuDoubleComplex *const Barray[], int ldb, int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, - const cuDoubleComplex *const[], int, const int *, - cuDoubleComplex *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasZgetrsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb, - info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasStrsmBatched( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const float *alpha, /*Host or Device Pointer*/ - const float *const A[], int lda, float *const B[], int ldb, - int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const float *, const float *const[], int, - float *const[], int, int); - static auto func_ptr = LoadSymbol("cublasStrsmBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, - batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasDtrsmBatched( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const double *alpha, /*Host or Device Pointer*/ - const double *const A[], int lda, double *const B[], int ldb, - int batchCount) { - using FuncPtr = 
cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const double *, const double *const[], int, - double *const[], int, int); - static auto func_ptr = LoadSymbol("cublasDtrsmBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, - batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasCtrsmBatched( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const cuComplex *alpha, /*Host or Device Pointer*/ - const cuComplex *const A[], int lda, cuComplex *const B[], int ldb, - int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const cuComplex *, const cuComplex *const[], - int, cuComplex *const[], int, int); - static auto func_ptr = LoadSymbol("cublasCtrsmBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, - batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasZtrsmBatched( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const cuDoubleComplex *alpha, /*Host or Device Pointer*/ - const cuDoubleComplex *const A[], int lda, cuDoubleComplex *const B[], - int ldb, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *const[], int, cuDoubleComplex *const[], int, int); - static auto func_ptr = LoadSymbol("cublasZtrsmBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, - batchCount); -} - 
-cublasStatus_t CUBLASWINAPI cublasSmatinvBatched( - cublasHandle_t handle, int n, const float *const A[], /*Device pointer*/ - int lda, float *const Ainv[], /*Device pointer*/ - int lda_inv, int *info, /*Device Pointer*/ - int batchSize) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const float *const[], - int, float *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasSmatinvBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, Ainv, lda_inv, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasDmatinvBatched( - cublasHandle_t handle, int n, const double *const A[], /*Device pointer*/ - int lda, double *const Ainv[], /*Device pointer*/ - int lda_inv, int *info, /*Device Pointer*/ - int batchSize) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const double *const[], - int, double *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasDmatinvBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, Ainv, lda_inv, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasCmatinvBatched( - cublasHandle_t handle, int n, const cuComplex *const A[], /*Device pointer*/ - int lda, cuComplex *const Ainv[], /*Device pointer*/ - int lda_inv, int *info, /*Device Pointer*/ - int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuComplex *const[], int, cuComplex *const[], - int, int *, int); - static auto func_ptr = LoadSymbol("cublasCmatinvBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, Ainv, lda_inv, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasZmatinvBatched(cublasHandle_t handle, int n, - const cuDoubleComplex *const A[], /*Device pointer*/ - int lda, cuDoubleComplex *const Ainv[], /*Device pointer*/ - int lda_inv, int *info, /*Device Pointer*/ - int batchSize) { - using FuncPtr = 
cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *const[], int, - cuDoubleComplex *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasZmatinvBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, Ainv, lda_inv, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasSgeqrfBatched(cublasHandle_t handle, int m, int n, - float *const Aarray[], /*Device pointer*/ - int lda, float *const TauArray[], /*Device pointer*/ - int *info, int batchSize) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, int, float *const[], - int, float *const[], int *, int); - static auto func_ptr = LoadSymbol("cublasSgeqrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, Aarray, lda, TauArray, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasDgeqrfBatched(cublasHandle_t handle, int m, int n, - double *const Aarray[], /*Device pointer*/ - int lda, double *const TauArray[], /*Device pointer*/ - int *info, int batchSize) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, int, double *const[], - int, double *const[], int *, int); - static auto func_ptr = LoadSymbol("cublasDgeqrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, Aarray, lda, TauArray, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasCgeqrfBatched(cublasHandle_t handle, int m, int n, - cuComplex *const Aarray[], /*Device pointer*/ - int lda, cuComplex *const TauArray[], /*Device pointer*/ - int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, int, cuComplex *const[], int, cuComplex *const[], - int *, int); - static auto func_ptr = LoadSymbol("cublasCgeqrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, Aarray, lda, TauArray, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasZgeqrfBatched( - 
cublasHandle_t handle, int m, int n, - cuDoubleComplex *const Aarray[], /*Device pointer*/ - int lda, cuDoubleComplex *const TauArray[], /*Device pointer*/ - int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, int, cuDoubleComplex *const[], int, - cuDoubleComplex *const[], int *, int); - static auto func_ptr = LoadSymbol("cublasZgeqrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, Aarray, lda, TauArray, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasSgelsBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - int nrhs, float *const Aarray[], /*Device pointer*/ - int lda, float *const Carray[], /*Device pointer*/ - int ldc, int *info, int *devInfoArray, /*Device pointer*/ - int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, int, float *const[], int, - float *const[], int, int *, int *, int); - static auto func_ptr = LoadSymbol("cublasSgelsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info, - devInfoArray, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasDgelsBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - int nrhs, double *const Aarray[], /*Device pointer*/ - int lda, double *const Carray[], /*Device pointer*/ - int ldc, int *info, int *devInfoArray, /*Device pointer*/ - int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, int, double *const[], int, - double *const[], int, int *, int *, int); - static auto func_ptr = LoadSymbol("cublasDgelsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info, - devInfoArray, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasCgelsBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int 
n, - int nrhs, cuComplex *const Aarray[], /*Device pointer*/ - int lda, cuComplex *const Carray[], /*Device pointer*/ - int ldc, int *info, int *devInfoArray, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, int, cuComplex *const[], int, - cuComplex *const[], int, int *, int *, int); - static auto func_ptr = LoadSymbol("cublasCgelsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info, - devInfoArray, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasZgelsBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - int nrhs, cuDoubleComplex *const Aarray[], /*Device pointer*/ - int lda, cuDoubleComplex *const Carray[], /*Device pointer*/ - int ldc, int *info, int *devInfoArray, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, int, - cuDoubleComplex *const[], int, cuDoubleComplex *const[], int, int *, - int *, int); - static auto func_ptr = LoadSymbol("cublasZgelsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info, - devInfoArray, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasSdgmm(cublasHandle_t handle, - cublasSideMode_t mode, int m, int n, - const float *A, int lda, const float *x, - int incx, float *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, int, int, const float *, int, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSdgmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, m, n, A, lda, x, incx, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasDdgmm(cublasHandle_t handle, - cublasSideMode_t mode, int m, int n, - const double *A, int lda, - const double *x, int incx, double *C, - int ldc) { - using FuncPtr = 
cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, int, int, const double *, int, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDdgmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, m, n, A, lda, x, incx, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCdgmm(cublasHandle_t handle, - cublasSideMode_t mode, int m, int n, - const cuComplex *A, int lda, - const cuComplex *x, int incx, - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, int, int, const cuComplex *, int, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCdgmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, m, n, A, lda, x, incx, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZdgmm(cublasHandle_t handle, - cublasSideMode_t mode, int m, int n, - const cuDoubleComplex *A, int lda, - const cuDoubleComplex *x, int incx, - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, int, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZdgmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, m, n, A, lda, x, incx, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasStpttr(cublasHandle_t handle, - cublasFillMode_t uplo, int n, - const float *AP, float *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasStpttr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, AP, A, lda); -} - -cublasStatus_t CUBLASWINAPI cublasDtpttr(cublasHandle_t handle, - cublasFillMode_t uplo, int n, - const double *AP, double *A, int lda) { - using FuncPtr = 
cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDtpttr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, AP, A, lda); -} - -cublasStatus_t CUBLASWINAPI cublasCtpttr(cublasHandle_t handle, - cublasFillMode_t uplo, int n, - const cuComplex *AP, cuComplex *A, - int lda) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtpttr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, AP, A, lda); -} - -cublasStatus_t CUBLASWINAPI cublasZtpttr(cublasHandle_t handle, - cublasFillMode_t uplo, int n, - const cuDoubleComplex *AP, - cuDoubleComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtpttr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, AP, A, lda); -} - -cublasStatus_t CUBLASWINAPI cublasStrttp(cublasHandle_t handle, - cublasFillMode_t uplo, int n, - const float *A, int lda, float *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, int, float *); - static auto func_ptr = LoadSymbol("cublasStrttp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, AP); -} - -cublasStatus_t CUBLASWINAPI cublasDtrttp(cublasHandle_t handle, - cublasFillMode_t uplo, int n, - const double *A, int lda, double *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, int, double *); - static auto func_ptr = LoadSymbol("cublasDtrttp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, AP); -} - -cublasStatus_t 
CUBLASWINAPI cublasCtrttp(cublasHandle_t handle, - cublasFillMode_t uplo, int n, - const cuComplex *A, int lda, - cuComplex *AP) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, - const cuComplex *, int, cuComplex *); - static auto func_ptr = LoadSymbol("cublasCtrttp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, AP); -} - -cublasStatus_t CUBLASWINAPI cublasZtrttp(cublasHandle_t handle, - cublasFillMode_t uplo, int n, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, int, - cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZtrttp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, AP); -} - -cublasStatus CUBLASWINAPI cublasInit(void) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(); - static auto func_ptr = LoadSymbol("cublasInit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -cublasStatus CUBLASWINAPI cublasShutdown(void) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(); - static auto func_ptr = LoadSymbol("cublasShutdown"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -cublasStatus CUBLASWINAPI cublasGetError(void) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(); - static auto func_ptr = LoadSymbol("cublasGetError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -cublasStatus CUBLASWINAPI cublasGetVersion(int *version) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int *); - static auto func_ptr = LoadSymbol("cublasGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(version); -} - -cublasStatus CUBLASWINAPI cublasAlloc(int n, int elemSize, void **devicePtr) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, void **); - static auto func_ptr = 
LoadSymbol("cublasAlloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(n, elemSize, devicePtr); -} - -cublasStatus CUBLASWINAPI cublasFree(void *devicePtr) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(void *); - static auto func_ptr = LoadSymbol("cublasFree"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devicePtr); -} - -cublasStatus CUBLASWINAPI cublasSetKernelStream(cudaStream_t stream) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cublasSetKernelStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -float CUBLASWINAPI cublasSnrm2(int n, const float *x, int incx) { - using FuncPtr = float(CUBLASWINAPI *)(int, const float *, int); - static auto func_ptr = LoadSymbol("cublasSnrm2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSnrm2"); - return func_ptr(n, x, incx); -} - -double CUBLASWINAPI cublasDnrm2(int n, const double *x, int incx) { - using FuncPtr = double(CUBLASWINAPI *)(int, const double *, int); - static auto func_ptr = LoadSymbol("cublasDnrm2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDnrm2"); - return func_ptr(n, x, incx); -} - -float CUBLASWINAPI cublasScnrm2(int n, const cuComplex *x, int incx) { - using FuncPtr = float(CUBLASWINAPI *)(int, const cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasScnrm2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasScnrm2"); - return func_ptr(n, x, incx); -} - -double CUBLASWINAPI cublasDznrm2(int n, const cuDoubleComplex *x, int incx) { - using FuncPtr = double(CUBLASWINAPI *)(int, const cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasDznrm2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDznrm2"); - return func_ptr(n, x, incx); -} - -float CUBLASWINAPI cublasSdot(int n, const float *x, int incx, const float *y, - int incy) { - using FuncPtr = - float(CUBLASWINAPI *)(int, const float *, int, const float *, int); - static auto 
func_ptr = LoadSymbol("cublasSdot"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSdot"); - return func_ptr(n, x, incx, y, incy); -} - -double CUBLASWINAPI cublasDdot(int n, const double *x, int incx, - const double *y, int incy) { - using FuncPtr = - double(CUBLASWINAPI *)(int, const double *, int, const double *, int); - static auto func_ptr = LoadSymbol("cublasDdot"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDdot"); - return func_ptr(n, x, incx, y, incy); -} - -cuComplex CUBLASWINAPI cublasCdotu(int n, const cuComplex *x, int incx, - const cuComplex *y, int incy) { - using FuncPtr = cuComplex(CUBLASWINAPI *)(int, const cuComplex *, int, - const cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCdotu"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCdotu"); - return func_ptr(n, x, incx, y, incy); -} - -cuComplex CUBLASWINAPI cublasCdotc(int n, const cuComplex *x, int incx, - const cuComplex *y, int incy) { - using FuncPtr = cuComplex(CUBLASWINAPI *)(int, const cuComplex *, int, - const cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCdotc"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCdotc"); - return func_ptr(n, x, incx, y, incy); -} - -cuDoubleComplex CUBLASWINAPI cublasZdotu(int n, const cuDoubleComplex *x, - int incx, const cuDoubleComplex *y, - int incy) { - using FuncPtr = cuDoubleComplex(CUBLASWINAPI *)( - int, const cuDoubleComplex *, int, const cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZdotu"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZdotu"); - return func_ptr(n, x, incx, y, incy); -} - -cuDoubleComplex CUBLASWINAPI cublasZdotc(int n, const cuDoubleComplex *x, - int incx, const cuDoubleComplex *y, - int incy) { - using FuncPtr = cuDoubleComplex(CUBLASWINAPI *)( - int, const cuDoubleComplex *, int, const cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZdotc"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZdotc"); - return func_ptr(n, x, incx, y, incy); -} - -void 
CUBLASWINAPI cublasSscal(int n, float alpha, float *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSscal"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSscal"); - return func_ptr(n, alpha, x, incx); -} - -void CUBLASWINAPI cublasDscal(int n, double alpha, double *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDscal"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDscal"); - return func_ptr(n, alpha, x, incx); -} - -void CUBLASWINAPI cublasCscal(int n, cuComplex alpha, cuComplex *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCscal"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCscal"); - return func_ptr(n, alpha, x, incx); -} - -void CUBLASWINAPI cublasZscal(int n, cuDoubleComplex alpha, cuDoubleComplex *x, - int incx) { - using FuncPtr = - void(CUBLASWINAPI *)(int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZscal"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZscal"); - return func_ptr(n, alpha, x, incx); -} - -void CUBLASWINAPI cublasCsscal(int n, float alpha, cuComplex *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(int, float, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsscal"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCsscal"); - return func_ptr(n, alpha, x, incx); -} - -void CUBLASWINAPI cublasZdscal(int n, double alpha, cuDoubleComplex *x, - int incx) { - using FuncPtr = void(CUBLASWINAPI *)(int, double, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZdscal"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZdscal"); - return func_ptr(n, alpha, x, incx); -} - -void CUBLASWINAPI cublasSaxpy(int n, float alpha, const float *x, int incx, - float *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(int, float, const 
float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSaxpy"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSaxpy"); - return func_ptr(n, alpha, x, incx, y, incy); -} - -void CUBLASWINAPI cublasDaxpy(int n, double alpha, const double *x, int incx, - double *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(int, double, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDaxpy"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDaxpy"); - return func_ptr(n, alpha, x, incx, y, incy); -} - -void CUBLASWINAPI cublasCaxpy(int n, cuComplex alpha, const cuComplex *x, - int incx, cuComplex *y, int incy) { - using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex, const cuComplex *, int, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCaxpy"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCaxpy"); - return func_ptr(n, alpha, x, incx, y, incy); -} - -void CUBLASWINAPI cublasZaxpy(int n, cuDoubleComplex alpha, - const cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(int, cuDoubleComplex, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZaxpy"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZaxpy"); - return func_ptr(n, alpha, x, incx, y, incy); -} - -void CUBLASWINAPI cublasScopy(int n, const float *x, int incx, float *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)(int, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasScopy"); - if (!func_ptr) LogFatalSymbolNotFound("cublasScopy"); - return func_ptr(n, x, incx, y, incy); -} - -void CUBLASWINAPI cublasDcopy(int n, const double *x, int incx, double *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)(int, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDcopy"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDcopy"); - return func_ptr(n, x, incx, y, incy); -} - -void CUBLASWINAPI 
cublasCcopy(int n, const cuComplex *x, int incx, cuComplex *y, - int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCcopy"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCcopy"); - return func_ptr(n, x, incx, y, incy); -} - -void CUBLASWINAPI cublasZcopy(int n, const cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy) { - using FuncPtr = void(CUBLASWINAPI *)(int, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZcopy"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZcopy"); - return func_ptr(n, x, incx, y, incy); -} - -void CUBLASWINAPI cublasSswap(int n, float *x, int incx, float *y, int incy) { - using FuncPtr = void(CUBLASWINAPI *)(int, float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSswap"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSswap"); - return func_ptr(n, x, incx, y, incy); -} - -void CUBLASWINAPI cublasDswap(int n, double *x, int incx, double *y, int incy) { - using FuncPtr = void(CUBLASWINAPI *)(int, double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDswap"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDswap"); - return func_ptr(n, x, incx, y, incy); -} - -void CUBLASWINAPI cublasCswap(int n, cuComplex *x, int incx, cuComplex *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCswap"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCswap"); - return func_ptr(n, x, incx, y, incy); -} - -void CUBLASWINAPI cublasZswap(int n, cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(int, cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZswap"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZswap"); - return func_ptr(n, x, incx, y, incy); -} - -int CUBLASWINAPI 
cublasIsamax(int n, const float *x, int incx) { - using FuncPtr = int(CUBLASWINAPI *)(int, const float *, int); - static auto func_ptr = LoadSymbol("cublasIsamax"); - if (!func_ptr) LogFatalSymbolNotFound("cublasIsamax"); - return func_ptr(n, x, incx); -} - -int CUBLASWINAPI cublasIdamax(int n, const double *x, int incx) { - using FuncPtr = int(CUBLASWINAPI *)(int, const double *, int); - static auto func_ptr = LoadSymbol("cublasIdamax"); - if (!func_ptr) LogFatalSymbolNotFound("cublasIdamax"); - return func_ptr(n, x, incx); -} - -int CUBLASWINAPI cublasIcamax(int n, const cuComplex *x, int incx) { - using FuncPtr = int(CUBLASWINAPI *)(int, const cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasIcamax"); - if (!func_ptr) LogFatalSymbolNotFound("cublasIcamax"); - return func_ptr(n, x, incx); -} - -int CUBLASWINAPI cublasIzamax(int n, const cuDoubleComplex *x, int incx) { - using FuncPtr = int(CUBLASWINAPI *)(int, const cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasIzamax"); - if (!func_ptr) LogFatalSymbolNotFound("cublasIzamax"); - return func_ptr(n, x, incx); -} - -int CUBLASWINAPI cublasIsamin(int n, const float *x, int incx) { - using FuncPtr = int(CUBLASWINAPI *)(int, const float *, int); - static auto func_ptr = LoadSymbol("cublasIsamin"); - if (!func_ptr) LogFatalSymbolNotFound("cublasIsamin"); - return func_ptr(n, x, incx); -} - -int CUBLASWINAPI cublasIdamin(int n, const double *x, int incx) { - using FuncPtr = int(CUBLASWINAPI *)(int, const double *, int); - static auto func_ptr = LoadSymbol("cublasIdamin"); - if (!func_ptr) LogFatalSymbolNotFound("cublasIdamin"); - return func_ptr(n, x, incx); -} - -int CUBLASWINAPI cublasIcamin(int n, const cuComplex *x, int incx) { - using FuncPtr = int(CUBLASWINAPI *)(int, const cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasIcamin"); - if (!func_ptr) LogFatalSymbolNotFound("cublasIcamin"); - return func_ptr(n, x, incx); -} - -int CUBLASWINAPI cublasIzamin(int n, const 
cuDoubleComplex *x, int incx) { - using FuncPtr = int(CUBLASWINAPI *)(int, const cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasIzamin"); - if (!func_ptr) LogFatalSymbolNotFound("cublasIzamin"); - return func_ptr(n, x, incx); -} - -float CUBLASWINAPI cublasSasum(int n, const float *x, int incx) { - using FuncPtr = float(CUBLASWINAPI *)(int, const float *, int); - static auto func_ptr = LoadSymbol("cublasSasum"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSasum"); - return func_ptr(n, x, incx); -} - -double CUBLASWINAPI cublasDasum(int n, const double *x, int incx) { - using FuncPtr = double(CUBLASWINAPI *)(int, const double *, int); - static auto func_ptr = LoadSymbol("cublasDasum"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDasum"); - return func_ptr(n, x, incx); -} - -float CUBLASWINAPI cublasScasum(int n, const cuComplex *x, int incx) { - using FuncPtr = float(CUBLASWINAPI *)(int, const cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasScasum"); - if (!func_ptr) LogFatalSymbolNotFound("cublasScasum"); - return func_ptr(n, x, incx); -} - -double CUBLASWINAPI cublasDzasum(int n, const cuDoubleComplex *x, int incx) { - using FuncPtr = double(CUBLASWINAPI *)(int, const cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasDzasum"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDzasum"); - return func_ptr(n, x, incx); -} - -void CUBLASWINAPI cublasSrot(int n, float *x, int incx, float *y, int incy, - float sc, float ss) { - using FuncPtr = - void(CUBLASWINAPI *)(int, float *, int, float *, int, float, float); - static auto func_ptr = LoadSymbol("cublasSrot"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSrot"); - return func_ptr(n, x, incx, y, incy, sc, ss); -} - -void CUBLASWINAPI cublasDrot(int n, double *x, int incx, double *y, int incy, - double sc, double ss) { - using FuncPtr = - void(CUBLASWINAPI *)(int, double *, int, double *, int, double, double); - static auto func_ptr = LoadSymbol("cublasDrot"); - 
if (!func_ptr) LogFatalSymbolNotFound("cublasDrot"); - return func_ptr(n, x, incx, y, incy, sc, ss); -} - -void CUBLASWINAPI cublasCrot(int n, cuComplex *x, int incx, cuComplex *y, - int incy, float c, cuComplex s) { - using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex *, int, cuComplex *, int, - float, cuComplex); - static auto func_ptr = LoadSymbol("cublasCrot"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCrot"); - return func_ptr(n, x, incx, y, incy, c, s); -} - -void CUBLASWINAPI cublasZrot(int n, cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy, double sc, - cuDoubleComplex cs) { - using FuncPtr = - void(CUBLASWINAPI *)(int, cuDoubleComplex *, int, cuDoubleComplex *, int, - double, cuDoubleComplex); - static auto func_ptr = LoadSymbol("cublasZrot"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZrot"); - return func_ptr(n, x, incx, y, incy, sc, cs); -} - -void CUBLASWINAPI cublasCsrot(int n, cuComplex *x, int incx, cuComplex *y, - int incy, float c, float s) { - using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex *, int, cuComplex *, int, - float, float); - static auto func_ptr = LoadSymbol("cublasCsrot"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCsrot"); - return func_ptr(n, x, incx, y, incy, c, s); -} - -void CUBLASWINAPI cublasZdrot(int n, cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy, double c, - double s) { - using FuncPtr = void(CUBLASWINAPI *)(int, cuDoubleComplex *, int, - cuDoubleComplex *, int, double, double); - static auto func_ptr = LoadSymbol("cublasZdrot"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZdrot"); - return func_ptr(n, x, incx, y, incy, c, s); -} - -void CUBLASWINAPI cublasSrotg(float *sa, float *sb, float *sc, float *ss) { - using FuncPtr = void(CUBLASWINAPI *)(float *, float *, float *, float *); - static auto func_ptr = LoadSymbol("cublasSrotg"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSrotg"); - return func_ptr(sa, sb, sc, ss); -} - -void CUBLASWINAPI cublasDrotg(double *sa, double 
*sb, double *sc, double *ss) { - using FuncPtr = void(CUBLASWINAPI *)(double *, double *, double *, double *); - static auto func_ptr = LoadSymbol("cublasDrotg"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDrotg"); - return func_ptr(sa, sb, sc, ss); -} - -void CUBLASWINAPI cublasCrotg(cuComplex *ca, cuComplex cb, float *sc, - cuComplex *cs) { - using FuncPtr = - void(CUBLASWINAPI *)(cuComplex *, cuComplex, float *, cuComplex *); - static auto func_ptr = LoadSymbol("cublasCrotg"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCrotg"); - return func_ptr(ca, cb, sc, cs); -} - -void CUBLASWINAPI cublasZrotg(cuDoubleComplex *ca, cuDoubleComplex cb, - double *sc, cuDoubleComplex *cs) { - using FuncPtr = void(CUBLASWINAPI *)(cuDoubleComplex *, cuDoubleComplex, - double *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZrotg"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZrotg"); - return func_ptr(ca, cb, sc, cs); -} - -void CUBLASWINAPI cublasSrotm(int n, float *x, int incx, float *y, int incy, - const float *sparam) { - using FuncPtr = - void(CUBLASWINAPI *)(int, float *, int, float *, int, const float *); - static auto func_ptr = LoadSymbol("cublasSrotm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSrotm"); - return func_ptr(n, x, incx, y, incy, sparam); -} - -void CUBLASWINAPI cublasDrotm(int n, double *x, int incx, double *y, int incy, - const double *sparam) { - using FuncPtr = - void(CUBLASWINAPI *)(int, double *, int, double *, int, const double *); - static auto func_ptr = LoadSymbol("cublasDrotm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDrotm"); - return func_ptr(n, x, incx, y, incy, sparam); -} - -void CUBLASWINAPI cublasSrotmg(float *sd1, float *sd2, float *sx1, - const float *sy1, float *sparam) { - using FuncPtr = - void(CUBLASWINAPI *)(float *, float *, float *, const float *, float *); - static auto func_ptr = LoadSymbol("cublasSrotmg"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSrotmg"); - return func_ptr(sd1, sd2, 
sx1, sy1, sparam); -} - -void CUBLASWINAPI cublasDrotmg(double *sd1, double *sd2, double *sx1, - const double *sy1, double *sparam) { - using FuncPtr = void(CUBLASWINAPI *)(double *, double *, double *, - const double *, double *); - static auto func_ptr = LoadSymbol("cublasDrotmg"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDrotmg"); - return func_ptr(sd1, sd2, sx1, sy1, sparam); -} - -void CUBLASWINAPI cublasSgemv(char trans, int m, int n, float alpha, - const float *A, int lda, const float *x, int incx, - float beta, float *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, int, float, const float *, int, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSgemv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSgemv"); - return func_ptr(trans, m, n, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasDgemv(char trans, int m, int n, double alpha, - const double *A, int lda, const double *x, - int incx, double beta, double *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, int, double, const double *, int, - const double *, int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDgemv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDgemv"); - return func_ptr(trans, m, n, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasCgemv(char trans, int m, int n, cuComplex alpha, - const cuComplex *A, int lda, const cuComplex *x, - int incx, cuComplex beta, cuComplex *y, - int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgemv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCgemv"); - return func_ptr(trans, m, n, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasZgemv(char trans, int m, int n, cuDoubleComplex alpha, - const cuDoubleComplex *A, int lda, - const cuDoubleComplex 
*x, int incx, - cuDoubleComplex beta, cuDoubleComplex *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZgemv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZgemv"); - return func_ptr(trans, m, n, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasSgbmv(char trans, int m, int n, int kl, int ku, - float alpha, const float *A, int lda, - const float *x, int incx, float beta, float *y, - int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, int, int, int, float, const float *, int, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSgbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSgbmv"); - return func_ptr(trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasDgbmv(char trans, int m, int n, int kl, int ku, - double alpha, const double *A, int lda, - const double *x, int incx, double beta, double *y, - int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, int, int, int, double, const double *, - int, const double *, int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDgbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDgbmv"); - return func_ptr(trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasCgbmv(char trans, int m, int n, int kl, int ku, - cuComplex alpha, const cuComplex *A, int lda, - const cuComplex *x, int incx, cuComplex beta, - cuComplex *y, int incy) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, int, int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCgbmv"); - return func_ptr(trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, 
incy); -} - -void CUBLASWINAPI cublasZgbmv(char trans, int m, int n, int kl, int ku, - cuDoubleComplex alpha, const cuDoubleComplex *A, - int lda, const cuDoubleComplex *x, int incx, - cuDoubleComplex beta, cuDoubleComplex *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, int, int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZgbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZgbmv"); - return func_ptr(trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasStrmv(char uplo, char trans, char diag, int n, - const float *A, int lda, float *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const float *, - int, float *, int); - static auto func_ptr = LoadSymbol("cublasStrmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasStrmv"); - return func_ptr(uplo, trans, diag, n, A, lda, x, incx); -} - -void CUBLASWINAPI cublasDtrmv(char uplo, char trans, char diag, int n, - const double *A, int lda, double *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const double *, - int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtrmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDtrmv"); - return func_ptr(uplo, trans, diag, n, A, lda, x, incx); -} - -void CUBLASWINAPI cublasCtrmv(char uplo, char trans, char diag, int n, - const cuComplex *A, int lda, cuComplex *x, - int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const cuComplex *, - int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtrmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCtrmv"); - return func_ptr(uplo, trans, diag, n, A, lda, x, incx); -} - -void CUBLASWINAPI cublasZtrmv(char uplo, char trans, char diag, int n, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *x, int incx) { - using FuncPtr = - void(CUBLASWINAPI 
*)(char, char, char, int, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtrmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZtrmv"); - return func_ptr(uplo, trans, diag, n, A, lda, x, incx); -} - -void CUBLASWINAPI cublasStbmv(char uplo, char trans, char diag, int n, int k, - const float *A, int lda, float *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, int, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasStbmv"); - return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); -} - -void CUBLASWINAPI cublasDtbmv(char uplo, char trans, char diag, int n, int k, - const double *A, int lda, double *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, int, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDtbmv"); - return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); -} - -void CUBLASWINAPI cublasCtbmv(char uplo, char trans, char diag, int n, int k, - const cuComplex *A, int lda, cuComplex *x, - int incx) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, char, int, int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCtbmv"); - return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); -} - -void CUBLASWINAPI cublasZtbmv(char uplo, char trans, char diag, int n, int k, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *x, int incx) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, char, int, int, const cuDoubleComplex *, - int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZtbmv"); - return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); -} - -void CUBLASWINAPI cublasStpmv(char uplo, char trans, 
char diag, int n, - const float *AP, float *x, int incx) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, char, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasStpmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasStpmv"); - return func_ptr(uplo, trans, diag, n, AP, x, incx); -} - -void CUBLASWINAPI cublasDtpmv(char uplo, char trans, char diag, int n, - const double *AP, double *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const double *, - double *, int); - static auto func_ptr = LoadSymbol("cublasDtpmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDtpmv"); - return func_ptr(uplo, trans, diag, n, AP, x, incx); -} - -void CUBLASWINAPI cublasCtpmv(char uplo, char trans, char diag, int n, - const cuComplex *AP, cuComplex *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtpmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCtpmv"); - return func_ptr(uplo, trans, diag, n, AP, x, incx); -} - -void CUBLASWINAPI cublasZtpmv(char uplo, char trans, char diag, int n, - const cuDoubleComplex *AP, cuDoubleComplex *x, - int incx) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, char, int, const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtpmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZtpmv"); - return func_ptr(uplo, trans, diag, n, AP, x, incx); -} - -void CUBLASWINAPI cublasStrsv(char uplo, char trans, char diag, int n, - const float *A, int lda, float *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const float *, - int, float *, int); - static auto func_ptr = LoadSymbol("cublasStrsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasStrsv"); - return func_ptr(uplo, trans, diag, n, A, lda, x, incx); -} - -void CUBLASWINAPI cublasDtrsv(char uplo, char trans, char diag, int n, - const double *A, int lda, 
double *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const double *, - int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtrsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDtrsv"); - return func_ptr(uplo, trans, diag, n, A, lda, x, incx); -} - -void CUBLASWINAPI cublasCtrsv(char uplo, char trans, char diag, int n, - const cuComplex *A, int lda, cuComplex *x, - int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const cuComplex *, - int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtrsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCtrsv"); - return func_ptr(uplo, trans, diag, n, A, lda, x, incx); -} - -void CUBLASWINAPI cublasZtrsv(char uplo, char trans, char diag, int n, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *x, int incx) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, char, int, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtrsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZtrsv"); - return func_ptr(uplo, trans, diag, n, A, lda, x, incx); -} - -void CUBLASWINAPI cublasStpsv(char uplo, char trans, char diag, int n, - const float *AP, float *x, int incx) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, char, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasStpsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasStpsv"); - return func_ptr(uplo, trans, diag, n, AP, x, incx); -} - -void CUBLASWINAPI cublasDtpsv(char uplo, char trans, char diag, int n, - const double *AP, double *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const double *, - double *, int); - static auto func_ptr = LoadSymbol("cublasDtpsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDtpsv"); - return func_ptr(uplo, trans, diag, n, AP, x, incx); -} - -void CUBLASWINAPI cublasCtpsv(char uplo, char trans, char diag, int n, - const cuComplex *AP, cuComplex *x, 
int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtpsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCtpsv"); - return func_ptr(uplo, trans, diag, n, AP, x, incx); -} - -void CUBLASWINAPI cublasZtpsv(char uplo, char trans, char diag, int n, - const cuDoubleComplex *AP, cuDoubleComplex *x, - int incx) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, char, int, const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtpsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZtpsv"); - return func_ptr(uplo, trans, diag, n, AP, x, incx); -} - -void CUBLASWINAPI cublasStbsv(char uplo, char trans, char diag, int n, int k, - const float *A, int lda, float *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, int, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStbsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasStbsv"); - return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); -} - -void CUBLASWINAPI cublasDtbsv(char uplo, char trans, char diag, int n, int k, - const double *A, int lda, double *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, int, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtbsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDtbsv"); - return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); -} - -void CUBLASWINAPI cublasCtbsv(char uplo, char trans, char diag, int n, int k, - const cuComplex *A, int lda, cuComplex *x, - int incx) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, char, int, int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtbsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCtbsv"); - return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); -} - -void CUBLASWINAPI cublasZtbsv(char uplo, char trans, char 
diag, int n, int k, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *x, int incx) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, char, int, int, const cuDoubleComplex *, - int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtbsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZtbsv"); - return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); -} - -void CUBLASWINAPI cublasSsymv(char uplo, int n, float alpha, const float *A, - int lda, const float *x, int incx, float beta, - float *y, int incy) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const float *, int, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSsymv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSsymv"); - return func_ptr(uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasDsymv(char uplo, int n, double alpha, const double *A, - int lda, const double *x, int incx, double beta, - double *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, double, const double *, int, - const double *, int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDsymv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDsymv"); - return func_ptr(uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasChemv(char uplo, int n, cuComplex alpha, - const cuComplex *A, int lda, const cuComplex *x, - int incx, cuComplex beta, cuComplex *y, - int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasChemv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasChemv"); - return func_ptr(uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasZhemv(char uplo, int n, cuDoubleComplex alpha, - const cuDoubleComplex *A, int lda, - const cuDoubleComplex *x, int incx, - cuDoubleComplex beta, cuDoubleComplex *y, - int 
incy) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZhemv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZhemv"); - return func_ptr(uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasSsbmv(char uplo, int n, int k, float alpha, - const float *A, int lda, const float *x, int incx, - float beta, float *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, int, float, const float *, int, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSsbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSsbmv"); - return func_ptr(uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasDsbmv(char uplo, int n, int k, double alpha, - const double *A, int lda, const double *x, - int incx, double beta, double *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, int, double, const double *, int, - const double *, int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDsbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDsbmv"); - return func_ptr(uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasChbmv(char uplo, int n, int k, cuComplex alpha, - const cuComplex *A, int lda, const cuComplex *x, - int incx, cuComplex beta, cuComplex *y, - int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasChbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasChbmv"); - return func_ptr(uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasZhbmv(char uplo, int n, int k, cuDoubleComplex alpha, - const cuDoubleComplex *A, int lda, - const cuDoubleComplex *x, int incx, - cuDoubleComplex beta, 
cuDoubleComplex *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZhbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZhbmv"); - return func_ptr(uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasSspmv(char uplo, int n, float alpha, const float *AP, - const float *x, int incx, float beta, float *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const float *, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSspmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSspmv"); - return func_ptr(uplo, n, alpha, AP, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasDspmv(char uplo, int n, double alpha, const double *AP, - const double *x, int incx, double beta, double *y, - int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, double, const double *, const double *, - int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDspmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDspmv"); - return func_ptr(uplo, n, alpha, AP, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasChpmv(char uplo, int n, cuComplex alpha, - const cuComplex *AP, const cuComplex *x, int incx, - cuComplex beta, cuComplex *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasChpmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasChpmv"); - return func_ptr(uplo, n, alpha, AP, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasZhpmv(char uplo, int n, cuDoubleComplex alpha, - const cuDoubleComplex *AP, - const cuDoubleComplex *x, int incx, - cuDoubleComplex beta, cuDoubleComplex *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)( - char, 
int, cuDoubleComplex, const cuDoubleComplex *, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZhpmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZhpmv"); - return func_ptr(uplo, n, alpha, AP, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasSger(int m, int n, float alpha, const float *x, - int incx, const float *y, int incy, float *A, - int lda) { - using FuncPtr = void(CUBLASWINAPI *)(int, int, float, const float *, int, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSger"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSger"); - return func_ptr(m, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasDger(int m, int n, double alpha, const double *x, - int incx, const double *y, int incy, double *A, - int lda) { - using FuncPtr = void(CUBLASWINAPI *)(int, int, double, const double *, int, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDger"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDger"); - return func_ptr(m, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasCgeru(int m, int n, cuComplex alpha, const cuComplex *x, - int incx, const cuComplex *y, int incy, - cuComplex *A, int lda) { - using FuncPtr = - void(CUBLASWINAPI *)(int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgeru"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCgeru"); - return func_ptr(m, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasCgerc(int m, int n, cuComplex alpha, const cuComplex *x, - int incx, const cuComplex *y, int incy, - cuComplex *A, int lda) { - using FuncPtr = - void(CUBLASWINAPI *)(int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgerc"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCgerc"); - return 
func_ptr(m, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasZgeru(int m, int n, cuDoubleComplex alpha, - const cuDoubleComplex *x, int incx, - const cuDoubleComplex *y, int incy, - cuDoubleComplex *A, int lda) { - using FuncPtr = void(CUBLASWINAPI *)( - int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZgeru"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZgeru"); - return func_ptr(m, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasZgerc(int m, int n, cuDoubleComplex alpha, - const cuDoubleComplex *x, int incx, - const cuDoubleComplex *y, int incy, - cuDoubleComplex *A, int lda) { - using FuncPtr = void(CUBLASWINAPI *)( - int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZgerc"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZgerc"); - return func_ptr(m, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasSsyr(char uplo, int n, float alpha, const float *x, - int incx, float *A, int lda) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, float, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSsyr"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSsyr"); - return func_ptr(uplo, n, alpha, x, incx, A, lda); -} - -void CUBLASWINAPI cublasDsyr(char uplo, int n, double alpha, const double *x, - int incx, double *A, int lda) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, double, const double *, int, - double *, int); - static auto func_ptr = LoadSymbol("cublasDsyr"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDsyr"); - return func_ptr(uplo, n, alpha, x, incx, A, lda); -} - -void CUBLASWINAPI cublasCher(char uplo, int n, float alpha, const cuComplex *x, - int incx, cuComplex *A, int lda) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const cuComplex *, 
int, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCher"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCher"); - return func_ptr(uplo, n, alpha, x, incx, A, lda); -} - -void CUBLASWINAPI cublasZher(char uplo, int n, double alpha, - const cuDoubleComplex *x, int incx, - cuDoubleComplex *A, int lda) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, double, const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZher"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZher"); - return func_ptr(uplo, n, alpha, x, incx, A, lda); -} - -void CUBLASWINAPI cublasSspr(char uplo, int n, float alpha, const float *x, - int incx, float *AP) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, float, const float *, int, float *); - static auto func_ptr = LoadSymbol("cublasSspr"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSspr"); - return func_ptr(uplo, n, alpha, x, incx, AP); -} - -void CUBLASWINAPI cublasDspr(char uplo, int n, double alpha, const double *x, - int incx, double *AP) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, double, const double *, int, double *); - static auto func_ptr = LoadSymbol("cublasDspr"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDspr"); - return func_ptr(uplo, n, alpha, x, incx, AP); -} - -void CUBLASWINAPI cublasChpr(char uplo, int n, float alpha, const cuComplex *x, - int incx, cuComplex *AP) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const cuComplex *, int, - cuComplex *); - static auto func_ptr = LoadSymbol("cublasChpr"); - if (!func_ptr) LogFatalSymbolNotFound("cublasChpr"); - return func_ptr(uplo, n, alpha, x, incx, AP); -} - -void CUBLASWINAPI cublasZhpr(char uplo, int n, double alpha, - const cuDoubleComplex *x, int incx, - cuDoubleComplex *AP) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, double, const cuDoubleComplex *, int, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZhpr"); - if (!func_ptr) 
LogFatalSymbolNotFound("cublasZhpr"); - return func_ptr(uplo, n, alpha, x, incx, AP); -} - -void CUBLASWINAPI cublasSsyr2(char uplo, int n, float alpha, const float *x, - int incx, const float *y, int incy, float *A, - int lda) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const float *, int, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSsyr2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSsyr2"); - return func_ptr(uplo, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasDsyr2(char uplo, int n, double alpha, const double *x, - int incx, const double *y, int incy, double *A, - int lda) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, double, const double *, int, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDsyr2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDsyr2"); - return func_ptr(uplo, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasCher2(char uplo, int n, cuComplex alpha, - const cuComplex *x, int incx, const cuComplex *y, - int incy, cuComplex *A, int lda) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCher2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCher2"); - return func_ptr(uplo, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasZher2(char uplo, int n, cuDoubleComplex alpha, - const cuDoubleComplex *x, int incx, - const cuDoubleComplex *y, int incy, - cuDoubleComplex *A, int lda) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZher2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZher2"); - return func_ptr(uplo, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasSspr2(char uplo, int n, float alpha, const float 
*x, - int incx, const float *y, int incy, float *AP) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const float *, int, - const float *, int, float *); - static auto func_ptr = LoadSymbol("cublasSspr2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSspr2"); - return func_ptr(uplo, n, alpha, x, incx, y, incy, AP); -} - -void CUBLASWINAPI cublasDspr2(char uplo, int n, double alpha, const double *x, - int incx, const double *y, int incy, double *AP) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, double, const double *, int, - const double *, int, double *); - static auto func_ptr = LoadSymbol("cublasDspr2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDspr2"); - return func_ptr(uplo, n, alpha, x, incx, y, incy, AP); -} - -void CUBLASWINAPI cublasChpr2(char uplo, int n, cuComplex alpha, - const cuComplex *x, int incx, const cuComplex *y, - int incy, cuComplex *AP) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex *); - static auto func_ptr = LoadSymbol("cublasChpr2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasChpr2"); - return func_ptr(uplo, n, alpha, x, incx, y, incy, AP); -} - -void CUBLASWINAPI cublasZhpr2(char uplo, int n, cuDoubleComplex alpha, - const cuDoubleComplex *x, int incx, - const cuDoubleComplex *y, int incy, - cuDoubleComplex *AP) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZhpr2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZhpr2"); - return func_ptr(uplo, n, alpha, x, incx, y, incy, AP); -} - -void CUBLASWINAPI cublasSgemm(char transa, char transb, int m, int n, int k, - float alpha, const float *A, int lda, - const float *B, int ldb, float beta, float *C, - int ldc) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, int, int, int, float, const float *, int, - const float *, int, float, float 
*, int); - static auto func_ptr = LoadSymbol("cublasSgemm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSgemm"); - return func_ptr(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasDgemm(char transa, char transb, int m, int n, int k, - double alpha, const double *A, int lda, - const double *B, int ldb, double beta, double *C, - int ldc) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, int, int, int, double, const double *, - int, const double *, int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDgemm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDgemm"); - return func_ptr(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasCgemm(char transa, char transb, int m, int n, int k, - cuComplex alpha, const cuComplex *A, int lda, - const cuComplex *B, int ldb, cuComplex beta, - cuComplex *C, int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgemm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCgemm"); - return func_ptr(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasZgemm(char transa, char transb, int m, int n, int k, - cuDoubleComplex alpha, const cuDoubleComplex *A, - int lda, const cuDoubleComplex *B, int ldb, - cuDoubleComplex beta, cuDoubleComplex *C, - int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZgemm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZgemm"); - return func_ptr(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasSsyrk(char uplo, char trans, int n, int k, float alpha, - const float *A, int 
lda, float beta, float *C, - int ldc) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, int, int, float, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSsyrk"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSsyrk"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -void CUBLASWINAPI cublasDsyrk(char uplo, char trans, int n, int k, double alpha, - const double *A, int lda, double beta, double *C, - int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, double, const double *, int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDsyrk"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDsyrk"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -void CUBLASWINAPI cublasCsyrk(char uplo, char trans, int n, int k, - cuComplex alpha, const cuComplex *A, int lda, - cuComplex beta, cuComplex *C, int ldc) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, int, int, cuComplex, const cuComplex *, - int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsyrk"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCsyrk"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -void CUBLASWINAPI cublasZsyrk(char uplo, char trans, int n, int k, - cuDoubleComplex alpha, const cuDoubleComplex *A, - int lda, cuDoubleComplex beta, cuDoubleComplex *C, - int ldc) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, int, int, cuDoubleComplex, - const cuDoubleComplex *, int, - cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZsyrk"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZsyrk"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -void CUBLASWINAPI cublasCherk(char uplo, char trans, int n, int k, float alpha, - const cuComplex *A, int lda, float beta, - cuComplex *C, int ldc) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, int, int, float, const 
cuComplex *, int, - float, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCherk"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCherk"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -void CUBLASWINAPI cublasZherk(char uplo, char trans, int n, int k, double alpha, - const cuDoubleComplex *A, int lda, double beta, - cuDoubleComplex *C, int ldc) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, int, int, double, - const cuDoubleComplex *, int, double, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZherk"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZherk"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -void CUBLASWINAPI cublasSsyr2k(char uplo, char trans, int n, int k, float alpha, - const float *A, int lda, const float *B, int ldb, - float beta, float *C, int ldc) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, int, int, float, const float *, int, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSsyr2k"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSsyr2k"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasDsyr2k(char uplo, char trans, int n, int k, - double alpha, const double *A, int lda, - const double *B, int ldb, double beta, double *C, - int ldc) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, int, int, double, const double *, int, - const double *, int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDsyr2k"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDsyr2k"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasCsyr2k(char uplo, char trans, int n, int k, - cuComplex alpha, const cuComplex *A, int lda, - const cuComplex *B, int ldb, cuComplex beta, - cuComplex *C, int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, cuComplex, const cuComplex *, int, - 
const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsyr2k"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCsyr2k"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasZsyr2k(char uplo, char trans, int n, int k, - cuDoubleComplex alpha, const cuDoubleComplex *A, - int lda, const cuDoubleComplex *B, int ldb, - cuDoubleComplex beta, cuDoubleComplex *C, - int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZsyr2k"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZsyr2k"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasCher2k(char uplo, char trans, int n, int k, - cuComplex alpha, const cuComplex *A, int lda, - const cuComplex *B, int ldb, float beta, - cuComplex *C, int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, float, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCher2k"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCher2k"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasZher2k(char uplo, char trans, int n, int k, - cuDoubleComplex alpha, const cuDoubleComplex *A, - int lda, const cuDoubleComplex *B, int ldb, - double beta, cuDoubleComplex *C, int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, double, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZher2k"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZher2k"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasSsymm(char side, char 
uplo, int m, int n, float alpha, - const float *A, int lda, const float *B, int ldb, - float beta, float *C, int ldc) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, int, int, float, const float *, int, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSsymm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSsymm"); - return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasDsymm(char side, char uplo, int m, int n, double alpha, - const double *A, int lda, const double *B, - int ldb, double beta, double *C, int ldc) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, int, int, double, const double *, int, - const double *, int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDsymm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDsymm"); - return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasCsymm(char side, char uplo, int m, int n, - cuComplex alpha, const cuComplex *A, int lda, - const cuComplex *B, int ldb, cuComplex beta, - cuComplex *C, int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsymm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCsymm"); - return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasZsymm(char side, char uplo, int m, int n, - cuDoubleComplex alpha, const cuDoubleComplex *A, - int lda, const cuDoubleComplex *B, int ldb, - cuDoubleComplex beta, cuDoubleComplex *C, - int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZsymm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZsymm"); - return 
func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasChemm(char side, char uplo, int m, int n, - cuComplex alpha, const cuComplex *A, int lda, - const cuComplex *B, int ldb, cuComplex beta, - cuComplex *C, int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasChemm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasChemm"); - return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasZhemm(char side, char uplo, int m, int n, - cuDoubleComplex alpha, const cuDoubleComplex *A, - int lda, const cuDoubleComplex *B, int ldb, - cuDoubleComplex beta, cuDoubleComplex *C, - int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZhemm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZhemm"); - return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasStrsm(char side, char uplo, char transa, char diag, - int m, int n, float alpha, const float *A, - int lda, float *B, int ldb) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, float, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStrsm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasStrsm"); - return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); -} - -void CUBLASWINAPI cublasDtrsm(char side, char uplo, char transa, char diag, - int m, int n, double alpha, const double *A, - int lda, double *B, int ldb) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, double, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtrsm"); - if (!func_ptr) 
LogFatalSymbolNotFound("cublasDtrsm"); - return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); -} - -void CUBLASWINAPI cublasCtrsm(char side, char uplo, char transa, char diag, - int m, int n, cuComplex alpha, const cuComplex *A, - int lda, cuComplex *B, int ldb) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, char, char, int, int, cuComplex, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtrsm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCtrsm"); - return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); -} - -void CUBLASWINAPI cublasZtrsm(char side, char uplo, char transa, char diag, - int m, int n, cuDoubleComplex alpha, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *B, int ldb) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, - cuDoubleComplex, const cuDoubleComplex *, - int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtrsm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZtrsm"); - return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); -} - -void CUBLASWINAPI cublasStrmm(char side, char uplo, char transa, char diag, - int m, int n, float alpha, const float *A, - int lda, float *B, int ldb) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, float, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStrmm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasStrmm"); - return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); -} - -void CUBLASWINAPI cublasDtrmm(char side, char uplo, char transa, char diag, - int m, int n, double alpha, const double *A, - int lda, double *B, int ldb) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, double, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtrmm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDtrmm"); - return func_ptr(side, uplo, 
transa, diag, m, n, alpha, A, lda, B, ldb); -} - -void CUBLASWINAPI cublasCtrmm(char side, char uplo, char transa, char diag, - int m, int n, cuComplex alpha, const cuComplex *A, - int lda, cuComplex *B, int ldb) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, char, char, int, int, cuComplex, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtrmm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCtrmm"); - return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); -} - -void CUBLASWINAPI cublasZtrmm(char side, char uplo, char transa, char diag, - int m, int n, cuDoubleComplex alpha, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *B, int ldb) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, - cuDoubleComplex, const cuDoubleComplex *, - int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtrmm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZtrmm"); - return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cublas_10_2.inc b/third_party/xla/third_party/tsl/tsl/cuda/cublas_10_2.inc deleted file mode 100644 index 067ba675288524..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cublas_10_2.inc +++ /dev/null @@ -1,5023 +0,0 @@ -// Auto-generated, do not edit. 
- -extern "C" { - -cublasStatus_t CUBLASWINAPI cublasCreate_v2(cublasHandle_t *handle) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t *); - static auto func_ptr = LoadSymbol("cublasCreate_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cublasStatus_t CUBLASWINAPI cublasDestroy_v2(cublasHandle_t handle) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t); - static auto func_ptr = LoadSymbol("cublasDestroy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cublasStatus_t CUBLASWINAPI cublasGetVersion_v2(cublasHandle_t handle, - int *version) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int *); - static auto func_ptr = LoadSymbol("cublasGetVersion_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, version); -} - -cublasStatus_t CUBLASWINAPI cublasGetProperty(libraryPropertyType type, - int *value) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(libraryPropertyType, int *); - static auto func_ptr = LoadSymbol("cublasGetProperty"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type, value); -} - -size_t CUBLASWINAPI cublasGetCudartVersion(void) { - using FuncPtr = size_t(CUBLASWINAPI *)(); - static auto func_ptr = LoadSymbol("cublasGetCudartVersion"); - if (!func_ptr) LogFatalSymbolNotFound("cublasGetCudartVersion"); - return func_ptr(); -} - -cublasStatus_t CUBLASWINAPI cublasSetStream_v2(cublasHandle_t handle, - cudaStream_t streamId) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cublasSetStream_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cublasStatus_t CUBLASWINAPI cublasGetStream_v2(cublasHandle_t handle, - cudaStream_t *streamId) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cudaStream_t *); - static auto func_ptr = 
LoadSymbol("cublasGetStream_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cublasStatus_t CUBLASWINAPI cublasGetPointerMode_v2(cublasHandle_t handle, - cublasPointerMode_t *mode) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasPointerMode_t *); - static auto func_ptr = LoadSymbol("cublasGetPointerMode_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cublasStatus_t CUBLASWINAPI cublasSetPointerMode_v2(cublasHandle_t handle, - cublasPointerMode_t mode) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasPointerMode_t); - static auto func_ptr = LoadSymbol("cublasSetPointerMode_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cublasStatus_t CUBLASWINAPI cublasGetAtomicsMode(cublasHandle_t handle, - cublasAtomicsMode_t *mode) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasAtomicsMode_t *); - static auto func_ptr = LoadSymbol("cublasGetAtomicsMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cublasStatus_t CUBLASWINAPI cublasSetAtomicsMode(cublasHandle_t handle, - cublasAtomicsMode_t mode) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasAtomicsMode_t); - static auto func_ptr = LoadSymbol("cublasSetAtomicsMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cublasStatus_t CUBLASWINAPI cublasGetMathMode(cublasHandle_t handle, - cublasMath_t *mode) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasMath_t *); - static auto func_ptr = LoadSymbol("cublasGetMathMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cublasStatus_t CUBLASWINAPI cublasSetMathMode(cublasHandle_t handle, - cublasMath_t mode) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, 
cublasMath_t); - static auto func_ptr = LoadSymbol("cublasSetMathMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cublasStatus_t CUBLASWINAPI cublasLoggerConfigure(int logIsOn, int logToStdOut, - int logToStdErr, - const char *logFileName) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, int, const char *); - static auto func_ptr = LoadSymbol("cublasLoggerConfigure"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(logIsOn, logToStdOut, logToStdErr, logFileName); -} - -cublasStatus_t CUBLASWINAPI -cublasSetLoggerCallback(cublasLogCallback userCallback) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasLogCallback); - static auto func_ptr = LoadSymbol("cublasSetLoggerCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(userCallback); -} - -cublasStatus_t CUBLASWINAPI -cublasGetLoggerCallback(cublasLogCallback *userCallback) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasLogCallback *); - static auto func_ptr = LoadSymbol("cublasGetLoggerCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(userCallback); -} - -cublasStatus_t CUBLASWINAPI cublasSetVector(int n, int elemSize, const void *x, - int incx, void *devicePtr, - int incy) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(int, int, const void *, int, void *, int); - static auto func_ptr = LoadSymbol("cublasSetVector"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(n, elemSize, x, incx, devicePtr, incy); -} - -cublasStatus_t CUBLASWINAPI cublasGetVector(int n, int elemSize, const void *x, - int incx, void *y, int incy) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(int, int, const void *, int, void *, int); - static auto func_ptr = LoadSymbol("cublasGetVector"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(n, elemSize, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasSetMatrix(int rows, int cols, int 
elemSize, - const void *A, int lda, void *B, - int ldb) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, int, const void *, - int, void *, int); - static auto func_ptr = LoadSymbol("cublasSetMatrix"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rows, cols, elemSize, A, lda, B, ldb); -} - -cublasStatus_t CUBLASWINAPI cublasGetMatrix(int rows, int cols, int elemSize, - const void *A, int lda, void *B, - int ldb) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, int, const void *, - int, void *, int); - static auto func_ptr = LoadSymbol("cublasGetMatrix"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rows, cols, elemSize, A, lda, B, ldb); -} - -cublasStatus_t CUBLASWINAPI cublasSetVectorAsync(int n, int elemSize, - const void *hostPtr, int incx, - void *devicePtr, int incy, - cudaStream_t stream) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, const void *, int, - void *, int, cudaStream_t); - static auto func_ptr = LoadSymbol("cublasSetVectorAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(n, elemSize, hostPtr, incx, devicePtr, incy, stream); -} - -cublasStatus_t CUBLASWINAPI cublasGetVectorAsync(int n, int elemSize, - const void *devicePtr, - int incx, void *hostPtr, - int incy, - cudaStream_t stream) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, const void *, int, - void *, int, cudaStream_t); - static auto func_ptr = LoadSymbol("cublasGetVectorAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(n, elemSize, devicePtr, incx, hostPtr, incy, stream); -} - -cublasStatus_t CUBLASWINAPI cublasSetMatrixAsync(int rows, int cols, - int elemSize, const void *A, - int lda, void *B, int ldb, - cudaStream_t stream) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - int, int, int, const void *, int, void *, int, cudaStream_t); - static auto func_ptr = LoadSymbol("cublasSetMatrixAsync"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(rows, cols, elemSize, A, lda, B, ldb, stream); -} - -cublasStatus_t CUBLASWINAPI cublasGetMatrixAsync(int rows, int cols, - int elemSize, const void *A, - int lda, void *B, int ldb, - cudaStream_t stream) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - int, int, int, const void *, int, void *, int, cudaStream_t); - static auto func_ptr = LoadSymbol("cublasGetMatrixAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rows, cols, elemSize, A, lda, B, ldb, stream); -} - -void CUBLASWINAPI cublasXerbla(const char *srName, int info) { - using FuncPtr = void(CUBLASWINAPI *)(const char *, int); - static auto func_ptr = LoadSymbol("cublasXerbla"); - if (!func_ptr) LogFatalSymbolNotFound("cublasXerbla"); - return func_ptr(srName, info); -} - -cublasStatus_t CUBLASWINAPI cublasNrm2Ex(cublasHandle_t handle, int n, - const void *x, cudaDataType xType, - int incx, void *result, - cudaDataType resultType, - cudaDataType executionType) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const void *, cudaDataType, int, void *, - cudaDataType, cudaDataType); - static auto func_ptr = LoadSymbol("cublasNrm2Ex"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, xType, incx, result, resultType, executionType); -} - -cublasStatus_t CUBLASWINAPI cublasSnrm2_v2(cublasHandle_t handle, int n, - const float *x, int incx, - float *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const float *, int, float *); - static auto func_ptr = LoadSymbol("cublasSnrm2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasDnrm2_v2(cublasHandle_t handle, int n, - const double *x, int incx, - double *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const double *, int, double *); - static auto func_ptr = 
LoadSymbol("cublasDnrm2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasScnrm2_v2(cublasHandle_t handle, int n, - const cuComplex *x, int incx, - float *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuComplex *, int, float *); - static auto func_ptr = LoadSymbol("cublasScnrm2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasDznrm2_v2(cublasHandle_t handle, int n, - const cuDoubleComplex *x, int incx, - double *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *, int, double *); - static auto func_ptr = LoadSymbol("cublasDznrm2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasDotEx(cublasHandle_t handle, int n, - const void *x, cudaDataType xType, - int incx, const void *y, - cudaDataType yType, int incy, - void *result, cudaDataType resultType, - cudaDataType executionType) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const void *, cudaDataType, int, const void *, - cudaDataType, int, void *, cudaDataType, cudaDataType); - static auto func_ptr = LoadSymbol("cublasDotEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, xType, incx, y, yType, incy, result, resultType, - executionType); -} - -cublasStatus_t CUBLASWINAPI cublasDotcEx(cublasHandle_t handle, int n, - const void *x, cudaDataType xType, - int incx, const void *y, - cudaDataType yType, int incy, - void *result, cudaDataType resultType, - cudaDataType executionType) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const void *, cudaDataType, int, const void *, - cudaDataType, int, void *, cudaDataType, cudaDataType); - static auto func_ptr = 
LoadSymbol("cublasDotcEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, xType, incx, y, yType, incy, result, resultType, - executionType); -} - -cublasStatus_t CUBLASWINAPI cublasSdot_v2(cublasHandle_t handle, int n, - const float *x, int incx, - const float *y, int incy, - float *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const float *, int, const float *, int, float *); - static auto func_ptr = LoadSymbol("cublasSdot_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, result); -} - -cublasStatus_t CUBLASWINAPI cublasDdot_v2(cublasHandle_t handle, int n, - const double *x, int incx, - const double *y, int incy, - double *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const double *, int, const double *, int, double *); - static auto func_ptr = LoadSymbol("cublasDdot_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, result); -} - -cublasStatus_t CUBLASWINAPI cublasCdotu_v2(cublasHandle_t handle, int n, - const cuComplex *x, int incx, - const cuComplex *y, int incy, - cuComplex *result) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *, - int, const cuComplex *, int, cuComplex *); - static auto func_ptr = LoadSymbol("cublasCdotu_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, result); -} - -cublasStatus_t CUBLASWINAPI cublasCdotc_v2(cublasHandle_t handle, int n, - const cuComplex *x, int incx, - const cuComplex *y, int incy, - cuComplex *result) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *, - int, const cuComplex *, int, cuComplex *); - static auto func_ptr = LoadSymbol("cublasCdotc_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, result); -} - 
-cublasStatus_t CUBLASWINAPI cublasZdotu_v2(cublasHandle_t handle, int n, - const cuDoubleComplex *x, int incx, - const cuDoubleComplex *y, int incy, - cuDoubleComplex *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZdotu_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, result); -} - -cublasStatus_t CUBLASWINAPI cublasZdotc_v2(cublasHandle_t handle, int n, - const cuDoubleComplex *x, int incx, - const cuDoubleComplex *y, int incy, - cuDoubleComplex *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZdotc_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, result); -} - -cublasStatus_t CUBLASWINAPI -cublasScalEx(cublasHandle_t handle, int n, - const void *alpha, /* host or device pointer */ - cudaDataType alphaType, void *x, cudaDataType xType, int incx, - cudaDataType executionType) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const void *, cudaDataType, void *, cudaDataType, - int, cudaDataType); - static auto func_ptr = LoadSymbol("cublasScalEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, alphaType, x, xType, incx, executionType); -} - -cublasStatus_t CUBLASWINAPI -cublasSscal_v2(cublasHandle_t handle, int n, - const float *alpha, /* host or device pointer */ - float *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasSscal_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx); -} - -cublasStatus_t CUBLASWINAPI 
-cublasDscal_v2(cublasHandle_t handle, int n, - const double *alpha, /* host or device pointer */ - double *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDscal_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx); -} - -cublasStatus_t CUBLASWINAPI -cublasCscal_v2(cublasHandle_t handle, int n, - const cuComplex *alpha, /* host or device pointer */ - cuComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCscal_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx); -} - -cublasStatus_t CUBLASWINAPI -cublasCsscal_v2(cublasHandle_t handle, int n, - const float *alpha, /* host or device pointer */ - cuComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const float *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsscal_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx); -} - -cublasStatus_t CUBLASWINAPI -cublasZscal_v2(cublasHandle_t handle, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - cuDoubleComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZscal_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx); -} - -cublasStatus_t CUBLASWINAPI -cublasZdscal_v2(cublasHandle_t handle, int n, - const double *alpha, /* host or device pointer */ - cuDoubleComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const double *, cuDoubleComplex *, int); - static auto func_ptr = 
LoadSymbol("cublasZdscal_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasAxpyEx( - cublasHandle_t handle, int n, - const void *alpha, /* host or device pointer */ - cudaDataType alphaType, const void *x, cudaDataType xType, int incx, - void *y, cudaDataType yType, int incy, cudaDataType executiontype) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const void *, cudaDataType, const void *, - cudaDataType, int, void *, cudaDataType, int, cudaDataType); - static auto func_ptr = LoadSymbol("cublasAxpyEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, alphaType, x, xType, incx, y, yType, incy, - executiontype); -} - -cublasStatus_t CUBLASWINAPI -cublasSaxpy_v2(cublasHandle_t handle, int n, - const float *alpha, /* host or device pointer */ - const float *x, int incx, float *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const float *, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSaxpy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasDaxpy_v2(cublasHandle_t handle, int n, - const double *alpha, /* host or device pointer */ - const double *x, int incx, double *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const double *, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDaxpy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasCaxpy_v2(cublasHandle_t handle, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *x, int incx, cuComplex *y, int incy) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *, - 
const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCaxpy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasZaxpy_v2( - cublasHandle_t handle, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *x, int incx, cuDoubleComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *, const cuDoubleComplex *, - int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZaxpy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasCopyEx(cublasHandle_t handle, int n, - const void *x, cudaDataType xType, - int incx, void *y, cudaDataType yType, - int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const void *, cudaDataType, int, void *, - cudaDataType, int); - static auto func_ptr = LoadSymbol("cublasCopyEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, xType, incx, y, yType, incy); -} - -cublasStatus_t CUBLASWINAPI cublasScopy_v2(cublasHandle_t handle, int n, - const float *x, int incx, float *y, - int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasScopy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasDcopy_v2(cublasHandle_t handle, int n, - const double *x, int incx, double *y, - int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDcopy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy); -} - 
-cublasStatus_t CUBLASWINAPI cublasCcopy_v2(cublasHandle_t handle, int n, - const cuComplex *x, int incx, - cuComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCcopy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasZcopy_v2(cublasHandle_t handle, int n, - const cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZcopy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasSswap_v2(cublasHandle_t handle, int n, - float *x, int incx, float *y, - int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, float *, - int, float *, int); - static auto func_ptr = LoadSymbol("cublasSswap_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasDswap_v2(cublasHandle_t handle, int n, - double *x, int incx, double *y, - int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, double *, - int, double *, int); - static auto func_ptr = LoadSymbol("cublasDswap_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasCswap_v2(cublasHandle_t handle, int n, - cuComplex *x, int incx, cuComplex *y, - int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCswap_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy); -} - 
-cublasStatus_t CUBLASWINAPI cublasZswap_v2(cublasHandle_t handle, int n, - cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZswap_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasSwapEx(cublasHandle_t handle, int n, void *x, - cudaDataType xType, int incx, void *y, - cudaDataType yType, int incy) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, void *, cudaDataType, - int, void *, cudaDataType, int); - static auto func_ptr = LoadSymbol("cublasSwapEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, xType, incx, y, yType, incy); -} - -cublasStatus_t CUBLASWINAPI cublasIsamax_v2(cublasHandle_t handle, int n, - const float *x, int incx, - int *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const float *, int, int *); - static auto func_ptr = LoadSymbol("cublasIsamax_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasIdamax_v2(cublasHandle_t handle, int n, - const double *x, int incx, - int *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const double *, int, int *); - static auto func_ptr = LoadSymbol("cublasIdamax_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasIcamax_v2(cublasHandle_t handle, int n, - const cuComplex *x, int incx, - int *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cublasIcamax_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, 
incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasIzamax_v2(cublasHandle_t handle, int n, - const cuDoubleComplex *x, int incx, - int *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cublasIzamax_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasIamaxEx( - cublasHandle_t handle, int n, const void *x, cudaDataType xType, int incx, - int *result /* host or device pointer */ -) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const void *, cudaDataType, int, int *); - static auto func_ptr = LoadSymbol("cublasIamaxEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, xType, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasIsamin_v2(cublasHandle_t handle, int n, - const float *x, int incx, - int *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const float *, int, int *); - static auto func_ptr = LoadSymbol("cublasIsamin_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasIdamin_v2(cublasHandle_t handle, int n, - const double *x, int incx, - int *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const double *, int, int *); - static auto func_ptr = LoadSymbol("cublasIdamin_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasIcamin_v2(cublasHandle_t handle, int n, - const cuComplex *x, int incx, - int *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cublasIcamin_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, 
result); -} - -cublasStatus_t CUBLASWINAPI cublasIzamin_v2(cublasHandle_t handle, int n, - const cuDoubleComplex *x, int incx, - int *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cublasIzamin_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasIaminEx( - cublasHandle_t handle, int n, const void *x, cudaDataType xType, int incx, - int *result /* host or device pointer */ -) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const void *, cudaDataType, int, int *); - static auto func_ptr = LoadSymbol("cublasIaminEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, xType, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasAsumEx( - cublasHandle_t handle, int n, const void *x, cudaDataType xType, int incx, - void *result, cudaDataType resultType, /* host or device pointer */ - cudaDataType executiontype) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const void *, cudaDataType, int, void *, - cudaDataType, cudaDataType); - static auto func_ptr = LoadSymbol("cublasAsumEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, xType, incx, result, resultType, executiontype); -} - -cublasStatus_t CUBLASWINAPI cublasSasum_v2(cublasHandle_t handle, int n, - const float *x, int incx, - float *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const float *, int, float *); - static auto func_ptr = LoadSymbol("cublasSasum_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasDasum_v2(cublasHandle_t handle, int n, - const double *x, int incx, - double *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const 
double *, int, double *); - static auto func_ptr = LoadSymbol("cublasDasum_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasScasum_v2(cublasHandle_t handle, int n, - const cuComplex *x, int incx, - float *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuComplex *, int, float *); - static auto func_ptr = LoadSymbol("cublasScasum_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasDzasum_v2(cublasHandle_t handle, int n, - const cuDoubleComplex *x, int incx, - double *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *, int, double *); - static auto func_ptr = LoadSymbol("cublasDzasum_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI -cublasSrot_v2(cublasHandle_t handle, int n, float *x, int incx, float *y, - int incy, const float *c, /* host or device pointer */ - const float *s) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, float *, int, float *, - int, const float *, const float *); - static auto func_ptr = LoadSymbol("cublasSrot_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, c, s); -} - -cublasStatus_t CUBLASWINAPI -cublasDrot_v2(cublasHandle_t handle, int n, double *x, int incx, double *y, - int incy, const double *c, /* host or device pointer */ - const double *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, double *, int, double *, int, const double *, - const double *); - static auto func_ptr = LoadSymbol("cublasDrot_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, c, s); -} - -cublasStatus_t CUBLASWINAPI cublasCrot_v2( - 
cublasHandle_t handle, int n, cuComplex *x, int incx, cuComplex *y, - int incy, const float *c, /* host or device pointer */ - const cuComplex *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, cuComplex *, int, cuComplex *, int, const float *, - const cuComplex *); - static auto func_ptr = LoadSymbol("cublasCrot_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, c, s); -} - -cublasStatus_t CUBLASWINAPI cublasCsrot_v2( - cublasHandle_t handle, int n, cuComplex *x, int incx, cuComplex *y, - int incy, const float *c, /* host or device pointer */ - const float *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, cuComplex *, int, cuComplex *, int, const float *, - const float *); - static auto func_ptr = LoadSymbol("cublasCsrot_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, c, s); -} - -cublasStatus_t CUBLASWINAPI cublasZrot_v2( - cublasHandle_t handle, int n, cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy, const double *c, /* host or device pointer */ - const cuDoubleComplex *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int, - const double *, const cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZrot_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, c, s); -} - -cublasStatus_t CUBLASWINAPI cublasZdrot_v2( - cublasHandle_t handle, int n, cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy, const double *c, /* host or device pointer */ - const double *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int, - const double *, const double *); - static auto func_ptr = LoadSymbol("cublasZdrot_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, 
incx, y, incy, c, s); -} - -cublasStatus_t CUBLASWINAPI -cublasRotEx(cublasHandle_t handle, int n, void *x, cudaDataType xType, int incx, - void *y, cudaDataType yType, int incy, - const void *c, /* host or device pointer */ - const void *s, cudaDataType csType, cudaDataType executiontype) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, void *, cudaDataType, int, void *, cudaDataType, int, - const void *, const void *, cudaDataType, cudaDataType); - static auto func_ptr = LoadSymbol("cublasRotEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, xType, incx, y, yType, incy, c, s, csType, - executiontype); -} - -cublasStatus_t CUBLASWINAPI -cublasSrotg_v2(cublasHandle_t handle, float *a, /* host or device pointer */ - float *b, /* host or device pointer */ - float *c, /* host or device pointer */ - float *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, float *, - float *, float *, float *); - static auto func_ptr = LoadSymbol("cublasSrotg_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, a, b, c, s); -} - -cublasStatus_t CUBLASWINAPI -cublasDrotg_v2(cublasHandle_t handle, double *a, /* host or device pointer */ - double *b, /* host or device pointer */ - double *c, /* host or device pointer */ - double *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, double *, - double *, double *, double *); - static auto func_ptr = LoadSymbol("cublasDrotg_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, a, b, c, s); -} - -cublasStatus_t CUBLASWINAPI -cublasCrotg_v2(cublasHandle_t handle, cuComplex *a, /* host or device pointer */ - cuComplex *b, /* host or device pointer */ - float *c, /* host or device pointer */ - cuComplex *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cuComplex *, cuComplex *, float *, cuComplex *); - static auto func_ptr = LoadSymbol("cublasCrotg_v2"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, a, b, c, s); -} - -cublasStatus_t CUBLASWINAPI cublasZrotg_v2( - cublasHandle_t handle, cuDoubleComplex *a, /* host or device pointer */ - cuDoubleComplex *b, /* host or device pointer */ - double *c, /* host or device pointer */ - cuDoubleComplex *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cuDoubleComplex *, cuDoubleComplex *, double *, - cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZrotg_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, a, b, c, s); -} - -cublasStatus_t CUBLASWINAPI cublasRotgEx(cublasHandle_t handle, - void *a, /* host or device pointer */ - void *b, /* host or device pointer */ - cudaDataType abType, - void *c, /* host or device pointer */ - void *s, /* host or device pointer */ - cudaDataType csType, - cudaDataType executiontype) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, void *, void *, - cudaDataType, void *, void *, - cudaDataType, cudaDataType); - static auto func_ptr = LoadSymbol("cublasRotgEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, a, b, abType, c, s, csType, executiontype); -} - -cublasStatus_t CUBLASWINAPI cublasSrotm_v2(cublasHandle_t handle, int n, - float *x, int incx, float *y, - int incy, const float *param) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, float *, int, float *, int, const float *); - static auto func_ptr = LoadSymbol("cublasSrotm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, param); -} - -cublasStatus_t CUBLASWINAPI cublasDrotm_v2(cublasHandle_t handle, int n, - double *x, int incx, double *y, - int incy, const double *param) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, double *, int, double *, int, const double *); - static auto func_ptr = LoadSymbol("cublasDrotm_v2"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, param); -} - -cublasStatus_t CUBLASWINAPI -cublasRotmEx(cublasHandle_t handle, int n, void *x, cudaDataType xType, - int incx, void *y, cudaDataType yType, int incy, - const void *param, /* host or device pointer */ - cudaDataType paramType, cudaDataType executiontype) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, void *, cudaDataType, int, void *, cudaDataType, int, - const void *, cudaDataType, cudaDataType); - static auto func_ptr = LoadSymbol("cublasRotmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, xType, incx, y, yType, incy, param, paramType, - executiontype); -} - -cublasStatus_t CUBLASWINAPI -cublasSrotmg_v2(cublasHandle_t handle, float *d1, /* host or device pointer */ - float *d2, /* host or device pointer */ - float *x1, /* host or device pointer */ - const float *y1, /* host or device pointer */ - float *param) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, float *, float *, float *, const float *, float *); - static auto func_ptr = LoadSymbol("cublasSrotmg_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, d1, d2, x1, y1, param); -} - -cublasStatus_t CUBLASWINAPI -cublasDrotmg_v2(cublasHandle_t handle, double *d1, /* host or device pointer */ - double *d2, /* host or device pointer */ - double *x1, /* host or device pointer */ - const double *y1, /* host or device pointer */ - double *param) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, double *, double *, double *, const double *, double *); - static auto func_ptr = LoadSymbol("cublasDrotmg_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, d1, d2, x1, y1, param); -} - -cublasStatus_t CUBLASWINAPI -cublasRotmgEx(cublasHandle_t handle, void *d1, /* host or device pointer */ - cudaDataType d1Type, void *d2, /* host or device pointer */ - 
cudaDataType d2Type, void *x1, /* host or device pointer */ - cudaDataType x1Type, const void *y1, /* host or device pointer */ - cudaDataType y1Type, void *param, /* host or device pointer */ - cudaDataType paramType, cudaDataType executiontype) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, void *, cudaDataType, void *, cudaDataType, void *, - cudaDataType, const void *, cudaDataType, void *, cudaDataType, - cudaDataType); - static auto func_ptr = LoadSymbol("cublasRotmgEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, d1, d1Type, d2, d2Type, x1, x1Type, y1, y1Type, param, - paramType, executiontype); -} - -cublasStatus_t CUBLASWINAPI -cublasSgemv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *x, int incx, - const float *beta, /* host or device pointer */ - float *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const float *, const float *, - int, const float *, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasSgemv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasDgemv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *x, int incx, - const double *beta, /* host or device pointer */ - double *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const double *, - const double *, int, const double *, int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDgemv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy); -} - 
-cublasStatus_t CUBLASWINAPI -cublasCgemv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *x, int incx, - const cuComplex *beta, /* host or device pointer */ - cuComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgemv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasZgemv_v2( - cublasHandle_t handle, cublasOperation_t trans, int m, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZgemv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasSgbmv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - int kl, int ku, const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *x, int incx, - const float *beta, /* host or device pointer */ - float *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, int, int, const float *, - const float *, int, const float *, int, const float *, float *, int); - static auto func_ptr = 
LoadSymbol("cublasSgbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, - incy); -} - -cublasStatus_t CUBLASWINAPI -cublasDgbmv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - int kl, int ku, const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *x, int incx, - const double *beta, /* host or device pointer */ - double *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, int, int, const double *, - const double *, int, const double *, int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDgbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, - incy); -} - -cublasStatus_t CUBLASWINAPI cublasCgbmv_v2( - cublasHandle_t handle, cublasOperation_t trans, int m, int n, int kl, - int ku, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *x, int incx, - const cuComplex *beta, /* host or device pointer */ - cuComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, int, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, - incy); -} - -cublasStatus_t CUBLASWINAPI cublasZgbmv_v2( - cublasHandle_t handle, cublasOperation_t trans, int m, int n, int kl, - int ku, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *y, int incy) { - using FuncPtr = 
cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cublasZgbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, - incy); -} - -cublasStatus_t CUBLASWINAPI cublasStrmv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - cublasDiagType_t diag, int n, const float *A, int lda, float *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStrmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasDtrmv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, - const double *A, int lda, double *x, - int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtrmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasCtrmv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, - const cuComplex *A, int lda, - cuComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtrmv_v2"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasZtrmv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtrmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasStbmv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, int k, - const float *A, int lda, float *x, - int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, int, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasDtbmv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, int k, - const double *A, int lda, double *x, - int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, int, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasCtbmv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, int k, - const cuComplex *A, int lda, - 
cuComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasZtbmv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, int k, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, int, const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasStpmv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - cublasDiagType_t diag, int n, const float *AP, float *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasStpmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasDtpmv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - cublasDiagType_t diag, int n, const double *AP, double *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDtpmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, 
diag, n, AP, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasCtpmv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - cublasDiagType_t diag, int n, const cuComplex *AP, cuComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtpmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasZtpmv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, - const cuDoubleComplex *AP, - cuDoubleComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtpmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasStrsv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - cublasDiagType_t diag, int n, const float *A, int lda, float *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStrsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasDtrsv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, - const double *A, int lda, double *x, - int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - 
int, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtrsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasCtrsv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, - const cuComplex *A, int lda, - cuComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtrsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasZtrsv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtrsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasStpsv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - cublasDiagType_t diag, int n, const float *AP, float *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasStpsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasDtpsv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, 
- cublasDiagType_t diag, int n, const double *AP, double *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDtpsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasCtpsv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - cublasDiagType_t diag, int n, const cuComplex *AP, cuComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtpsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasZtpsv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, - const cuDoubleComplex *AP, - cuDoubleComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtpsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasStbsv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, int k, - const float *A, int lda, float *x, - int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, int, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStbsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasDtbsv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, int k, - const double *A, int lda, double *x, - int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, int, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtbsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasCtbsv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, int k, - const cuComplex *A, int lda, - cuComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtbsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasZtbsv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, int k, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, int, const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtbsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI -cublasSsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *x, 
int incx, - const float *beta, /* host or device pointer */ - float *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int, - const float *, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasSsymv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasDsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *x, int incx, - const double *beta, /* host or device pointer */ - double *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, const double *, - int, const double *, int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDsymv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasCsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *x, int incx, - const cuComplex *beta, /* host or device pointer */ - cuComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsymv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasZsymv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex 
*x, int incx, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZsymv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasChemv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *x, int incx, - const cuComplex *beta, /* host or device pointer */ - cuComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasChemv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasZhemv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZhemv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI 
-cublasSsbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, int k, - const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *x, int incx, - const float *beta, /* host or device pointer */ - float *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, int, const float *, const float *, - int, const float *, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasSsbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasDsbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, int k, - const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *x, int incx, - const double *beta, /* host or device pointer */ - double *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, int, const double *, - const double *, int, const double *, int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDsbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasChbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, int k, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *x, int incx, - const cuComplex *beta, /* host or device pointer */ - cuComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasChbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); -} - 
-cublasStatus_t CUBLASWINAPI cublasZhbmv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, int n, int k, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZhbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasSspmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const float *alpha, /* host or device pointer */ - const float *AP, const float *x, int incx, - const float *beta, /* host or device pointer */ - float *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, const float *, - const float *, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasSspmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, AP, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasDspmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const double *alpha, /* host or device pointer */ - const double *AP, const double *x, int incx, - const double *beta, /* host or device pointer */ - double *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, const double *, - const double *, int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDspmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, AP, x, incx, beta, y, 
incy); -} - -cublasStatus_t CUBLASWINAPI -cublasChpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *AP, const cuComplex *x, int incx, - const cuComplex *beta, /* host or device pointer */ - cuComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuComplex *, - const cuComplex *, const cuComplex *, int, const cuComplex *, cuComplex *, - int); - static auto func_ptr = LoadSymbol("cublasChpmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, AP, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasZhpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *AP, const cuDoubleComplex *x, int incx, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZhpmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, AP, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasSger_v2( - cublasHandle_t handle, int m, int n, - const float *alpha, /* host or device pointer */ - const float *x, int incx, const float *y, int incy, float *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, int, const float *, const float *, int, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSger_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI cublasDger_v2( - cublasHandle_t 
handle, int m, int n, - const double *alpha, /* host or device pointer */ - const double *x, int incx, const double *y, int incy, double *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, int, const double *, const double *, int, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDger_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasCgeru_v2(cublasHandle_t handle, int m, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *x, int incx, const cuComplex *y, int incy, - cuComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, int, const cuComplex *, const cuComplex *, int, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgeru_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasCgerc_v2(cublasHandle_t handle, int m, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *x, int incx, const cuComplex *y, int incy, - cuComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, int, const cuComplex *, const cuComplex *, int, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgerc_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasZgeru_v2(cublasHandle_t handle, int m, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, - int incy, cuDoubleComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, 
int, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZgeru_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasZgerc_v2(cublasHandle_t handle, int m, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, - int incy, cuDoubleComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZgerc_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasSsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const float *alpha, /* host or device pointer */ - const float *x, int incx, float *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int, - float *, int); - static auto func_ptr = LoadSymbol("cublasSsyr_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasDsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const double *alpha, /* host or device pointer */ - const double *x, int incx, double *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, const double *, - int, double *, int); - static auto func_ptr = LoadSymbol("cublasDsyr_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasCsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuComplex 
*alpha, /* host or device pointer */ - const cuComplex *x, int incx, cuComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuComplex *, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsyr_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasZsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *x, int incx, cuDoubleComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZsyr_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasCher_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const float *alpha, /* host or device pointer */ - const cuComplex *x, int incx, cuComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, const cuComplex *, - int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCher_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasZher_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const double *alpha, /* host or device pointer */ - const cuDoubleComplex *x, int incx, cuDoubleComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, - const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZher_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(handle, uplo, n, alpha, x, incx, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasSspr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const float *alpha, /* host or device pointer */ - const float *x, int incx, float *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int, - float *); - static auto func_ptr = LoadSymbol("cublasSspr_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, AP); -} - -cublasStatus_t CUBLASWINAPI -cublasDspr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const double *alpha, /* host or device pointer */ - const double *x, int incx, double *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, const double *, - int, double *); - static auto func_ptr = LoadSymbol("cublasDspr_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, AP); -} - -cublasStatus_t CUBLASWINAPI -cublasChpr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const float *alpha, /* host or device pointer */ - const cuComplex *x, int incx, cuComplex *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, const cuComplex *, - int, cuComplex *); - static auto func_ptr = LoadSymbol("cublasChpr_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, AP); -} - -cublasStatus_t CUBLASWINAPI -cublasZhpr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const double *alpha, /* host or device pointer */ - const cuDoubleComplex *x, int incx, cuDoubleComplex *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, - const cuDoubleComplex *, int, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZhpr_v2"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, AP); -} - -cublasStatus_t CUBLASWINAPI cublasSsyr2_v2( - cublasHandle_t handle, cublasFillMode_t uplo, int n, - const float *alpha, /* host or device pointer */ - const float *x, int incx, const float *y, int incy, float *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSsyr2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI cublasDsyr2_v2( - cublasHandle_t handle, cublasFillMode_t uplo, int n, - const double *alpha, /* host or device pointer */ - const double *x, int incx, const double *y, int incy, double *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, const double *, - int, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDsyr2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasCsyr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *x, int incx, const cuComplex *y, int incy, - cuComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsyr2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasZsyr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuDoubleComplex *alpha, /* host or device 
pointer */ - const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, - int incy, cuDoubleComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZsyr2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasCher2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *x, int incx, const cuComplex *y, int incy, - cuComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCher2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasZher2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, - int incy, cuDoubleComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZher2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasSspr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const float *alpha, /* host or device pointer */ - const float *x, int incx, const float *y, int incy, float *AP) { - using FuncPtr = 
cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int, - const float *, int, float *); - static auto func_ptr = LoadSymbol("cublasSspr2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, AP); -} - -cublasStatus_t CUBLASWINAPI cublasDspr2_v2( - cublasHandle_t handle, cublasFillMode_t uplo, int n, - const double *alpha, /* host or device pointer */ - const double *x, int incx, const double *y, int incy, double *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, const double *, - int, const double *, int, double *); - static auto func_ptr = LoadSymbol("cublasDspr2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, AP); -} - -cublasStatus_t CUBLASWINAPI cublasChpr2_v2( - cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *x, int incx, const cuComplex *y, int incy, cuComplex *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, int, cuComplex *); - static auto func_ptr = LoadSymbol("cublasChpr2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, AP); -} - -cublasStatus_t CUBLASWINAPI -cublasZhpr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, - int incy, cuDoubleComplex *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZhpr2_v2"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, AP); -} - -cublasStatus_t CUBLASWINAPI cublasSgemm_v2( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *B, int ldb, - const float *beta, /* host or device pointer */ - float *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const float *, const float *, int, const float *, int, const float *, - float *, int); - static auto func_ptr = LoadSymbol("cublasSgemm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasDgemm_v2( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *B, int ldb, - const double *beta, /* host or device pointer */ - double *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const double *, const double *, int, const double *, int, const double *, - double *, int); - static auto func_ptr = LoadSymbol("cublasDgemm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCgemm_v2( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, 
cublasOperation_t, int, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, int, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgemm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCgemm3m( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, int, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgemm3m"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCgemm3mEx( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const cuComplex *alpha, const void *A, - cudaDataType Atype, int lda, const void *B, cudaDataType Btype, int ldb, - const cuComplex *beta, void *C, cudaDataType Ctype, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuComplex *, const void *, cudaDataType, int, const void *, - cudaDataType, int, const cuComplex *, void *, cudaDataType, int); - static auto func_ptr = LoadSymbol("cublasCgemm3mEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B, - Btype, ldb, beta, C, Ctype, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZgemm_v2( - cublasHandle_t handle, 
cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cublasZgemm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI -cublasZgemm3m(cublasHandle_t handle, cublasOperation_t transa, - cublasOperation_t transb, int m, int n, int k, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, - int ldb, const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cublasZgemm3m"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasSgemmEx( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const float *alpha, /* host or device pointer */ - const void *A, cudaDataType Atype, int lda, const void *B, - cudaDataType Btype, int ldb, const float *beta, /* host or device pointer */ - void *C, cudaDataType Ctype, int ldc) { - using FuncPtr = 
cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const float *, const void *, cudaDataType, int, const void *, - cudaDataType, int, const float *, void *, cudaDataType, int); - static auto func_ptr = LoadSymbol("cublasSgemmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B, - Btype, ldb, beta, C, Ctype, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasGemmEx( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const void *alpha, /* host or device pointer */ - const void *A, cudaDataType Atype, int lda, const void *B, - cudaDataType Btype, int ldb, const void *beta, /* host or device pointer */ - void *C, cudaDataType Ctype, int ldc, cudaDataType computeType, - cublasGemmAlgo_t algo) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const void *, const void *, cudaDataType, int, const void *, cudaDataType, - int, const void *, void *, cudaDataType, int, cudaDataType, - cublasGemmAlgo_t); - static auto func_ptr = LoadSymbol("cublasGemmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B, - Btype, ldb, beta, C, Ctype, ldc, computeType, algo); -} - -cublasStatus_t CUBLASWINAPI cublasCgemmEx( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const cuComplex *alpha, const void *A, - cudaDataType Atype, int lda, const void *B, cudaDataType Btype, int ldb, - const cuComplex *beta, void *C, cudaDataType Ctype, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuComplex *, const void *, cudaDataType, int, const void *, - cudaDataType, int, const cuComplex *, void *, cudaDataType, int); - static auto func_ptr = 
LoadSymbol("cublasCgemmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B, - Btype, ldb, beta, C, Ctype, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasUint8gemmBias( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - cublasOperation_t transc, int m, int n, int k, const unsigned char *A, - int A_bias, int lda, const unsigned char *B, int B_bias, int ldb, - unsigned char *C, int C_bias, int ldc, int C_mult, int C_shift) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, cublasOperation_t, - int, int, int, const unsigned char *, int, int, const unsigned char *, - int, int, unsigned char *, int, int, int, int); - static auto func_ptr = LoadSymbol("cublasUint8gemmBias"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, transc, m, n, k, A, A_bias, lda, B, - B_bias, ldb, C, C_bias, ldc, C_mult, C_shift); -} - -cublasStatus_t CUBLASWINAPI cublasSsyrk_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *beta, /* host or device pointer */ - float *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const float *, const float *, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasSsyrk_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasDsyrk_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *beta, /* host or device pointer */ - double *C, int ldc) { - using FuncPtr = 
cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const double *, const double *, int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDsyrk_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCsyrk_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, cuComplex *, - int); - static auto func_ptr = LoadSymbol("cublasCsyrk_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZsyrk_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZsyrk_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCsyrkEx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuComplex *alpha, /* host or device pointer */ - const void *A, cudaDataType Atype, int 
lda, - const cuComplex *beta, /* host or device pointer */ - void *C, cudaDataType Ctype, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuComplex *, const void *, cudaDataType, int, const cuComplex *, - void *, cudaDataType, int); - static auto func_ptr = LoadSymbol("cublasCsyrkEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C, - Ctype, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCsyrk3mEx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuComplex *alpha, const void *A, cudaDataType Atype, - int lda, const cuComplex *beta, void *C, cudaDataType Ctype, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuComplex *, const void *, cudaDataType, int, const cuComplex *, - void *, cudaDataType, int); - static auto func_ptr = LoadSymbol("cublasCsyrk3mEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C, - Ctype, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCherk_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const float *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const float *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const float *, const cuComplex *, int, const float *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCherk_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZherk_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, 
int k, const double *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, - const double *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const double *, const cuDoubleComplex *, int, const double *, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZherk_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCherkEx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const float *alpha, /* host or device pointer */ - const void *A, cudaDataType Atype, int lda, - const float *beta, /* host or device pointer */ - void *C, cudaDataType Ctype, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const float *, const void *, cudaDataType, int, const float *, void *, - cudaDataType, int); - static auto func_ptr = LoadSymbol("cublasCherkEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C, - Ctype, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCherk3mEx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const float *alpha, const void *A, cudaDataType Atype, - int lda, const float *beta, void *C, cudaDataType Ctype, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const float *, const void *, cudaDataType, int, const float *, void *, - cudaDataType, int); - static auto func_ptr = LoadSymbol("cublasCherk3mEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C, - Ctype, ldc); -} - -cublasStatus_t CUBLASWINAPI 
cublasSsyr2k_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *B, int ldb, - const float *beta, /* host or device pointer */ - float *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const float *, const float *, int, const float *, int, const float *, - float *, int); - static auto func_ptr = LoadSymbol("cublasSsyr2k_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasDsyr2k_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *B, int ldb, - const double *beta, /* host or device pointer */ - double *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const double *, const double *, int, const double *, int, const double *, - double *, int); - static auto func_ptr = LoadSymbol("cublasDsyr2k_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCsyr2k_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, int, - const cuComplex *, cuComplex *, int); - static auto func_ptr = 
LoadSymbol("cublasCsyr2k_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZsyr2k_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cublasZsyr2k_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCher2k_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, - const float *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, int, - const float *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCher2k_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZher2k_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - const 
double *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const double *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZher2k_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasSsyrkx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *B, int ldb, - const float *beta, /* host or device pointer */ - float *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const float *, const float *, int, const float *, int, const float *, - float *, int); - static auto func_ptr = LoadSymbol("cublasSsyrkx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasDsyrkx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *B, int ldb, - const double *beta, /* host or device pointer */ - double *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const double *, const double *, int, const double *, int, const double *, - double *, int); - static auto func_ptr = LoadSymbol("cublasDsyrkx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCsyrkx( - cublasHandle_t 
handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, int, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsyrkx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZsyrkx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cublasZsyrkx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCherkx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, - const float *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, int, - 
const float *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCherkx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZherkx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - const double *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const double *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZherkx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasSsymm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, - int n, const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *B, int ldb, - const float *beta, /* host or device pointer */ - float *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, - const float *, const float *, int, const float *, int, const float *, - float *, int); - static auto func_ptr = LoadSymbol("cublasSsymm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasDsymm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, - int n, const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *B, int ldb, - const double *beta, /* host or device pointer 
*/ - double *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, - const double *, const double *, int, const double *, int, const double *, - double *, int); - static auto func_ptr = LoadSymbol("cublasDsymm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCsymm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, - int n, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, int, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsymm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZsymm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, - int n, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cublasZsymm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t 
CUBLASWINAPI cublasChemm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, - int n, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, int, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasChemm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZhemm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, - int n, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cublasZhemm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasStrsm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const float *alpha, /* host or device pointer */ - const float *A, int lda, float *B, int ldb) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const float *, const float *, int, 
float *, - int); - static auto func_ptr = LoadSymbol("cublasStrsm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb); -} - -cublasStatus_t CUBLASWINAPI cublasDtrsm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const double *alpha, /* host or device pointer */ - const double *A, int lda, double *B, int ldb) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const double *, const double *, int, double *, - int); - static auto func_ptr = LoadSymbol("cublasDtrsm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb); -} - -cublasStatus_t CUBLASWINAPI cublasCtrsm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, cuComplex *B, int ldb) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const cuComplex *, const cuComplex *, int, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtrsm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb); -} - -cublasStatus_t CUBLASWINAPI cublasZtrsm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, cuDoubleComplex *B, int ldb) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, 
cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtrsm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb); -} - -cublasStatus_t CUBLASWINAPI cublasStrmm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *B, int ldb, float *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const float *, const float *, int, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStrmm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasDtrmm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *B, int ldb, double *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const double *, const double *, int, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtrmm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCtrmm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const 
cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, cuComplex *C, - int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const cuComplex *, const cuComplex *, int, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtrmm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZtrmm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtrmm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasSgemmBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const float *alpha, /* host or device pointer */ - const float *const Aarray[], int lda, const float *const Barray[], int ldb, - const float *beta, /* host or device pointer */ - float *const Carray[], int ldc, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const float *, const float *const[], int, const float *const[], int, - const float *, float *const[], int, 
int); - static auto func_ptr = LoadSymbol("cublasSgemmBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray, - ldb, beta, Carray, ldc, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasDgemmBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const double *alpha, /* host or device pointer */ - const double *const Aarray[], int lda, const double *const Barray[], - int ldb, const double *beta, /* host or device pointer */ - double *const Carray[], int ldc, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const double *, const double *const[], int, const double *const[], int, - const double *, double *const[], int, int); - static auto func_ptr = LoadSymbol("cublasDgemmBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray, - ldb, beta, Carray, ldc, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasCgemmBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *const Aarray[], int lda, const cuComplex *const Barray[], - int ldb, const cuComplex *beta, /* host or device pointer */ - cuComplex *const Carray[], int ldc, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuComplex *, const cuComplex *const[], int, - const cuComplex *const[], int, const cuComplex *, cuComplex *const[], int, - int); - static auto func_ptr = LoadSymbol("cublasCgemmBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray, - ldb, beta, Carray, ldc, batchCount); -} - -cublasStatus_t 
CUBLASWINAPI cublasCgemm3mBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *const Aarray[], int lda, const cuComplex *const Barray[], - int ldb, const cuComplex *beta, /* host or device pointer */ - cuComplex *const Carray[], int ldc, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuComplex *, const cuComplex *const[], int, - const cuComplex *const[], int, const cuComplex *, cuComplex *const[], int, - int); - static auto func_ptr = LoadSymbol("cublasCgemm3mBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray, - ldb, beta, Carray, ldc, batchCount); -} - -cublasStatus_t CUBLASWINAPI -cublasZgemmBatched(cublasHandle_t handle, cublasOperation_t transa, - cublasOperation_t transb, int m, int n, int k, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *const Aarray[], int lda, - const cuDoubleComplex *const Barray[], int ldb, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *const Carray[], int ldc, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuDoubleComplex *, const cuDoubleComplex *const[], int, - const cuDoubleComplex *const[], int, const cuDoubleComplex *, - cuDoubleComplex *const[], int, int); - static auto func_ptr = LoadSymbol("cublasZgemmBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray, - ldb, beta, Carray, ldc, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const void *alpha, /* 
host or device pointer */ - const void *const Aarray[], cudaDataType Atype, int lda, - const void *const Barray[], cudaDataType Btype, int ldb, - const void *beta, /* host or device pointer */ - void *const Carray[], cudaDataType Ctype, int ldc, int batchCount, - cudaDataType computeType, cublasGemmAlgo_t algo) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const void *, const void *const[], cudaDataType, int, const void *const[], - cudaDataType, int, const void *, void *const[], cudaDataType, int, int, - cudaDataType, cublasGemmAlgo_t); - static auto func_ptr = LoadSymbol("cublasGemmBatchedEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, Atype, lda, - Barray, Btype, ldb, beta, Carray, Ctype, ldc, batchCount, - computeType, algo); -} - -cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const void *alpha, /* host or device pointer */ - const void *A, cudaDataType Atype, int lda, - long long int strideA, /* purposely signed */ - const void *B, cudaDataType Btype, int ldb, long long int strideB, - const void *beta, /* host or device pointer */ - void *C, cudaDataType Ctype, int ldc, long long int strideC, int batchCount, - cudaDataType computeType, cublasGemmAlgo_t algo) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const void *, const void *, cudaDataType, int, long long, const void *, - cudaDataType, int, long long, const void *, void *, cudaDataType, int, - long long, int, cudaDataType, cublasGemmAlgo_t); - static auto func_ptr = LoadSymbol("cublasGemmStridedBatchedEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, - strideA, B, Btype, ldb, strideB, beta, C, 
Ctype, ldc, strideC, - batchCount, computeType, algo); -} - -cublasStatus_t CUBLASWINAPI cublasSgemmStridedBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const float *alpha, /* host or device pointer */ - const float *A, int lda, long long int strideA, /* purposely signed */ - const float *B, int ldb, long long int strideB, - const float *beta, /* host or device pointer */ - float *C, int ldc, long long int strideC, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const float *, const float *, int, long long, const float *, int, - long long, const float *, float *, int, long long, int); - static auto func_ptr = LoadSymbol("cublasSgemmStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, - ldb, strideB, beta, C, ldc, strideC, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasDgemmStridedBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const double *alpha, /* host or device pointer */ - const double *A, int lda, long long int strideA, /* purposely signed */ - const double *B, int ldb, long long int strideB, - const double *beta, /* host or device pointer */ - double *C, int ldc, long long int strideC, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const double *, const double *, int, long long, const double *, int, - long long, const double *, double *, int, long long, int); - static auto func_ptr = LoadSymbol("cublasDgemmStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, - ldb, strideB, beta, C, ldc, strideC, batchCount); -} - -cublasStatus_t CUBLASWINAPI 
cublasCgemmStridedBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, long long int strideA, /* purposely signed */ - const cuComplex *B, int ldb, long long int strideB, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc, long long int strideC, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuComplex *, const cuComplex *, int, long long, const cuComplex *, - int, long long, const cuComplex *, cuComplex *, int, long long, int); - static auto func_ptr = LoadSymbol("cublasCgemmStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, - ldb, strideB, beta, C, ldc, strideC, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasCgemm3mStridedBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, long long int strideA, /* purposely signed */ - const cuComplex *B, int ldb, long long int strideB, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc, long long int strideC, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuComplex *, const cuComplex *, int, long long, const cuComplex *, - int, long long, const cuComplex *, cuComplex *, int, long long, int); - static auto func_ptr = LoadSymbol("cublasCgemm3mStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, - ldb, strideB, beta, C, ldc, strideC, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasZgemmStridedBatched( - 
cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, - long long int strideA, /* purposely signed */ - const cuDoubleComplex *B, int ldb, long long int strideB, - const cuDoubleComplex *beta, /* host or device poi */ - cuDoubleComplex *C, int ldc, long long int strideC, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, long long, - const cuDoubleComplex *, int, long long, const cuDoubleComplex *, - cuDoubleComplex *, int, long long, int); - static auto func_ptr = LoadSymbol("cublasZgemmStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, - ldb, strideB, beta, C, ldc, strideC, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasSgeam( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *beta, /* host or device pointer */ - const float *B, int ldb, float *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, - const float *, const float *, int, const float *, const float *, int, - float *, int); - static auto func_ptr = LoadSymbol("cublasSgeam"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasDgeam( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *beta, /* host or device pointer */ - const double *B, int ldb, double *C, int ldc) 
{ - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, - const double *, const double *, int, const double *, const double *, int, - double *, int); - static auto func_ptr = LoadSymbol("cublasDgeam"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCgeam( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, - const cuComplex *beta, /* host or device pointer */ - const cuComplex *B, int ldb, cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgeam"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZgeam( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, - const cuDoubleComplex *beta, /* host or device pointer */ - const cuDoubleComplex *B, int ldb, cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cublasZgeam"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI 
cublasSgetrfBatched( - cublasHandle_t handle, int n, float *const A[], /*Device pointer*/ - int lda, int *P, /*Device Pointer*/ - int *info, /*Device Pointer*/ - int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, float *const[], int, int *, int *, int); - static auto func_ptr = LoadSymbol("cublasSgetrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, P, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasDgetrfBatched( - cublasHandle_t handle, int n, double *const A[], /*Device pointer*/ - int lda, int *P, /*Device Pointer*/ - int *info, /*Device Pointer*/ - int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, double *const[], int, int *, int *, int); - static auto func_ptr = LoadSymbol("cublasDgetrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, P, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasCgetrfBatched( - cublasHandle_t handle, int n, cuComplex *const A[], /*Device pointer*/ - int lda, int *P, /*Device Pointer*/ - int *info, /*Device Pointer*/ - int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, cuComplex *const[], int, int *, int *, int); - static auto func_ptr = LoadSymbol("cublasCgetrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, P, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasZgetrfBatched( - cublasHandle_t handle, int n, cuDoubleComplex *const A[], /*Device pointer*/ - int lda, int *P, /*Device Pointer*/ - int *info, /*Device Pointer*/ - int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, cuDoubleComplex *const[], int, int *, int *, int); - static auto func_ptr = LoadSymbol("cublasZgetrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, P, info, batchSize); -} - -cublasStatus_t 
CUBLASWINAPI cublasSgetriBatched( - cublasHandle_t handle, int n, const float *const A[], /*Device pointer*/ - int lda, const int *P, /*Device pointer*/ - float *const C[], /*Device pointer*/ - int ldc, int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const float *const[], int, const int *, - float *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasSgetriBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, P, C, ldc, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasDgetriBatched( - cublasHandle_t handle, int n, const double *const A[], /*Device pointer*/ - int lda, const int *P, /*Device pointer*/ - double *const C[], /*Device pointer*/ - int ldc, int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const double *const[], int, const int *, - double *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasDgetriBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, P, C, ldc, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasCgetriBatched( - cublasHandle_t handle, int n, const cuComplex *const A[], /*Device pointer*/ - int lda, const int *P, /*Device pointer*/ - cuComplex *const C[], /*Device pointer*/ - int ldc, int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuComplex *const[], int, const int *, - cuComplex *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasCgetriBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, P, C, ldc, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasZgetriBatched(cublasHandle_t handle, int n, - const cuDoubleComplex *const A[], /*Device pointer*/ - int lda, const int *P, /*Device pointer*/ - cuDoubleComplex *const C[], /*Device pointer*/ - int ldc, int *info, int batchSize) 
{ - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *const[], int, const int *, - cuDoubleComplex *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasZgetriBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, P, C, ldc, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasSgetrsBatched( - cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, - const float *const Aarray[], int lda, const int *devIpiv, - float *const Barray[], int ldb, int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const float *const[], int, - const int *, float *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasSgetrsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb, - info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasDgetrsBatched( - cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, - const double *const Aarray[], int lda, const int *devIpiv, - double *const Barray[], int ldb, int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const double *const[], int, - const int *, double *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasDgetrsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb, - info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasCgetrsBatched( - cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, - const cuComplex *const Aarray[], int lda, const int *devIpiv, - cuComplex *const Barray[], int ldb, int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const cuComplex *const[], - int, const int *, 
cuComplex *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasCgetrsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb, - info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasZgetrsBatched( - cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, - const cuDoubleComplex *const Aarray[], int lda, const int *devIpiv, - cuDoubleComplex *const Barray[], int ldb, int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, - const cuDoubleComplex *const[], int, const int *, - cuDoubleComplex *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasZgetrsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb, - info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasStrsmBatched( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const float *alpha, /*Host or Device Pointer*/ - const float *const A[], int lda, float *const B[], int ldb, - int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const float *, const float *const[], int, - float *const[], int, int); - static auto func_ptr = LoadSymbol("cublasStrsmBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, - batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasDtrsmBatched( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const double *alpha, /*Host or Device Pointer*/ - const double *const A[], int lda, double *const B[], int ldb, - int batchCount) { - using FuncPtr = 
cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const double *, const double *const[], int, - double *const[], int, int); - static auto func_ptr = LoadSymbol("cublasDtrsmBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, - batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasCtrsmBatched( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const cuComplex *alpha, /*Host or Device Pointer*/ - const cuComplex *const A[], int lda, cuComplex *const B[], int ldb, - int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const cuComplex *, const cuComplex *const[], - int, cuComplex *const[], int, int); - static auto func_ptr = LoadSymbol("cublasCtrsmBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, - batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasZtrsmBatched( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const cuDoubleComplex *alpha, /*Host or Device Pointer*/ - const cuDoubleComplex *const A[], int lda, cuDoubleComplex *const B[], - int ldb, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *const[], int, cuDoubleComplex *const[], int, int); - static auto func_ptr = LoadSymbol("cublasZtrsmBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, - batchCount); -} - 
-cublasStatus_t CUBLASWINAPI cublasSmatinvBatched( - cublasHandle_t handle, int n, const float *const A[], /*Device pointer*/ - int lda, float *const Ainv[], /*Device pointer*/ - int lda_inv, int *info, /*Device Pointer*/ - int batchSize) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const float *const[], - int, float *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasSmatinvBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, Ainv, lda_inv, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasDmatinvBatched( - cublasHandle_t handle, int n, const double *const A[], /*Device pointer*/ - int lda, double *const Ainv[], /*Device pointer*/ - int lda_inv, int *info, /*Device Pointer*/ - int batchSize) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const double *const[], - int, double *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasDmatinvBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, Ainv, lda_inv, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasCmatinvBatched( - cublasHandle_t handle, int n, const cuComplex *const A[], /*Device pointer*/ - int lda, cuComplex *const Ainv[], /*Device pointer*/ - int lda_inv, int *info, /*Device Pointer*/ - int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuComplex *const[], int, cuComplex *const[], - int, int *, int); - static auto func_ptr = LoadSymbol("cublasCmatinvBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, Ainv, lda_inv, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasZmatinvBatched(cublasHandle_t handle, int n, - const cuDoubleComplex *const A[], /*Device pointer*/ - int lda, cuDoubleComplex *const Ainv[], /*Device pointer*/ - int lda_inv, int *info, /*Device Pointer*/ - int batchSize) { - using FuncPtr = 
cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *const[], int, - cuDoubleComplex *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasZmatinvBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, Ainv, lda_inv, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasSgeqrfBatched(cublasHandle_t handle, int m, int n, - float *const Aarray[], /*Device pointer*/ - int lda, float *const TauArray[], /*Device pointer*/ - int *info, int batchSize) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, int, float *const[], - int, float *const[], int *, int); - static auto func_ptr = LoadSymbol("cublasSgeqrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, Aarray, lda, TauArray, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasDgeqrfBatched(cublasHandle_t handle, int m, int n, - double *const Aarray[], /*Device pointer*/ - int lda, double *const TauArray[], /*Device pointer*/ - int *info, int batchSize) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, int, double *const[], - int, double *const[], int *, int); - static auto func_ptr = LoadSymbol("cublasDgeqrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, Aarray, lda, TauArray, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasCgeqrfBatched(cublasHandle_t handle, int m, int n, - cuComplex *const Aarray[], /*Device pointer*/ - int lda, cuComplex *const TauArray[], /*Device pointer*/ - int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, int, cuComplex *const[], int, cuComplex *const[], - int *, int); - static auto func_ptr = LoadSymbol("cublasCgeqrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, Aarray, lda, TauArray, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasZgeqrfBatched( - 
cublasHandle_t handle, int m, int n, - cuDoubleComplex *const Aarray[], /*Device pointer*/ - int lda, cuDoubleComplex *const TauArray[], /*Device pointer*/ - int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, int, cuDoubleComplex *const[], int, - cuDoubleComplex *const[], int *, int); - static auto func_ptr = LoadSymbol("cublasZgeqrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, Aarray, lda, TauArray, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasSgelsBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - int nrhs, float *const Aarray[], /*Device pointer*/ - int lda, float *const Carray[], /*Device pointer*/ - int ldc, int *info, int *devInfoArray, /*Device pointer*/ - int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, int, float *const[], int, - float *const[], int, int *, int *, int); - static auto func_ptr = LoadSymbol("cublasSgelsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info, - devInfoArray, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasDgelsBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - int nrhs, double *const Aarray[], /*Device pointer*/ - int lda, double *const Carray[], /*Device pointer*/ - int ldc, int *info, int *devInfoArray, /*Device pointer*/ - int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, int, double *const[], int, - double *const[], int, int *, int *, int); - static auto func_ptr = LoadSymbol("cublasDgelsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info, - devInfoArray, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasCgelsBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int 
n, - int nrhs, cuComplex *const Aarray[], /*Device pointer*/ - int lda, cuComplex *const Carray[], /*Device pointer*/ - int ldc, int *info, int *devInfoArray, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, int, cuComplex *const[], int, - cuComplex *const[], int, int *, int *, int); - static auto func_ptr = LoadSymbol("cublasCgelsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info, - devInfoArray, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasZgelsBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - int nrhs, cuDoubleComplex *const Aarray[], /*Device pointer*/ - int lda, cuDoubleComplex *const Carray[], /*Device pointer*/ - int ldc, int *info, int *devInfoArray, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, int, - cuDoubleComplex *const[], int, cuDoubleComplex *const[], int, int *, - int *, int); - static auto func_ptr = LoadSymbol("cublasZgelsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info, - devInfoArray, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasSdgmm(cublasHandle_t handle, - cublasSideMode_t mode, int m, int n, - const float *A, int lda, const float *x, - int incx, float *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, int, int, const float *, int, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSdgmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, m, n, A, lda, x, incx, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasDdgmm(cublasHandle_t handle, - cublasSideMode_t mode, int m, int n, - const double *A, int lda, - const double *x, int incx, double *C, - int ldc) { - using FuncPtr = 
cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, int, int, const double *, int, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDdgmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, m, n, A, lda, x, incx, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCdgmm(cublasHandle_t handle, - cublasSideMode_t mode, int m, int n, - const cuComplex *A, int lda, - const cuComplex *x, int incx, - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, int, int, const cuComplex *, int, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCdgmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, m, n, A, lda, x, incx, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZdgmm(cublasHandle_t handle, - cublasSideMode_t mode, int m, int n, - const cuDoubleComplex *A, int lda, - const cuDoubleComplex *x, int incx, - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, int, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZdgmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, m, n, A, lda, x, incx, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasStpttr(cublasHandle_t handle, - cublasFillMode_t uplo, int n, - const float *AP, float *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasStpttr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, AP, A, lda); -} - -cublasStatus_t CUBLASWINAPI cublasDtpttr(cublasHandle_t handle, - cublasFillMode_t uplo, int n, - const double *AP, double *A, int lda) { - using FuncPtr = 
cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDtpttr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, AP, A, lda); -} - -cublasStatus_t CUBLASWINAPI cublasCtpttr(cublasHandle_t handle, - cublasFillMode_t uplo, int n, - const cuComplex *AP, cuComplex *A, - int lda) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtpttr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, AP, A, lda); -} - -cublasStatus_t CUBLASWINAPI cublasZtpttr(cublasHandle_t handle, - cublasFillMode_t uplo, int n, - const cuDoubleComplex *AP, - cuDoubleComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtpttr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, AP, A, lda); -} - -cublasStatus_t CUBLASWINAPI cublasStrttp(cublasHandle_t handle, - cublasFillMode_t uplo, int n, - const float *A, int lda, float *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, int, float *); - static auto func_ptr = LoadSymbol("cublasStrttp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, AP); -} - -cublasStatus_t CUBLASWINAPI cublasDtrttp(cublasHandle_t handle, - cublasFillMode_t uplo, int n, - const double *A, int lda, double *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, int, double *); - static auto func_ptr = LoadSymbol("cublasDtrttp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, AP); -} - -cublasStatus_t 
CUBLASWINAPI cublasCtrttp(cublasHandle_t handle, - cublasFillMode_t uplo, int n, - const cuComplex *A, int lda, - cuComplex *AP) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, - const cuComplex *, int, cuComplex *); - static auto func_ptr = LoadSymbol("cublasCtrttp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, AP); -} - -cublasStatus_t CUBLASWINAPI cublasZtrttp(cublasHandle_t handle, - cublasFillMode_t uplo, int n, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, int, - cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZtrttp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, AP); -} - -cublasStatus CUBLASWINAPI cublasInit(void) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(); - static auto func_ptr = LoadSymbol("cublasInit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -cublasStatus CUBLASWINAPI cublasShutdown(void) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(); - static auto func_ptr = LoadSymbol("cublasShutdown"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -cublasStatus CUBLASWINAPI cublasGetError(void) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(); - static auto func_ptr = LoadSymbol("cublasGetError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -cublasStatus CUBLASWINAPI cublasGetVersion(int *version) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int *); - static auto func_ptr = LoadSymbol("cublasGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(version); -} - -cublasStatus CUBLASWINAPI cublasAlloc(int n, int elemSize, void **devicePtr) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, void **); - static auto func_ptr = 
LoadSymbol("cublasAlloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(n, elemSize, devicePtr); -} - -cublasStatus CUBLASWINAPI cublasFree(void *devicePtr) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(void *); - static auto func_ptr = LoadSymbol("cublasFree"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devicePtr); -} - -cublasStatus CUBLASWINAPI cublasSetKernelStream(cudaStream_t stream) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cublasSetKernelStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -float CUBLASWINAPI cublasSnrm2(int n, const float *x, int incx) { - using FuncPtr = float(CUBLASWINAPI *)(int, const float *, int); - static auto func_ptr = LoadSymbol("cublasSnrm2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSnrm2"); - return func_ptr(n, x, incx); -} - -double CUBLASWINAPI cublasDnrm2(int n, const double *x, int incx) { - using FuncPtr = double(CUBLASWINAPI *)(int, const double *, int); - static auto func_ptr = LoadSymbol("cublasDnrm2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDnrm2"); - return func_ptr(n, x, incx); -} - -float CUBLASWINAPI cublasScnrm2(int n, const cuComplex *x, int incx) { - using FuncPtr = float(CUBLASWINAPI *)(int, const cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasScnrm2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasScnrm2"); - return func_ptr(n, x, incx); -} - -double CUBLASWINAPI cublasDznrm2(int n, const cuDoubleComplex *x, int incx) { - using FuncPtr = double(CUBLASWINAPI *)(int, const cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasDznrm2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDznrm2"); - return func_ptr(n, x, incx); -} - -float CUBLASWINAPI cublasSdot(int n, const float *x, int incx, const float *y, - int incy) { - using FuncPtr = - float(CUBLASWINAPI *)(int, const float *, int, const float *, int); - static auto 
func_ptr = LoadSymbol("cublasSdot"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSdot"); - return func_ptr(n, x, incx, y, incy); -} - -double CUBLASWINAPI cublasDdot(int n, const double *x, int incx, - const double *y, int incy) { - using FuncPtr = - double(CUBLASWINAPI *)(int, const double *, int, const double *, int); - static auto func_ptr = LoadSymbol("cublasDdot"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDdot"); - return func_ptr(n, x, incx, y, incy); -} - -cuComplex CUBLASWINAPI cublasCdotu(int n, const cuComplex *x, int incx, - const cuComplex *y, int incy) { - using FuncPtr = cuComplex(CUBLASWINAPI *)(int, const cuComplex *, int, - const cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCdotu"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCdotu"); - return func_ptr(n, x, incx, y, incy); -} - -cuComplex CUBLASWINAPI cublasCdotc(int n, const cuComplex *x, int incx, - const cuComplex *y, int incy) { - using FuncPtr = cuComplex(CUBLASWINAPI *)(int, const cuComplex *, int, - const cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCdotc"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCdotc"); - return func_ptr(n, x, incx, y, incy); -} - -cuDoubleComplex CUBLASWINAPI cublasZdotu(int n, const cuDoubleComplex *x, - int incx, const cuDoubleComplex *y, - int incy) { - using FuncPtr = cuDoubleComplex(CUBLASWINAPI *)( - int, const cuDoubleComplex *, int, const cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZdotu"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZdotu"); - return func_ptr(n, x, incx, y, incy); -} - -cuDoubleComplex CUBLASWINAPI cublasZdotc(int n, const cuDoubleComplex *x, - int incx, const cuDoubleComplex *y, - int incy) { - using FuncPtr = cuDoubleComplex(CUBLASWINAPI *)( - int, const cuDoubleComplex *, int, const cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZdotc"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZdotc"); - return func_ptr(n, x, incx, y, incy); -} - -void 
CUBLASWINAPI cublasSscal(int n, float alpha, float *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSscal"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSscal"); - return func_ptr(n, alpha, x, incx); -} - -void CUBLASWINAPI cublasDscal(int n, double alpha, double *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDscal"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDscal"); - return func_ptr(n, alpha, x, incx); -} - -void CUBLASWINAPI cublasCscal(int n, cuComplex alpha, cuComplex *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCscal"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCscal"); - return func_ptr(n, alpha, x, incx); -} - -void CUBLASWINAPI cublasZscal(int n, cuDoubleComplex alpha, cuDoubleComplex *x, - int incx) { - using FuncPtr = - void(CUBLASWINAPI *)(int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZscal"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZscal"); - return func_ptr(n, alpha, x, incx); -} - -void CUBLASWINAPI cublasCsscal(int n, float alpha, cuComplex *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(int, float, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsscal"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCsscal"); - return func_ptr(n, alpha, x, incx); -} - -void CUBLASWINAPI cublasZdscal(int n, double alpha, cuDoubleComplex *x, - int incx) { - using FuncPtr = void(CUBLASWINAPI *)(int, double, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZdscal"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZdscal"); - return func_ptr(n, alpha, x, incx); -} - -void CUBLASWINAPI cublasSaxpy(int n, float alpha, const float *x, int incx, - float *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(int, float, const 
float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSaxpy"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSaxpy"); - return func_ptr(n, alpha, x, incx, y, incy); -} - -void CUBLASWINAPI cublasDaxpy(int n, double alpha, const double *x, int incx, - double *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(int, double, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDaxpy"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDaxpy"); - return func_ptr(n, alpha, x, incx, y, incy); -} - -void CUBLASWINAPI cublasCaxpy(int n, cuComplex alpha, const cuComplex *x, - int incx, cuComplex *y, int incy) { - using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex, const cuComplex *, int, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCaxpy"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCaxpy"); - return func_ptr(n, alpha, x, incx, y, incy); -} - -void CUBLASWINAPI cublasZaxpy(int n, cuDoubleComplex alpha, - const cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(int, cuDoubleComplex, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZaxpy"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZaxpy"); - return func_ptr(n, alpha, x, incx, y, incy); -} - -void CUBLASWINAPI cublasScopy(int n, const float *x, int incx, float *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)(int, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasScopy"); - if (!func_ptr) LogFatalSymbolNotFound("cublasScopy"); - return func_ptr(n, x, incx, y, incy); -} - -void CUBLASWINAPI cublasDcopy(int n, const double *x, int incx, double *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)(int, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDcopy"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDcopy"); - return func_ptr(n, x, incx, y, incy); -} - -void CUBLASWINAPI 
cublasCcopy(int n, const cuComplex *x, int incx, cuComplex *y, - int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCcopy"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCcopy"); - return func_ptr(n, x, incx, y, incy); -} - -void CUBLASWINAPI cublasZcopy(int n, const cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy) { - using FuncPtr = void(CUBLASWINAPI *)(int, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZcopy"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZcopy"); - return func_ptr(n, x, incx, y, incy); -} - -void CUBLASWINAPI cublasSswap(int n, float *x, int incx, float *y, int incy) { - using FuncPtr = void(CUBLASWINAPI *)(int, float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSswap"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSswap"); - return func_ptr(n, x, incx, y, incy); -} - -void CUBLASWINAPI cublasDswap(int n, double *x, int incx, double *y, int incy) { - using FuncPtr = void(CUBLASWINAPI *)(int, double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDswap"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDswap"); - return func_ptr(n, x, incx, y, incy); -} - -void CUBLASWINAPI cublasCswap(int n, cuComplex *x, int incx, cuComplex *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCswap"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCswap"); - return func_ptr(n, x, incx, y, incy); -} - -void CUBLASWINAPI cublasZswap(int n, cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(int, cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZswap"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZswap"); - return func_ptr(n, x, incx, y, incy); -} - -int CUBLASWINAPI 
cublasIsamax(int n, const float *x, int incx) { - using FuncPtr = int(CUBLASWINAPI *)(int, const float *, int); - static auto func_ptr = LoadSymbol("cublasIsamax"); - if (!func_ptr) LogFatalSymbolNotFound("cublasIsamax"); - return func_ptr(n, x, incx); -} - -int CUBLASWINAPI cublasIdamax(int n, const double *x, int incx) { - using FuncPtr = int(CUBLASWINAPI *)(int, const double *, int); - static auto func_ptr = LoadSymbol("cublasIdamax"); - if (!func_ptr) LogFatalSymbolNotFound("cublasIdamax"); - return func_ptr(n, x, incx); -} - -int CUBLASWINAPI cublasIcamax(int n, const cuComplex *x, int incx) { - using FuncPtr = int(CUBLASWINAPI *)(int, const cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasIcamax"); - if (!func_ptr) LogFatalSymbolNotFound("cublasIcamax"); - return func_ptr(n, x, incx); -} - -int CUBLASWINAPI cublasIzamax(int n, const cuDoubleComplex *x, int incx) { - using FuncPtr = int(CUBLASWINAPI *)(int, const cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasIzamax"); - if (!func_ptr) LogFatalSymbolNotFound("cublasIzamax"); - return func_ptr(n, x, incx); -} - -int CUBLASWINAPI cublasIsamin(int n, const float *x, int incx) { - using FuncPtr = int(CUBLASWINAPI *)(int, const float *, int); - static auto func_ptr = LoadSymbol("cublasIsamin"); - if (!func_ptr) LogFatalSymbolNotFound("cublasIsamin"); - return func_ptr(n, x, incx); -} - -int CUBLASWINAPI cublasIdamin(int n, const double *x, int incx) { - using FuncPtr = int(CUBLASWINAPI *)(int, const double *, int); - static auto func_ptr = LoadSymbol("cublasIdamin"); - if (!func_ptr) LogFatalSymbolNotFound("cublasIdamin"); - return func_ptr(n, x, incx); -} - -int CUBLASWINAPI cublasIcamin(int n, const cuComplex *x, int incx) { - using FuncPtr = int(CUBLASWINAPI *)(int, const cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasIcamin"); - if (!func_ptr) LogFatalSymbolNotFound("cublasIcamin"); - return func_ptr(n, x, incx); -} - -int CUBLASWINAPI cublasIzamin(int n, const 
cuDoubleComplex *x, int incx) { - using FuncPtr = int(CUBLASWINAPI *)(int, const cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasIzamin"); - if (!func_ptr) LogFatalSymbolNotFound("cublasIzamin"); - return func_ptr(n, x, incx); -} - -float CUBLASWINAPI cublasSasum(int n, const float *x, int incx) { - using FuncPtr = float(CUBLASWINAPI *)(int, const float *, int); - static auto func_ptr = LoadSymbol("cublasSasum"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSasum"); - return func_ptr(n, x, incx); -} - -double CUBLASWINAPI cublasDasum(int n, const double *x, int incx) { - using FuncPtr = double(CUBLASWINAPI *)(int, const double *, int); - static auto func_ptr = LoadSymbol("cublasDasum"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDasum"); - return func_ptr(n, x, incx); -} - -float CUBLASWINAPI cublasScasum(int n, const cuComplex *x, int incx) { - using FuncPtr = float(CUBLASWINAPI *)(int, const cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasScasum"); - if (!func_ptr) LogFatalSymbolNotFound("cublasScasum"); - return func_ptr(n, x, incx); -} - -double CUBLASWINAPI cublasDzasum(int n, const cuDoubleComplex *x, int incx) { - using FuncPtr = double(CUBLASWINAPI *)(int, const cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasDzasum"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDzasum"); - return func_ptr(n, x, incx); -} - -void CUBLASWINAPI cublasSrot(int n, float *x, int incx, float *y, int incy, - float sc, float ss) { - using FuncPtr = - void(CUBLASWINAPI *)(int, float *, int, float *, int, float, float); - static auto func_ptr = LoadSymbol("cublasSrot"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSrot"); - return func_ptr(n, x, incx, y, incy, sc, ss); -} - -void CUBLASWINAPI cublasDrot(int n, double *x, int incx, double *y, int incy, - double sc, double ss) { - using FuncPtr = - void(CUBLASWINAPI *)(int, double *, int, double *, int, double, double); - static auto func_ptr = LoadSymbol("cublasDrot"); - 
if (!func_ptr) LogFatalSymbolNotFound("cublasDrot"); - return func_ptr(n, x, incx, y, incy, sc, ss); -} - -void CUBLASWINAPI cublasCrot(int n, cuComplex *x, int incx, cuComplex *y, - int incy, float c, cuComplex s) { - using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex *, int, cuComplex *, int, - float, cuComplex); - static auto func_ptr = LoadSymbol("cublasCrot"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCrot"); - return func_ptr(n, x, incx, y, incy, c, s); -} - -void CUBLASWINAPI cublasZrot(int n, cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy, double sc, - cuDoubleComplex cs) { - using FuncPtr = - void(CUBLASWINAPI *)(int, cuDoubleComplex *, int, cuDoubleComplex *, int, - double, cuDoubleComplex); - static auto func_ptr = LoadSymbol("cublasZrot"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZrot"); - return func_ptr(n, x, incx, y, incy, sc, cs); -} - -void CUBLASWINAPI cublasCsrot(int n, cuComplex *x, int incx, cuComplex *y, - int incy, float c, float s) { - using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex *, int, cuComplex *, int, - float, float); - static auto func_ptr = LoadSymbol("cublasCsrot"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCsrot"); - return func_ptr(n, x, incx, y, incy, c, s); -} - -void CUBLASWINAPI cublasZdrot(int n, cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy, double c, - double s) { - using FuncPtr = void(CUBLASWINAPI *)(int, cuDoubleComplex *, int, - cuDoubleComplex *, int, double, double); - static auto func_ptr = LoadSymbol("cublasZdrot"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZdrot"); - return func_ptr(n, x, incx, y, incy, c, s); -} - -void CUBLASWINAPI cublasSrotg(float *sa, float *sb, float *sc, float *ss) { - using FuncPtr = void(CUBLASWINAPI *)(float *, float *, float *, float *); - static auto func_ptr = LoadSymbol("cublasSrotg"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSrotg"); - return func_ptr(sa, sb, sc, ss); -} - -void CUBLASWINAPI cublasDrotg(double *sa, double 
*sb, double *sc, double *ss) { - using FuncPtr = void(CUBLASWINAPI *)(double *, double *, double *, double *); - static auto func_ptr = LoadSymbol("cublasDrotg"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDrotg"); - return func_ptr(sa, sb, sc, ss); -} - -void CUBLASWINAPI cublasCrotg(cuComplex *ca, cuComplex cb, float *sc, - cuComplex *cs) { - using FuncPtr = - void(CUBLASWINAPI *)(cuComplex *, cuComplex, float *, cuComplex *); - static auto func_ptr = LoadSymbol("cublasCrotg"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCrotg"); - return func_ptr(ca, cb, sc, cs); -} - -void CUBLASWINAPI cublasZrotg(cuDoubleComplex *ca, cuDoubleComplex cb, - double *sc, cuDoubleComplex *cs) { - using FuncPtr = void(CUBLASWINAPI *)(cuDoubleComplex *, cuDoubleComplex, - double *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZrotg"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZrotg"); - return func_ptr(ca, cb, sc, cs); -} - -void CUBLASWINAPI cublasSrotm(int n, float *x, int incx, float *y, int incy, - const float *sparam) { - using FuncPtr = - void(CUBLASWINAPI *)(int, float *, int, float *, int, const float *); - static auto func_ptr = LoadSymbol("cublasSrotm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSrotm"); - return func_ptr(n, x, incx, y, incy, sparam); -} - -void CUBLASWINAPI cublasDrotm(int n, double *x, int incx, double *y, int incy, - const double *sparam) { - using FuncPtr = - void(CUBLASWINAPI *)(int, double *, int, double *, int, const double *); - static auto func_ptr = LoadSymbol("cublasDrotm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDrotm"); - return func_ptr(n, x, incx, y, incy, sparam); -} - -void CUBLASWINAPI cublasSrotmg(float *sd1, float *sd2, float *sx1, - const float *sy1, float *sparam) { - using FuncPtr = - void(CUBLASWINAPI *)(float *, float *, float *, const float *, float *); - static auto func_ptr = LoadSymbol("cublasSrotmg"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSrotmg"); - return func_ptr(sd1, sd2, 
sx1, sy1, sparam); -} - -void CUBLASWINAPI cublasDrotmg(double *sd1, double *sd2, double *sx1, - const double *sy1, double *sparam) { - using FuncPtr = void(CUBLASWINAPI *)(double *, double *, double *, - const double *, double *); - static auto func_ptr = LoadSymbol("cublasDrotmg"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDrotmg"); - return func_ptr(sd1, sd2, sx1, sy1, sparam); -} - -void CUBLASWINAPI cublasSgemv(char trans, int m, int n, float alpha, - const float *A, int lda, const float *x, int incx, - float beta, float *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, int, float, const float *, int, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSgemv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSgemv"); - return func_ptr(trans, m, n, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasDgemv(char trans, int m, int n, double alpha, - const double *A, int lda, const double *x, - int incx, double beta, double *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, int, double, const double *, int, - const double *, int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDgemv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDgemv"); - return func_ptr(trans, m, n, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasCgemv(char trans, int m, int n, cuComplex alpha, - const cuComplex *A, int lda, const cuComplex *x, - int incx, cuComplex beta, cuComplex *y, - int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgemv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCgemv"); - return func_ptr(trans, m, n, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasZgemv(char trans, int m, int n, cuDoubleComplex alpha, - const cuDoubleComplex *A, int lda, - const cuDoubleComplex 
*x, int incx, - cuDoubleComplex beta, cuDoubleComplex *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZgemv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZgemv"); - return func_ptr(trans, m, n, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasSgbmv(char trans, int m, int n, int kl, int ku, - float alpha, const float *A, int lda, - const float *x, int incx, float beta, float *y, - int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, int, int, int, float, const float *, int, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSgbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSgbmv"); - return func_ptr(trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasDgbmv(char trans, int m, int n, int kl, int ku, - double alpha, const double *A, int lda, - const double *x, int incx, double beta, double *y, - int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, int, int, int, double, const double *, - int, const double *, int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDgbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDgbmv"); - return func_ptr(trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasCgbmv(char trans, int m, int n, int kl, int ku, - cuComplex alpha, const cuComplex *A, int lda, - const cuComplex *x, int incx, cuComplex beta, - cuComplex *y, int incy) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, int, int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCgbmv"); - return func_ptr(trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, 
incy); -} - -void CUBLASWINAPI cublasZgbmv(char trans, int m, int n, int kl, int ku, - cuDoubleComplex alpha, const cuDoubleComplex *A, - int lda, const cuDoubleComplex *x, int incx, - cuDoubleComplex beta, cuDoubleComplex *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, int, int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZgbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZgbmv"); - return func_ptr(trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasStrmv(char uplo, char trans, char diag, int n, - const float *A, int lda, float *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const float *, - int, float *, int); - static auto func_ptr = LoadSymbol("cublasStrmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasStrmv"); - return func_ptr(uplo, trans, diag, n, A, lda, x, incx); -} - -void CUBLASWINAPI cublasDtrmv(char uplo, char trans, char diag, int n, - const double *A, int lda, double *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const double *, - int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtrmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDtrmv"); - return func_ptr(uplo, trans, diag, n, A, lda, x, incx); -} - -void CUBLASWINAPI cublasCtrmv(char uplo, char trans, char diag, int n, - const cuComplex *A, int lda, cuComplex *x, - int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const cuComplex *, - int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtrmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCtrmv"); - return func_ptr(uplo, trans, diag, n, A, lda, x, incx); -} - -void CUBLASWINAPI cublasZtrmv(char uplo, char trans, char diag, int n, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *x, int incx) { - using FuncPtr = - void(CUBLASWINAPI 
*)(char, char, char, int, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtrmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZtrmv"); - return func_ptr(uplo, trans, diag, n, A, lda, x, incx); -} - -void CUBLASWINAPI cublasStbmv(char uplo, char trans, char diag, int n, int k, - const float *A, int lda, float *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, int, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasStbmv"); - return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); -} - -void CUBLASWINAPI cublasDtbmv(char uplo, char trans, char diag, int n, int k, - const double *A, int lda, double *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, int, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDtbmv"); - return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); -} - -void CUBLASWINAPI cublasCtbmv(char uplo, char trans, char diag, int n, int k, - const cuComplex *A, int lda, cuComplex *x, - int incx) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, char, int, int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCtbmv"); - return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); -} - -void CUBLASWINAPI cublasZtbmv(char uplo, char trans, char diag, int n, int k, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *x, int incx) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, char, int, int, const cuDoubleComplex *, - int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZtbmv"); - return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); -} - -void CUBLASWINAPI cublasStpmv(char uplo, char trans, 
char diag, int n, - const float *AP, float *x, int incx) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, char, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasStpmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasStpmv"); - return func_ptr(uplo, trans, diag, n, AP, x, incx); -} - -void CUBLASWINAPI cublasDtpmv(char uplo, char trans, char diag, int n, - const double *AP, double *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const double *, - double *, int); - static auto func_ptr = LoadSymbol("cublasDtpmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDtpmv"); - return func_ptr(uplo, trans, diag, n, AP, x, incx); -} - -void CUBLASWINAPI cublasCtpmv(char uplo, char trans, char diag, int n, - const cuComplex *AP, cuComplex *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtpmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCtpmv"); - return func_ptr(uplo, trans, diag, n, AP, x, incx); -} - -void CUBLASWINAPI cublasZtpmv(char uplo, char trans, char diag, int n, - const cuDoubleComplex *AP, cuDoubleComplex *x, - int incx) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, char, int, const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtpmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZtpmv"); - return func_ptr(uplo, trans, diag, n, AP, x, incx); -} - -void CUBLASWINAPI cublasStrsv(char uplo, char trans, char diag, int n, - const float *A, int lda, float *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const float *, - int, float *, int); - static auto func_ptr = LoadSymbol("cublasStrsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasStrsv"); - return func_ptr(uplo, trans, diag, n, A, lda, x, incx); -} - -void CUBLASWINAPI cublasDtrsv(char uplo, char trans, char diag, int n, - const double *A, int lda, 
double *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const double *, - int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtrsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDtrsv"); - return func_ptr(uplo, trans, diag, n, A, lda, x, incx); -} - -void CUBLASWINAPI cublasCtrsv(char uplo, char trans, char diag, int n, - const cuComplex *A, int lda, cuComplex *x, - int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const cuComplex *, - int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtrsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCtrsv"); - return func_ptr(uplo, trans, diag, n, A, lda, x, incx); -} - -void CUBLASWINAPI cublasZtrsv(char uplo, char trans, char diag, int n, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *x, int incx) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, char, int, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtrsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZtrsv"); - return func_ptr(uplo, trans, diag, n, A, lda, x, incx); -} - -void CUBLASWINAPI cublasStpsv(char uplo, char trans, char diag, int n, - const float *AP, float *x, int incx) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, char, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasStpsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasStpsv"); - return func_ptr(uplo, trans, diag, n, AP, x, incx); -} - -void CUBLASWINAPI cublasDtpsv(char uplo, char trans, char diag, int n, - const double *AP, double *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const double *, - double *, int); - static auto func_ptr = LoadSymbol("cublasDtpsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDtpsv"); - return func_ptr(uplo, trans, diag, n, AP, x, incx); -} - -void CUBLASWINAPI cublasCtpsv(char uplo, char trans, char diag, int n, - const cuComplex *AP, cuComplex *x, 
int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtpsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCtpsv"); - return func_ptr(uplo, trans, diag, n, AP, x, incx); -} - -void CUBLASWINAPI cublasZtpsv(char uplo, char trans, char diag, int n, - const cuDoubleComplex *AP, cuDoubleComplex *x, - int incx) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, char, int, const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtpsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZtpsv"); - return func_ptr(uplo, trans, diag, n, AP, x, incx); -} - -void CUBLASWINAPI cublasStbsv(char uplo, char trans, char diag, int n, int k, - const float *A, int lda, float *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, int, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStbsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasStbsv"); - return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); -} - -void CUBLASWINAPI cublasDtbsv(char uplo, char trans, char diag, int n, int k, - const double *A, int lda, double *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, int, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtbsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDtbsv"); - return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); -} - -void CUBLASWINAPI cublasCtbsv(char uplo, char trans, char diag, int n, int k, - const cuComplex *A, int lda, cuComplex *x, - int incx) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, char, int, int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtbsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCtbsv"); - return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); -} - -void CUBLASWINAPI cublasZtbsv(char uplo, char trans, char 
diag, int n, int k, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *x, int incx) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, char, int, int, const cuDoubleComplex *, - int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtbsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZtbsv"); - return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); -} - -void CUBLASWINAPI cublasSsymv(char uplo, int n, float alpha, const float *A, - int lda, const float *x, int incx, float beta, - float *y, int incy) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const float *, int, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSsymv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSsymv"); - return func_ptr(uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasDsymv(char uplo, int n, double alpha, const double *A, - int lda, const double *x, int incx, double beta, - double *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, double, const double *, int, - const double *, int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDsymv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDsymv"); - return func_ptr(uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasChemv(char uplo, int n, cuComplex alpha, - const cuComplex *A, int lda, const cuComplex *x, - int incx, cuComplex beta, cuComplex *y, - int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasChemv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasChemv"); - return func_ptr(uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasZhemv(char uplo, int n, cuDoubleComplex alpha, - const cuDoubleComplex *A, int lda, - const cuDoubleComplex *x, int incx, - cuDoubleComplex beta, cuDoubleComplex *y, - int 
incy) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZhemv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZhemv"); - return func_ptr(uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasSsbmv(char uplo, int n, int k, float alpha, - const float *A, int lda, const float *x, int incx, - float beta, float *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, int, float, const float *, int, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSsbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSsbmv"); - return func_ptr(uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasDsbmv(char uplo, int n, int k, double alpha, - const double *A, int lda, const double *x, - int incx, double beta, double *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, int, double, const double *, int, - const double *, int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDsbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDsbmv"); - return func_ptr(uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasChbmv(char uplo, int n, int k, cuComplex alpha, - const cuComplex *A, int lda, const cuComplex *x, - int incx, cuComplex beta, cuComplex *y, - int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasChbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasChbmv"); - return func_ptr(uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasZhbmv(char uplo, int n, int k, cuDoubleComplex alpha, - const cuDoubleComplex *A, int lda, - const cuDoubleComplex *x, int incx, - cuDoubleComplex beta, 
cuDoubleComplex *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZhbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZhbmv"); - return func_ptr(uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasSspmv(char uplo, int n, float alpha, const float *AP, - const float *x, int incx, float beta, float *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const float *, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSspmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSspmv"); - return func_ptr(uplo, n, alpha, AP, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasDspmv(char uplo, int n, double alpha, const double *AP, - const double *x, int incx, double beta, double *y, - int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, double, const double *, const double *, - int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDspmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDspmv"); - return func_ptr(uplo, n, alpha, AP, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasChpmv(char uplo, int n, cuComplex alpha, - const cuComplex *AP, const cuComplex *x, int incx, - cuComplex beta, cuComplex *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasChpmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasChpmv"); - return func_ptr(uplo, n, alpha, AP, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasZhpmv(char uplo, int n, cuDoubleComplex alpha, - const cuDoubleComplex *AP, - const cuDoubleComplex *x, int incx, - cuDoubleComplex beta, cuDoubleComplex *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)( - char, 
int, cuDoubleComplex, const cuDoubleComplex *, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZhpmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZhpmv"); - return func_ptr(uplo, n, alpha, AP, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasSger(int m, int n, float alpha, const float *x, - int incx, const float *y, int incy, float *A, - int lda) { - using FuncPtr = void(CUBLASWINAPI *)(int, int, float, const float *, int, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSger"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSger"); - return func_ptr(m, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasDger(int m, int n, double alpha, const double *x, - int incx, const double *y, int incy, double *A, - int lda) { - using FuncPtr = void(CUBLASWINAPI *)(int, int, double, const double *, int, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDger"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDger"); - return func_ptr(m, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasCgeru(int m, int n, cuComplex alpha, const cuComplex *x, - int incx, const cuComplex *y, int incy, - cuComplex *A, int lda) { - using FuncPtr = - void(CUBLASWINAPI *)(int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgeru"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCgeru"); - return func_ptr(m, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasCgerc(int m, int n, cuComplex alpha, const cuComplex *x, - int incx, const cuComplex *y, int incy, - cuComplex *A, int lda) { - using FuncPtr = - void(CUBLASWINAPI *)(int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgerc"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCgerc"); - return 
func_ptr(m, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasZgeru(int m, int n, cuDoubleComplex alpha, - const cuDoubleComplex *x, int incx, - const cuDoubleComplex *y, int incy, - cuDoubleComplex *A, int lda) { - using FuncPtr = void(CUBLASWINAPI *)( - int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZgeru"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZgeru"); - return func_ptr(m, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasZgerc(int m, int n, cuDoubleComplex alpha, - const cuDoubleComplex *x, int incx, - const cuDoubleComplex *y, int incy, - cuDoubleComplex *A, int lda) { - using FuncPtr = void(CUBLASWINAPI *)( - int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZgerc"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZgerc"); - return func_ptr(m, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasSsyr(char uplo, int n, float alpha, const float *x, - int incx, float *A, int lda) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, float, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSsyr"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSsyr"); - return func_ptr(uplo, n, alpha, x, incx, A, lda); -} - -void CUBLASWINAPI cublasDsyr(char uplo, int n, double alpha, const double *x, - int incx, double *A, int lda) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, double, const double *, int, - double *, int); - static auto func_ptr = LoadSymbol("cublasDsyr"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDsyr"); - return func_ptr(uplo, n, alpha, x, incx, A, lda); -} - -void CUBLASWINAPI cublasCher(char uplo, int n, float alpha, const cuComplex *x, - int incx, cuComplex *A, int lda) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const cuComplex *, 
int, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCher"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCher"); - return func_ptr(uplo, n, alpha, x, incx, A, lda); -} - -void CUBLASWINAPI cublasZher(char uplo, int n, double alpha, - const cuDoubleComplex *x, int incx, - cuDoubleComplex *A, int lda) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, double, const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZher"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZher"); - return func_ptr(uplo, n, alpha, x, incx, A, lda); -} - -void CUBLASWINAPI cublasSspr(char uplo, int n, float alpha, const float *x, - int incx, float *AP) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, float, const float *, int, float *); - static auto func_ptr = LoadSymbol("cublasSspr"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSspr"); - return func_ptr(uplo, n, alpha, x, incx, AP); -} - -void CUBLASWINAPI cublasDspr(char uplo, int n, double alpha, const double *x, - int incx, double *AP) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, double, const double *, int, double *); - static auto func_ptr = LoadSymbol("cublasDspr"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDspr"); - return func_ptr(uplo, n, alpha, x, incx, AP); -} - -void CUBLASWINAPI cublasChpr(char uplo, int n, float alpha, const cuComplex *x, - int incx, cuComplex *AP) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const cuComplex *, int, - cuComplex *); - static auto func_ptr = LoadSymbol("cublasChpr"); - if (!func_ptr) LogFatalSymbolNotFound("cublasChpr"); - return func_ptr(uplo, n, alpha, x, incx, AP); -} - -void CUBLASWINAPI cublasZhpr(char uplo, int n, double alpha, - const cuDoubleComplex *x, int incx, - cuDoubleComplex *AP) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, double, const cuDoubleComplex *, int, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZhpr"); - if (!func_ptr) 
LogFatalSymbolNotFound("cublasZhpr"); - return func_ptr(uplo, n, alpha, x, incx, AP); -} - -void CUBLASWINAPI cublasSsyr2(char uplo, int n, float alpha, const float *x, - int incx, const float *y, int incy, float *A, - int lda) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const float *, int, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSsyr2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSsyr2"); - return func_ptr(uplo, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasDsyr2(char uplo, int n, double alpha, const double *x, - int incx, const double *y, int incy, double *A, - int lda) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, double, const double *, int, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDsyr2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDsyr2"); - return func_ptr(uplo, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasCher2(char uplo, int n, cuComplex alpha, - const cuComplex *x, int incx, const cuComplex *y, - int incy, cuComplex *A, int lda) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCher2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCher2"); - return func_ptr(uplo, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasZher2(char uplo, int n, cuDoubleComplex alpha, - const cuDoubleComplex *x, int incx, - const cuDoubleComplex *y, int incy, - cuDoubleComplex *A, int lda) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZher2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZher2"); - return func_ptr(uplo, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasSspr2(char uplo, int n, float alpha, const float 
*x, - int incx, const float *y, int incy, float *AP) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const float *, int, - const float *, int, float *); - static auto func_ptr = LoadSymbol("cublasSspr2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSspr2"); - return func_ptr(uplo, n, alpha, x, incx, y, incy, AP); -} - -void CUBLASWINAPI cublasDspr2(char uplo, int n, double alpha, const double *x, - int incx, const double *y, int incy, double *AP) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, double, const double *, int, - const double *, int, double *); - static auto func_ptr = LoadSymbol("cublasDspr2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDspr2"); - return func_ptr(uplo, n, alpha, x, incx, y, incy, AP); -} - -void CUBLASWINAPI cublasChpr2(char uplo, int n, cuComplex alpha, - const cuComplex *x, int incx, const cuComplex *y, - int incy, cuComplex *AP) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex *); - static auto func_ptr = LoadSymbol("cublasChpr2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasChpr2"); - return func_ptr(uplo, n, alpha, x, incx, y, incy, AP); -} - -void CUBLASWINAPI cublasZhpr2(char uplo, int n, cuDoubleComplex alpha, - const cuDoubleComplex *x, int incx, - const cuDoubleComplex *y, int incy, - cuDoubleComplex *AP) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZhpr2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZhpr2"); - return func_ptr(uplo, n, alpha, x, incx, y, incy, AP); -} - -void CUBLASWINAPI cublasSgemm(char transa, char transb, int m, int n, int k, - float alpha, const float *A, int lda, - const float *B, int ldb, float beta, float *C, - int ldc) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, int, int, int, float, const float *, int, - const float *, int, float, float 
*, int); - static auto func_ptr = LoadSymbol("cublasSgemm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSgemm"); - return func_ptr(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasDgemm(char transa, char transb, int m, int n, int k, - double alpha, const double *A, int lda, - const double *B, int ldb, double beta, double *C, - int ldc) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, int, int, int, double, const double *, - int, const double *, int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDgemm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDgemm"); - return func_ptr(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasCgemm(char transa, char transb, int m, int n, int k, - cuComplex alpha, const cuComplex *A, int lda, - const cuComplex *B, int ldb, cuComplex beta, - cuComplex *C, int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgemm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCgemm"); - return func_ptr(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasZgemm(char transa, char transb, int m, int n, int k, - cuDoubleComplex alpha, const cuDoubleComplex *A, - int lda, const cuDoubleComplex *B, int ldb, - cuDoubleComplex beta, cuDoubleComplex *C, - int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZgemm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZgemm"); - return func_ptr(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasSsyrk(char uplo, char trans, int n, int k, float alpha, - const float *A, int 
lda, float beta, float *C, - int ldc) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, int, int, float, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSsyrk"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSsyrk"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -void CUBLASWINAPI cublasDsyrk(char uplo, char trans, int n, int k, double alpha, - const double *A, int lda, double beta, double *C, - int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, double, const double *, int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDsyrk"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDsyrk"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -void CUBLASWINAPI cublasCsyrk(char uplo, char trans, int n, int k, - cuComplex alpha, const cuComplex *A, int lda, - cuComplex beta, cuComplex *C, int ldc) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, int, int, cuComplex, const cuComplex *, - int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsyrk"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCsyrk"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -void CUBLASWINAPI cublasZsyrk(char uplo, char trans, int n, int k, - cuDoubleComplex alpha, const cuDoubleComplex *A, - int lda, cuDoubleComplex beta, cuDoubleComplex *C, - int ldc) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, int, int, cuDoubleComplex, - const cuDoubleComplex *, int, - cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZsyrk"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZsyrk"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -void CUBLASWINAPI cublasCherk(char uplo, char trans, int n, int k, float alpha, - const cuComplex *A, int lda, float beta, - cuComplex *C, int ldc) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, int, int, float, const 
cuComplex *, int, - float, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCherk"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCherk"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -void CUBLASWINAPI cublasZherk(char uplo, char trans, int n, int k, double alpha, - const cuDoubleComplex *A, int lda, double beta, - cuDoubleComplex *C, int ldc) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, int, int, double, - const cuDoubleComplex *, int, double, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZherk"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZherk"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -void CUBLASWINAPI cublasSsyr2k(char uplo, char trans, int n, int k, float alpha, - const float *A, int lda, const float *B, int ldb, - float beta, float *C, int ldc) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, int, int, float, const float *, int, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSsyr2k"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSsyr2k"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasDsyr2k(char uplo, char trans, int n, int k, - double alpha, const double *A, int lda, - const double *B, int ldb, double beta, double *C, - int ldc) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, int, int, double, const double *, int, - const double *, int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDsyr2k"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDsyr2k"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasCsyr2k(char uplo, char trans, int n, int k, - cuComplex alpha, const cuComplex *A, int lda, - const cuComplex *B, int ldb, cuComplex beta, - cuComplex *C, int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, cuComplex, const cuComplex *, int, - 
const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsyr2k"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCsyr2k"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasZsyr2k(char uplo, char trans, int n, int k, - cuDoubleComplex alpha, const cuDoubleComplex *A, - int lda, const cuDoubleComplex *B, int ldb, - cuDoubleComplex beta, cuDoubleComplex *C, - int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZsyr2k"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZsyr2k"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasCher2k(char uplo, char trans, int n, int k, - cuComplex alpha, const cuComplex *A, int lda, - const cuComplex *B, int ldb, float beta, - cuComplex *C, int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, float, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCher2k"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCher2k"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasZher2k(char uplo, char trans, int n, int k, - cuDoubleComplex alpha, const cuDoubleComplex *A, - int lda, const cuDoubleComplex *B, int ldb, - double beta, cuDoubleComplex *C, int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, double, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZher2k"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZher2k"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasSsymm(char side, char 
uplo, int m, int n, float alpha, - const float *A, int lda, const float *B, int ldb, - float beta, float *C, int ldc) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, int, int, float, const float *, int, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSsymm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSsymm"); - return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasDsymm(char side, char uplo, int m, int n, double alpha, - const double *A, int lda, const double *B, - int ldb, double beta, double *C, int ldc) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, int, int, double, const double *, int, - const double *, int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDsymm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDsymm"); - return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasCsymm(char side, char uplo, int m, int n, - cuComplex alpha, const cuComplex *A, int lda, - const cuComplex *B, int ldb, cuComplex beta, - cuComplex *C, int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsymm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCsymm"); - return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasZsymm(char side, char uplo, int m, int n, - cuDoubleComplex alpha, const cuDoubleComplex *A, - int lda, const cuDoubleComplex *B, int ldb, - cuDoubleComplex beta, cuDoubleComplex *C, - int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZsymm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZsymm"); - return 
func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasChemm(char side, char uplo, int m, int n, - cuComplex alpha, const cuComplex *A, int lda, - const cuComplex *B, int ldb, cuComplex beta, - cuComplex *C, int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasChemm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasChemm"); - return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasZhemm(char side, char uplo, int m, int n, - cuDoubleComplex alpha, const cuDoubleComplex *A, - int lda, const cuDoubleComplex *B, int ldb, - cuDoubleComplex beta, cuDoubleComplex *C, - int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZhemm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZhemm"); - return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasStrsm(char side, char uplo, char transa, char diag, - int m, int n, float alpha, const float *A, - int lda, float *B, int ldb) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, float, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStrsm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasStrsm"); - return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); -} - -void CUBLASWINAPI cublasDtrsm(char side, char uplo, char transa, char diag, - int m, int n, double alpha, const double *A, - int lda, double *B, int ldb) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, double, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtrsm"); - if (!func_ptr) 
LogFatalSymbolNotFound("cublasDtrsm"); - return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); -} - -void CUBLASWINAPI cublasCtrsm(char side, char uplo, char transa, char diag, - int m, int n, cuComplex alpha, const cuComplex *A, - int lda, cuComplex *B, int ldb) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, char, char, int, int, cuComplex, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtrsm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCtrsm"); - return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); -} - -void CUBLASWINAPI cublasZtrsm(char side, char uplo, char transa, char diag, - int m, int n, cuDoubleComplex alpha, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *B, int ldb) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, - cuDoubleComplex, const cuDoubleComplex *, - int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtrsm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZtrsm"); - return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); -} - -void CUBLASWINAPI cublasStrmm(char side, char uplo, char transa, char diag, - int m, int n, float alpha, const float *A, - int lda, float *B, int ldb) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, float, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStrmm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasStrmm"); - return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); -} - -void CUBLASWINAPI cublasDtrmm(char side, char uplo, char transa, char diag, - int m, int n, double alpha, const double *A, - int lda, double *B, int ldb) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, double, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtrmm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDtrmm"); - return func_ptr(side, uplo, 
transa, diag, m, n, alpha, A, lda, B, ldb); -} - -void CUBLASWINAPI cublasCtrmm(char side, char uplo, char transa, char diag, - int m, int n, cuComplex alpha, const cuComplex *A, - int lda, cuComplex *B, int ldb) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, char, char, int, int, cuComplex, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtrmm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCtrmm"); - return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); -} - -void CUBLASWINAPI cublasZtrmm(char side, char uplo, char transa, char diag, - int m, int n, cuDoubleComplex alpha, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *B, int ldb) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, - cuDoubleComplex, const cuDoubleComplex *, - int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtrmm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZtrmm"); - return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cublas_11_0.inc b/third_party/xla/third_party/tsl/tsl/cuda/cublas_11_0.inc deleted file mode 100644 index d287a91c562217..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cublas_11_0.inc +++ /dev/null @@ -1,5197 +0,0 @@ -// Auto-generated, do not edit. 
- -extern "C" { -cublasStatus_t CUBLASWINAPI cublasCreate_v2(cublasHandle_t *handle) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t *); - static auto func_ptr = LoadSymbol("cublasCreate_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cublasStatus_t CUBLASWINAPI cublasDestroy_v2(cublasHandle_t handle) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t); - static auto func_ptr = LoadSymbol("cublasDestroy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cublasStatus_t CUBLASWINAPI cublasGetVersion_v2(cublasHandle_t handle, - int *version) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int *); - static auto func_ptr = LoadSymbol("cublasGetVersion_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, version); -} - -cublasStatus_t CUBLASWINAPI cublasGetProperty(libraryPropertyType type, - int *value) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(libraryPropertyType, int *); - static auto func_ptr = LoadSymbol("cublasGetProperty"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type, value); -} - -size_t CUBLASWINAPI cublasGetCudartVersion(void) { - using FuncPtr = size_t(CUBLASWINAPI *)(); - static auto func_ptr = LoadSymbol("cublasGetCudartVersion"); - if (!func_ptr) LogFatalSymbolNotFound("cublasGetCudartVersion"); - return func_ptr(); -} - -cublasStatus_t CUBLASWINAPI cublasSetWorkspace_v2(cublasHandle_t handle, - void *workspace, - size_t workspaceSizeInBytes) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, void *, size_t); - static auto func_ptr = LoadSymbol("cublasSetWorkspace_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, workspace, workspaceSizeInBytes); -} - -cublasStatus_t CUBLASWINAPI cublasSetStream_v2(cublasHandle_t handle, - cudaStream_t streamId) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, 
cudaStream_t); - static auto func_ptr = LoadSymbol("cublasSetStream_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cublasStatus_t CUBLASWINAPI cublasGetStream_v2(cublasHandle_t handle, - cudaStream_t *streamId) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cudaStream_t *); - static auto func_ptr = LoadSymbol("cublasGetStream_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cublasStatus_t CUBLASWINAPI cublasGetPointerMode_v2(cublasHandle_t handle, - cublasPointerMode_t *mode) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasPointerMode_t *); - static auto func_ptr = LoadSymbol("cublasGetPointerMode_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cublasStatus_t CUBLASWINAPI cublasSetPointerMode_v2(cublasHandle_t handle, - cublasPointerMode_t mode) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasPointerMode_t); - static auto func_ptr = LoadSymbol("cublasSetPointerMode_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cublasStatus_t CUBLASWINAPI cublasGetAtomicsMode(cublasHandle_t handle, - cublasAtomicsMode_t *mode) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasAtomicsMode_t *); - static auto func_ptr = LoadSymbol("cublasGetAtomicsMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cublasStatus_t CUBLASWINAPI cublasSetAtomicsMode(cublasHandle_t handle, - cublasAtomicsMode_t mode) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasAtomicsMode_t); - static auto func_ptr = LoadSymbol("cublasSetAtomicsMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cublasStatus_t CUBLASWINAPI cublasGetMathMode(cublasHandle_t handle, - cublasMath_t *mode) { - using FuncPtr = - 
cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasMath_t *); - static auto func_ptr = LoadSymbol("cublasGetMathMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cublasStatus_t CUBLASWINAPI cublasSetMathMode(cublasHandle_t handle, - cublasMath_t mode) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasMath_t); - static auto func_ptr = LoadSymbol("cublasSetMathMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cublasStatus_t CUBLASWINAPI cublasGetSmCountTarget(cublasHandle_t handle, - int *smCountTarget) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int *); - static auto func_ptr = LoadSymbol("cublasGetSmCountTarget"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, smCountTarget); -} - -cublasStatus_t CUBLASWINAPI cublasSetSmCountTarget(cublasHandle_t handle, - int smCountTarget) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int); - static auto func_ptr = LoadSymbol("cublasSetSmCountTarget"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, smCountTarget); -} - -const char *CUBLASWINAPI cublasGetStatusName(cublasStatus_t status) { - using FuncPtr = const char *(CUBLASWINAPI *)(cublasStatus_t); - static auto func_ptr = LoadSymbol("cublasGetStatusName"); - if (!func_ptr) return "cublasGetStatusName symbol not found."; - return func_ptr(status); -} - -const char *CUBLASWINAPI cublasGetStatusString(cublasStatus_t status) { - using FuncPtr = const char *(CUBLASWINAPI *)(cublasStatus_t); - static auto func_ptr = LoadSymbol("cublasGetStatusString"); - if (!func_ptr) return "cublasGetStatusString symbol not found."; - return func_ptr(status); -} - -cublasStatus_t CUBLASWINAPI cublasLoggerConfigure(int logIsOn, int logToStdOut, - int logToStdErr, - const char *logFileName) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, int, const char *); - static 
auto func_ptr = LoadSymbol("cublasLoggerConfigure"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(logIsOn, logToStdOut, logToStdErr, logFileName); -} - -cublasStatus_t CUBLASWINAPI -cublasSetLoggerCallback(cublasLogCallback userCallback) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasLogCallback); - static auto func_ptr = LoadSymbol("cublasSetLoggerCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(userCallback); -} - -cublasStatus_t CUBLASWINAPI -cublasGetLoggerCallback(cublasLogCallback *userCallback) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasLogCallback *); - static auto func_ptr = LoadSymbol("cublasGetLoggerCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(userCallback); -} - -cublasStatus_t CUBLASWINAPI cublasSetVector(int n, int elemSize, const void *x, - int incx, void *devicePtr, - int incy) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(int, int, const void *, int, void *, int); - static auto func_ptr = LoadSymbol("cublasSetVector"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(n, elemSize, x, incx, devicePtr, incy); -} - -cublasStatus_t CUBLASWINAPI cublasGetVector(int n, int elemSize, const void *x, - int incx, void *y, int incy) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(int, int, const void *, int, void *, int); - static auto func_ptr = LoadSymbol("cublasGetVector"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(n, elemSize, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasSetMatrix(int rows, int cols, int elemSize, - const void *A, int lda, void *B, - int ldb) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, int, const void *, - int, void *, int); - static auto func_ptr = LoadSymbol("cublasSetMatrix"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rows, cols, elemSize, A, lda, B, ldb); -} - -cublasStatus_t CUBLASWINAPI cublasGetMatrix(int 
rows, int cols, int elemSize, - const void *A, int lda, void *B, - int ldb) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, int, const void *, - int, void *, int); - static auto func_ptr = LoadSymbol("cublasGetMatrix"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rows, cols, elemSize, A, lda, B, ldb); -} - -cublasStatus_t CUBLASWINAPI cublasSetVectorAsync(int n, int elemSize, - const void *hostPtr, int incx, - void *devicePtr, int incy, - cudaStream_t stream) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, const void *, int, - void *, int, cudaStream_t); - static auto func_ptr = LoadSymbol("cublasSetVectorAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(n, elemSize, hostPtr, incx, devicePtr, incy, stream); -} - -cublasStatus_t CUBLASWINAPI cublasGetVectorAsync(int n, int elemSize, - const void *devicePtr, - int incx, void *hostPtr, - int incy, - cudaStream_t stream) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, const void *, int, - void *, int, cudaStream_t); - static auto func_ptr = LoadSymbol("cublasGetVectorAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(n, elemSize, devicePtr, incx, hostPtr, incy, stream); -} - -cublasStatus_t CUBLASWINAPI cublasSetMatrixAsync(int rows, int cols, - int elemSize, const void *A, - int lda, void *B, int ldb, - cudaStream_t stream) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - int, int, int, const void *, int, void *, int, cudaStream_t); - static auto func_ptr = LoadSymbol("cublasSetMatrixAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rows, cols, elemSize, A, lda, B, ldb, stream); -} - -cublasStatus_t CUBLASWINAPI cublasGetMatrixAsync(int rows, int cols, - int elemSize, const void *A, - int lda, void *B, int ldb, - cudaStream_t stream) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - int, int, int, const void *, int, void *, int, cudaStream_t); - static auto func_ptr = 
LoadSymbol("cublasGetMatrixAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rows, cols, elemSize, A, lda, B, ldb, stream); -} - -void CUBLASWINAPI cublasXerbla(const char *srName, int info) { - using FuncPtr = void(CUBLASWINAPI *)(const char *, int); - static auto func_ptr = LoadSymbol("cublasXerbla"); - if (!func_ptr) LogFatalSymbolNotFound("cublasXerbla"); - return func_ptr(srName, info); -} - -cublasStatus_t CUBLASWINAPI cublasNrm2Ex(cublasHandle_t handle, int n, - const void *x, cudaDataType xType, - int incx, void *result, - cudaDataType resultType, - cudaDataType executionType) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const void *, cudaDataType, int, void *, - cudaDataType, cudaDataType); - static auto func_ptr = LoadSymbol("cublasNrm2Ex"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, xType, incx, result, resultType, executionType); -} - -cublasStatus_t CUBLASWINAPI cublasSnrm2_v2(cublasHandle_t handle, int n, - const float *x, int incx, - float *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const float *, int, float *); - static auto func_ptr = LoadSymbol("cublasSnrm2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasDnrm2_v2(cublasHandle_t handle, int n, - const double *x, int incx, - double *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const double *, int, double *); - static auto func_ptr = LoadSymbol("cublasDnrm2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasScnrm2_v2(cublasHandle_t handle, int n, - const cuComplex *x, int incx, - float *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuComplex *, int, float *); - static auto func_ptr = 
LoadSymbol("cublasScnrm2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasDznrm2_v2(cublasHandle_t handle, int n, - const cuDoubleComplex *x, int incx, - double *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *, int, double *); - static auto func_ptr = LoadSymbol("cublasDznrm2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasDotEx(cublasHandle_t handle, int n, - const void *x, cudaDataType xType, - int incx, const void *y, - cudaDataType yType, int incy, - void *result, cudaDataType resultType, - cudaDataType executionType) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const void *, cudaDataType, int, const void *, - cudaDataType, int, void *, cudaDataType, cudaDataType); - static auto func_ptr = LoadSymbol("cublasDotEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, xType, incx, y, yType, incy, result, resultType, - executionType); -} - -cublasStatus_t CUBLASWINAPI cublasDotcEx(cublasHandle_t handle, int n, - const void *x, cudaDataType xType, - int incx, const void *y, - cudaDataType yType, int incy, - void *result, cudaDataType resultType, - cudaDataType executionType) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const void *, cudaDataType, int, const void *, - cudaDataType, int, void *, cudaDataType, cudaDataType); - static auto func_ptr = LoadSymbol("cublasDotcEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, xType, incx, y, yType, incy, result, resultType, - executionType); -} - -cublasStatus_t CUBLASWINAPI cublasSdot_v2(cublasHandle_t handle, int n, - const float *x, int incx, - const float *y, int incy, - float *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - 
cublasHandle_t, int, const float *, int, const float *, int, float *); - static auto func_ptr = LoadSymbol("cublasSdot_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, result); -} - -cublasStatus_t CUBLASWINAPI cublasDdot_v2(cublasHandle_t handle, int n, - const double *x, int incx, - const double *y, int incy, - double *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const double *, int, const double *, int, double *); - static auto func_ptr = LoadSymbol("cublasDdot_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, result); -} - -cublasStatus_t CUBLASWINAPI cublasCdotu_v2(cublasHandle_t handle, int n, - const cuComplex *x, int incx, - const cuComplex *y, int incy, - cuComplex *result) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *, - int, const cuComplex *, int, cuComplex *); - static auto func_ptr = LoadSymbol("cublasCdotu_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, result); -} - -cublasStatus_t CUBLASWINAPI cublasCdotc_v2(cublasHandle_t handle, int n, - const cuComplex *x, int incx, - const cuComplex *y, int incy, - cuComplex *result) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *, - int, const cuComplex *, int, cuComplex *); - static auto func_ptr = LoadSymbol("cublasCdotc_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, result); -} - -cublasStatus_t CUBLASWINAPI cublasZdotu_v2(cublasHandle_t handle, int n, - const cuDoubleComplex *x, int incx, - const cuDoubleComplex *y, int incy, - cuDoubleComplex *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZdotu_v2"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, result); -} - -cublasStatus_t CUBLASWINAPI cublasZdotc_v2(cublasHandle_t handle, int n, - const cuDoubleComplex *x, int incx, - const cuDoubleComplex *y, int incy, - cuDoubleComplex *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZdotc_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, result); -} - -cublasStatus_t CUBLASWINAPI -cublasScalEx(cublasHandle_t handle, int n, - const void *alpha, /* host or device pointer */ - cudaDataType alphaType, void *x, cudaDataType xType, int incx, - cudaDataType executionType) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const void *, cudaDataType, void *, cudaDataType, - int, cudaDataType); - static auto func_ptr = LoadSymbol("cublasScalEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, alphaType, x, xType, incx, executionType); -} - -cublasStatus_t CUBLASWINAPI -cublasSscal_v2(cublasHandle_t handle, int n, - const float *alpha, /* host or device pointer */ - float *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasSscal_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx); -} - -cublasStatus_t CUBLASWINAPI -cublasDscal_v2(cublasHandle_t handle, int n, - const double *alpha, /* host or device pointer */ - double *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDscal_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx); -} - -cublasStatus_t 
CUBLASWINAPI -cublasCscal_v2(cublasHandle_t handle, int n, - const cuComplex *alpha, /* host or device pointer */ - cuComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCscal_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx); -} - -cublasStatus_t CUBLASWINAPI -cublasCsscal_v2(cublasHandle_t handle, int n, - const float *alpha, /* host or device pointer */ - cuComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const float *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsscal_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx); -} - -cublasStatus_t CUBLASWINAPI -cublasZscal_v2(cublasHandle_t handle, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - cuDoubleComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZscal_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx); -} - -cublasStatus_t CUBLASWINAPI -cublasZdscal_v2(cublasHandle_t handle, int n, - const double *alpha, /* host or device pointer */ - cuDoubleComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const double *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZdscal_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasAxpyEx( - cublasHandle_t handle, int n, - const void *alpha, /* host or device pointer */ - cudaDataType alphaType, const void *x, cudaDataType xType, int incx, - void *y, cudaDataType yType, int incy, cudaDataType executiontype) { - using FuncPtr = 
cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const void *, cudaDataType, const void *, - cudaDataType, int, void *, cudaDataType, int, cudaDataType); - static auto func_ptr = LoadSymbol("cublasAxpyEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, alphaType, x, xType, incx, y, yType, incy, - executiontype); -} - -cublasStatus_t CUBLASWINAPI -cublasSaxpy_v2(cublasHandle_t handle, int n, - const float *alpha, /* host or device pointer */ - const float *x, int incx, float *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const float *, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSaxpy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasDaxpy_v2(cublasHandle_t handle, int n, - const double *alpha, /* host or device pointer */ - const double *x, int incx, double *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const double *, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDaxpy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasCaxpy_v2(cublasHandle_t handle, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *x, int incx, cuComplex *y, int incy) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCaxpy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasZaxpy_v2( - cublasHandle_t handle, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *x, int incx, cuDoubleComplex *y, int incy) { - 
using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *, const cuDoubleComplex *, - int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZaxpy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, alpha, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasCopyEx(cublasHandle_t handle, int n, - const void *x, cudaDataType xType, - int incx, void *y, cudaDataType yType, - int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const void *, cudaDataType, int, void *, - cudaDataType, int); - static auto func_ptr = LoadSymbol("cublasCopyEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, xType, incx, y, yType, incy); -} - -cublasStatus_t CUBLASWINAPI cublasScopy_v2(cublasHandle_t handle, int n, - const float *x, int incx, float *y, - int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasScopy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasDcopy_v2(cublasHandle_t handle, int n, - const double *x, int incx, double *y, - int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDcopy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasCcopy_v2(cublasHandle_t handle, int n, - const cuComplex *x, int incx, - cuComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCcopy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy); -} - -cublasStatus_t 
CUBLASWINAPI cublasZcopy_v2(cublasHandle_t handle, int n, - const cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZcopy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasSswap_v2(cublasHandle_t handle, int n, - float *x, int incx, float *y, - int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, float *, - int, float *, int); - static auto func_ptr = LoadSymbol("cublasSswap_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasDswap_v2(cublasHandle_t handle, int n, - double *x, int incx, double *y, - int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, double *, - int, double *, int); - static auto func_ptr = LoadSymbol("cublasDswap_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasCswap_v2(cublasHandle_t handle, int n, - cuComplex *x, int incx, cuComplex *y, - int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCswap_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasZswap_v2(cublasHandle_t handle, int n, - cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZswap_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy); -} - 
-cublasStatus_t CUBLASWINAPI cublasSwapEx(cublasHandle_t handle, int n, void *x, - cudaDataType xType, int incx, void *y, - cudaDataType yType, int incy) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, void *, cudaDataType, - int, void *, cudaDataType, int); - static auto func_ptr = LoadSymbol("cublasSwapEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, xType, incx, y, yType, incy); -} - -cublasStatus_t CUBLASWINAPI cublasIsamax_v2(cublasHandle_t handle, int n, - const float *x, int incx, - int *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const float *, int, int *); - static auto func_ptr = LoadSymbol("cublasIsamax_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasIdamax_v2(cublasHandle_t handle, int n, - const double *x, int incx, - int *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const double *, int, int *); - static auto func_ptr = LoadSymbol("cublasIdamax_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasIcamax_v2(cublasHandle_t handle, int n, - const cuComplex *x, int incx, - int *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cublasIcamax_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasIzamax_v2(cublasHandle_t handle, int n, - const cuDoubleComplex *x, int incx, - int *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cublasIzamax_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - 
-cublasStatus_t CUBLASWINAPI cublasIamaxEx( - cublasHandle_t handle, int n, const void *x, cudaDataType xType, int incx, - int *result /* host or device pointer */ -) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const void *, cudaDataType, int, int *); - static auto func_ptr = LoadSymbol("cublasIamaxEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, xType, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasIsamin_v2(cublasHandle_t handle, int n, - const float *x, int incx, - int *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const float *, int, int *); - static auto func_ptr = LoadSymbol("cublasIsamin_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasIdamin_v2(cublasHandle_t handle, int n, - const double *x, int incx, - int *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const double *, int, int *); - static auto func_ptr = LoadSymbol("cublasIdamin_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasIcamin_v2(cublasHandle_t handle, int n, - const cuComplex *x, int incx, - int *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cublasIcamin_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasIzamin_v2(cublasHandle_t handle, int n, - const cuDoubleComplex *x, int incx, - int *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cublasIzamin_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - 
-cublasStatus_t CUBLASWINAPI cublasIaminEx( - cublasHandle_t handle, int n, const void *x, cudaDataType xType, int incx, - int *result /* host or device pointer */ -) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const void *, cudaDataType, int, int *); - static auto func_ptr = LoadSymbol("cublasIaminEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, xType, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasAsumEx( - cublasHandle_t handle, int n, const void *x, cudaDataType xType, int incx, - void *result, cudaDataType resultType, /* host or device pointer */ - cudaDataType executiontype) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const void *, cudaDataType, int, void *, - cudaDataType, cudaDataType); - static auto func_ptr = LoadSymbol("cublasAsumEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, xType, incx, result, resultType, executiontype); -} - -cublasStatus_t CUBLASWINAPI cublasSasum_v2(cublasHandle_t handle, int n, - const float *x, int incx, - float *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const float *, int, float *); - static auto func_ptr = LoadSymbol("cublasSasum_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasDasum_v2(cublasHandle_t handle, int n, - const double *x, int incx, - double *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, - const double *, int, double *); - static auto func_ptr = LoadSymbol("cublasDasum_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasScasum_v2(cublasHandle_t handle, int n, - const cuComplex *x, int incx, - float *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuComplex *, int, float *); 
- static auto func_ptr = LoadSymbol("cublasScasum_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI cublasDzasum_v2(cublasHandle_t handle, int n, - const cuDoubleComplex *x, int incx, - double *result) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *, int, double *); - static auto func_ptr = LoadSymbol("cublasDzasum_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, result); -} - -cublasStatus_t CUBLASWINAPI -cublasSrot_v2(cublasHandle_t handle, int n, float *x, int incx, float *y, - int incy, const float *c, /* host or device pointer */ - const float *s) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, float *, int, float *, - int, const float *, const float *); - static auto func_ptr = LoadSymbol("cublasSrot_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, c, s); -} - -cublasStatus_t CUBLASWINAPI -cublasDrot_v2(cublasHandle_t handle, int n, double *x, int incx, double *y, - int incy, const double *c, /* host or device pointer */ - const double *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, double *, int, double *, int, const double *, - const double *); - static auto func_ptr = LoadSymbol("cublasDrot_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, c, s); -} - -cublasStatus_t CUBLASWINAPI cublasCrot_v2( - cublasHandle_t handle, int n, cuComplex *x, int incx, cuComplex *y, - int incy, const float *c, /* host or device pointer */ - const cuComplex *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, cuComplex *, int, cuComplex *, int, const float *, - const cuComplex *); - static auto func_ptr = LoadSymbol("cublasCrot_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, 
x, incx, y, incy, c, s); -} - -cublasStatus_t CUBLASWINAPI cublasCsrot_v2( - cublasHandle_t handle, int n, cuComplex *x, int incx, cuComplex *y, - int incy, const float *c, /* host or device pointer */ - const float *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, cuComplex *, int, cuComplex *, int, const float *, - const float *); - static auto func_ptr = LoadSymbol("cublasCsrot_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, c, s); -} - -cublasStatus_t CUBLASWINAPI cublasZrot_v2( - cublasHandle_t handle, int n, cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy, const double *c, /* host or device pointer */ - const cuDoubleComplex *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int, - const double *, const cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZrot_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, c, s); -} - -cublasStatus_t CUBLASWINAPI cublasZdrot_v2( - cublasHandle_t handle, int n, cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy, const double *c, /* host or device pointer */ - const double *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int, - const double *, const double *); - static auto func_ptr = LoadSymbol("cublasZdrot_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, c, s); -} - -cublasStatus_t CUBLASWINAPI -cublasRotEx(cublasHandle_t handle, int n, void *x, cudaDataType xType, int incx, - void *y, cudaDataType yType, int incy, - const void *c, /* host or device pointer */ - const void *s, cudaDataType csType, cudaDataType executiontype) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, void *, cudaDataType, int, void *, cudaDataType, int, - const 
void *, const void *, cudaDataType, cudaDataType); - static auto func_ptr = LoadSymbol("cublasRotEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, xType, incx, y, yType, incy, c, s, csType, - executiontype); -} - -cublasStatus_t CUBLASWINAPI -cublasSrotg_v2(cublasHandle_t handle, float *a, /* host or device pointer */ - float *b, /* host or device pointer */ - float *c, /* host or device pointer */ - float *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, float *, - float *, float *, float *); - static auto func_ptr = LoadSymbol("cublasSrotg_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, a, b, c, s); -} - -cublasStatus_t CUBLASWINAPI -cublasDrotg_v2(cublasHandle_t handle, double *a, /* host or device pointer */ - double *b, /* host or device pointer */ - double *c, /* host or device pointer */ - double *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, double *, - double *, double *, double *); - static auto func_ptr = LoadSymbol("cublasDrotg_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, a, b, c, s); -} - -cublasStatus_t CUBLASWINAPI -cublasCrotg_v2(cublasHandle_t handle, cuComplex *a, /* host or device pointer */ - cuComplex *b, /* host or device pointer */ - float *c, /* host or device pointer */ - cuComplex *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cuComplex *, cuComplex *, float *, cuComplex *); - static auto func_ptr = LoadSymbol("cublasCrotg_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, a, b, c, s); -} - -cublasStatus_t CUBLASWINAPI cublasZrotg_v2( - cublasHandle_t handle, cuDoubleComplex *a, /* host or device pointer */ - cuDoubleComplex *b, /* host or device pointer */ - double *c, /* host or device pointer */ - cuDoubleComplex *s) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cuDoubleComplex *, cuDoubleComplex *, 
double *, - cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZrotg_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, a, b, c, s); -} - -cublasStatus_t CUBLASWINAPI cublasRotgEx(cublasHandle_t handle, - void *a, /* host or device pointer */ - void *b, /* host or device pointer */ - cudaDataType abType, - void *c, /* host or device pointer */ - void *s, /* host or device pointer */ - cudaDataType csType, - cudaDataType executiontype) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, void *, void *, - cudaDataType, void *, void *, - cudaDataType, cudaDataType); - static auto func_ptr = LoadSymbol("cublasRotgEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, a, b, abType, c, s, csType, executiontype); -} - -cublasStatus_t CUBLASWINAPI cublasSrotm_v2(cublasHandle_t handle, int n, - float *x, int incx, float *y, - int incy, const float *param) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, float *, int, float *, int, const float *); - static auto func_ptr = LoadSymbol("cublasSrotm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, param); -} - -cublasStatus_t CUBLASWINAPI cublasDrotm_v2(cublasHandle_t handle, int n, - double *x, int incx, double *y, - int incy, const double *param) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, double *, int, double *, int, const double *); - static auto func_ptr = LoadSymbol("cublasDrotm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, incx, y, incy, param); -} - -cublasStatus_t CUBLASWINAPI -cublasRotmEx(cublasHandle_t handle, int n, void *x, cudaDataType xType, - int incx, void *y, cudaDataType yType, int incy, - const void *param, /* host or device pointer */ - cudaDataType paramType, cudaDataType executiontype) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, void *, 
cudaDataType, int, void *, cudaDataType, int, - const void *, cudaDataType, cudaDataType); - static auto func_ptr = LoadSymbol("cublasRotmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, x, xType, incx, y, yType, incy, param, paramType, - executiontype); -} - -cublasStatus_t CUBLASWINAPI -cublasSrotmg_v2(cublasHandle_t handle, float *d1, /* host or device pointer */ - float *d2, /* host or device pointer */ - float *x1, /* host or device pointer */ - const float *y1, /* host or device pointer */ - float *param) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, float *, float *, float *, const float *, float *); - static auto func_ptr = LoadSymbol("cublasSrotmg_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, d1, d2, x1, y1, param); -} - -cublasStatus_t CUBLASWINAPI -cublasDrotmg_v2(cublasHandle_t handle, double *d1, /* host or device pointer */ - double *d2, /* host or device pointer */ - double *x1, /* host or device pointer */ - const double *y1, /* host or device pointer */ - double *param) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, double *, double *, double *, const double *, double *); - static auto func_ptr = LoadSymbol("cublasDrotmg_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, d1, d2, x1, y1, param); -} - -cublasStatus_t CUBLASWINAPI -cublasRotmgEx(cublasHandle_t handle, void *d1, /* host or device pointer */ - cudaDataType d1Type, void *d2, /* host or device pointer */ - cudaDataType d2Type, void *x1, /* host or device pointer */ - cudaDataType x1Type, const void *y1, /* host or device pointer */ - cudaDataType y1Type, void *param, /* host or device pointer */ - cudaDataType paramType, cudaDataType executiontype) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, void *, cudaDataType, void *, cudaDataType, void *, - cudaDataType, const void *, cudaDataType, void *, cudaDataType, - 
cudaDataType); - static auto func_ptr = LoadSymbol("cublasRotmgEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, d1, d1Type, d2, d2Type, x1, x1Type, y1, y1Type, param, - paramType, executiontype); -} - -cublasStatus_t CUBLASWINAPI -cublasSgemv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *x, int incx, - const float *beta, /* host or device pointer */ - float *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const float *, const float *, - int, const float *, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasSgemv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasDgemv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *x, int incx, - const double *beta, /* host or device pointer */ - double *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const double *, - const double *, int, const double *, int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDgemv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasCgemv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *x, int incx, - const cuComplex *beta, /* host or device pointer */ - cuComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const cuComplex *, - const cuComplex 
*, int, const cuComplex *, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgemv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasZgemv_v2( - cublasHandle_t handle, cublasOperation_t trans, int m, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZgemv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasSgbmv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - int kl, int ku, const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *x, int incx, - const float *beta, /* host or device pointer */ - float *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, int, int, const float *, - const float *, int, const float *, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasSgbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, - incy); -} - -cublasStatus_t CUBLASWINAPI -cublasDgbmv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - int kl, int ku, const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *x, int incx, - const double *beta, /* host or device pointer */ - 
double *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, int, int, const double *, - const double *, int, const double *, int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDgbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, - incy); -} - -cublasStatus_t CUBLASWINAPI cublasCgbmv_v2( - cublasHandle_t handle, cublasOperation_t trans, int m, int n, int kl, - int ku, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *x, int incx, - const cuComplex *beta, /* host or device pointer */ - cuComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, int, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, - incy); -} - -cublasStatus_t CUBLASWINAPI cublasZgbmv_v2( - cublasHandle_t handle, cublasOperation_t trans, int m, int n, int kl, - int ku, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cublasZgbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, - incy); -} - -cublasStatus_t CUBLASWINAPI 
cublasStrmv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - cublasDiagType_t diag, int n, const float *A, int lda, float *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStrmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasDtrmv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, - const double *A, int lda, double *x, - int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtrmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasCtrmv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, - const cuComplex *A, int lda, - cuComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtrmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasZtrmv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const cuDoubleComplex *, int, 
cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtrmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasStbmv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, int k, - const float *A, int lda, float *x, - int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, int, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasDtbmv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, int k, - const double *A, int lda, double *x, - int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, int, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasCtbmv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, int k, - const cuComplex *A, int lda, - cuComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasZtbmv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - 
cublasOperation_t trans, - cublasDiagType_t diag, int n, int k, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, int, const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasStpmv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - cublasDiagType_t diag, int n, const float *AP, float *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasStpmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasDtpmv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - cublasDiagType_t diag, int n, const double *AP, double *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDtpmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasCtpmv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - cublasDiagType_t diag, int n, const cuComplex *AP, cuComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtpmv_v2"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasZtpmv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, - const cuDoubleComplex *AP, - cuDoubleComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtpmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasStrsv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - cublasDiagType_t diag, int n, const float *A, int lda, float *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStrsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasDtrsv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, - const double *A, int lda, double *x, - int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtrsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasCtrsv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, - const cuComplex *A, int lda, - cuComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI 
*)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtrsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasZtrsv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtrsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasStpsv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - cublasDiagType_t diag, int n, const float *AP, float *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasStpsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasDtpsv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - cublasDiagType_t diag, int n, const double *AP, double *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDtpsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasCtpsv_v2( - cublasHandle_t 
handle, cublasFillMode_t uplo, cublasOperation_t trans, - cublasDiagType_t diag, int n, const cuComplex *AP, cuComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtpsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasZtpsv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, - const cuDoubleComplex *AP, - cuDoubleComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtpsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasStbsv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, int k, - const float *A, int lda, float *x, - int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, int, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStbsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasDtbsv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, int k, - const double *A, int lda, double *x, - int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, int, const double *, int, double *, int); - static auto 
func_ptr = LoadSymbol("cublasDtbsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasCtbsv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, int k, - const cuComplex *A, int lda, - cuComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtbsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI cublasZtbsv_v2(cublasHandle_t handle, - cublasFillMode_t uplo, - cublasOperation_t trans, - cublasDiagType_t diag, int n, int k, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *x, int incx) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, - int, int, const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtbsv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); -} - -cublasStatus_t CUBLASWINAPI -cublasSsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *x, int incx, - const float *beta, /* host or device pointer */ - float *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int, - const float *, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasSsymv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t 
CUBLASWINAPI -cublasDsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *x, int incx, - const double *beta, /* host or device pointer */ - double *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, const double *, - int, const double *, int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDsymv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasCsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *x, int incx, - const cuComplex *beta, /* host or device pointer */ - cuComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsymv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasZsymv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZsymv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasChemv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *x, int incx, - const cuComplex *beta, /* host or device pointer */ - cuComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasChemv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasZhemv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZhemv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasSsbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, int k, - const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *x, int incx, - const float *beta, /* host or device pointer */ - float *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, int, const float *, const float *, - int, const float *, int, const float *, float *, int); - static auto func_ptr = 
LoadSymbol("cublasSsbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasDsbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, int k, - const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *x, int incx, - const double *beta, /* host or device pointer */ - double *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, int, const double *, - const double *, int, const double *, int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDsbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasChbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, int k, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *x, int incx, - const cuComplex *beta, /* host or device pointer */ - cuComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasChbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasZhbmv_v2( - cublasHandle_t handle, cublasFillMode_t uplo, int n, int k, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, int, const cuDoubleComplex *, - const 
cuDoubleComplex *, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZhbmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasSspmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const float *alpha, /* host or device pointer */ - const float *AP, const float *x, int incx, - const float *beta, /* host or device pointer */ - float *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, const float *, - const float *, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasSspmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, AP, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasDspmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const double *alpha, /* host or device pointer */ - const double *AP, const double *x, int incx, - const double *beta, /* host or device pointer */ - double *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, const double *, - const double *, int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDspmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, AP, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasChpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *AP, const cuComplex *x, int incx, - const cuComplex *beta, /* host or device pointer */ - cuComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuComplex *, - const cuComplex *, const cuComplex *, int, const 
cuComplex *, cuComplex *, - int); - static auto func_ptr = LoadSymbol("cublasChpmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, AP, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI -cublasZhpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *AP, const cuDoubleComplex *x, int incx, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *y, int incy) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZhpmv_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, AP, x, incx, beta, y, incy); -} - -cublasStatus_t CUBLASWINAPI cublasSger_v2( - cublasHandle_t handle, int m, int n, - const float *alpha, /* host or device pointer */ - const float *x, int incx, const float *y, int incy, float *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, int, const float *, const float *, int, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSger_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI cublasDger_v2( - cublasHandle_t handle, int m, int n, - const double *alpha, /* host or device pointer */ - const double *x, int incx, const double *y, int incy, double *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, int, const double *, const double *, int, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDger_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, x, incx, y, 
incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasCgeru_v2(cublasHandle_t handle, int m, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *x, int incx, const cuComplex *y, int incy, - cuComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, int, const cuComplex *, const cuComplex *, int, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgeru_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasCgerc_v2(cublasHandle_t handle, int m, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *x, int incx, const cuComplex *y, int incy, - cuComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, int, const cuComplex *, const cuComplex *, int, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgerc_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasZgeru_v2(cublasHandle_t handle, int m, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, - int incy, cuDoubleComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZgeru_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasZgerc_v2(cublasHandle_t handle, int m, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, - int incy, 
cuDoubleComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZgerc_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasSsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const float *alpha, /* host or device pointer */ - const float *x, int incx, float *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int, - float *, int); - static auto func_ptr = LoadSymbol("cublasSsyr_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasDsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const double *alpha, /* host or device pointer */ - const double *x, int incx, double *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, const double *, - int, double *, int); - static auto func_ptr = LoadSymbol("cublasDsyr_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasCsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *x, int incx, cuComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuComplex *, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsyr_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, A, lda); -} - -cublasStatus_t CUBLASWINAPI 
-cublasZsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *x, int incx, cuDoubleComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZsyr_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasCher_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const float *alpha, /* host or device pointer */ - const cuComplex *x, int incx, cuComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, const cuComplex *, - int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCher_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasZher_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const double *alpha, /* host or device pointer */ - const cuDoubleComplex *x, int incx, cuDoubleComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, - const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZher_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasSspr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const float *alpha, /* host or device pointer */ - const float *x, int incx, float *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int, - float *); - static auto func_ptr = LoadSymbol("cublasSspr_v2"); - 
if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, AP); -} - -cublasStatus_t CUBLASWINAPI -cublasDspr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const double *alpha, /* host or device pointer */ - const double *x, int incx, double *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, const double *, - int, double *); - static auto func_ptr = LoadSymbol("cublasDspr_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, AP); -} - -cublasStatus_t CUBLASWINAPI -cublasChpr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const float *alpha, /* host or device pointer */ - const cuComplex *x, int incx, cuComplex *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, const cuComplex *, - int, cuComplex *); - static auto func_ptr = LoadSymbol("cublasChpr_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, AP); -} - -cublasStatus_t CUBLASWINAPI -cublasZhpr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const double *alpha, /* host or device pointer */ - const cuDoubleComplex *x, int incx, cuDoubleComplex *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, - const cuDoubleComplex *, int, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZhpr_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, AP); -} - -cublasStatus_t CUBLASWINAPI cublasSsyr2_v2( - cublasHandle_t handle, cublasFillMode_t uplo, int n, - const float *alpha, /* host or device pointer */ - const float *x, int incx, const float *y, int incy, float *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, const float *, 
int, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSsyr2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI cublasDsyr2_v2( - cublasHandle_t handle, cublasFillMode_t uplo, int n, - const double *alpha, /* host or device pointer */ - const double *x, int incx, const double *y, int incy, double *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, const double *, - int, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDsyr2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasCsyr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *x, int incx, const cuComplex *y, int incy, - cuComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsyr2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasZsyr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, - int incy, cuDoubleComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZsyr2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, 
uplo, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasCher2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *x, int incx, const cuComplex *y, int incy, - cuComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCher2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasZher2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, - int incy, cuDoubleComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZher2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda); -} - -cublasStatus_t CUBLASWINAPI -cublasSspr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const float *alpha, /* host or device pointer */ - const float *x, int incx, const float *y, int incy, float *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int, - const float *, int, float *); - static auto func_ptr = LoadSymbol("cublasSspr2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, AP); -} - -cublasStatus_t CUBLASWINAPI cublasDspr2_v2( - cublasHandle_t handle, cublasFillMode_t uplo, int n, - const double *alpha, /* host or device pointer */ 
- const double *x, int incx, const double *y, int incy, double *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, const double *, - int, const double *, int, double *); - static auto func_ptr = LoadSymbol("cublasDspr2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, AP); -} - -cublasStatus_t CUBLASWINAPI cublasChpr2_v2( - cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *x, int incx, const cuComplex *y, int incy, cuComplex *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, int, cuComplex *); - static auto func_ptr = LoadSymbol("cublasChpr2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, AP); -} - -cublasStatus_t CUBLASWINAPI -cublasZhpr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, - int incy, cuDoubleComplex *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZhpr2_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, AP); -} - -cublasStatus_t CUBLASWINAPI cublasSgemvBatched( - cublasHandle_t handle, cublasOperation_t trans, int m, int n, - const float *alpha, /* host or device pointer */ - const float *const Aarray[], int lda, const float *const xarray[], int incx, - const float *beta, /* host or device pointer */ - float *const yarray[], int incy, int batchCount) { - using FuncPtr = 
cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const float *, - const float *const[], int, const float *const[], int, const float *, - float *const[], int, int); - static auto func_ptr = LoadSymbol("cublasSgemvBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, alpha, Aarray, lda, xarray, incx, beta, - yarray, incy, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasDgemvBatched( - cublasHandle_t handle, cublasOperation_t trans, int m, int n, - const double *alpha, /* host or device pointer */ - const double *const Aarray[], int lda, const double *const xarray[], - int incx, const double *beta, /* host or device pointer */ - double *const yarray[], int incy, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const double *, - const double *const[], int, const double *const[], int, const double *, - double *const[], int, int); - static auto func_ptr = LoadSymbol("cublasDgemvBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, alpha, Aarray, lda, xarray, incx, beta, - yarray, incy, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasCgemvBatched( - cublasHandle_t handle, cublasOperation_t trans, int m, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *const Aarray[], int lda, const cuComplex *const xarray[], - int incx, const cuComplex *beta, /* host or device pointer */ - cuComplex *const yarray[], int incy, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const cuComplex *, - const cuComplex *const[], int, const cuComplex *const[], int, - const cuComplex *, cuComplex *const[], int, int); - static auto func_ptr = LoadSymbol("cublasCgemvBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, alpha, Aarray, lda, xarray, incx, beta, - 
yarray, incy, batchCount); -} - -cublasStatus_t CUBLASWINAPI -cublasZgemvBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *const Aarray[], int lda, - const cuDoubleComplex *const xarray[], int incx, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *const yarray[], int incy, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *const[], int, const cuDoubleComplex *const[], int, - const cuDoubleComplex *, cuDoubleComplex *const[], int, int); - static auto func_ptr = LoadSymbol("cublasZgemvBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, alpha, Aarray, lda, xarray, incx, beta, - yarray, incy, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasSgemvStridedBatched( - cublasHandle_t handle, cublasOperation_t trans, int m, int n, - const float *alpha, /* host or device pointer */ - const float *A, int lda, long long int strideA, /* purposely signed */ - const float *x, int incx, long long int stridex, - const float *beta, /* host or device pointer */ - float *y, int incy, long long int stridey, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const float *, const float *, - int, long long, const float *, int, long long, const float *, float *, - int, long long, int); - static auto func_ptr = LoadSymbol("cublasSgemvStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, alpha, A, lda, strideA, x, incx, stridex, - beta, y, incy, stridey, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasDgemvStridedBatched( - cublasHandle_t handle, cublasOperation_t trans, int m, int n, - const double *alpha, /* host or device pointer */ - const double *A, int lda, long long int 
strideA, /* purposely signed */ - const double *x, int incx, long long int stridex, - const double *beta, /* host or device pointer */ - double *y, int incy, long long int stridey, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const double *, - const double *, int, long long, const double *, int, long long, - const double *, double *, int, long long, int); - static auto func_ptr = LoadSymbol("cublasDgemvStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, alpha, A, lda, strideA, x, incx, stridex, - beta, y, incy, stridey, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasCgemvStridedBatched( - cublasHandle_t handle, cublasOperation_t trans, int m, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, long long int strideA, /* purposely signed */ - const cuComplex *x, int incx, long long int stridex, - const cuComplex *beta, /* host or device pointer */ - cuComplex *y, int incy, long long int stridey, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const cuComplex *, - const cuComplex *, int, long long, const cuComplex *, int, long long, - const cuComplex *, cuComplex *, int, long long, int); - static auto func_ptr = LoadSymbol("cublasCgemvStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, alpha, A, lda, strideA, x, incx, stridex, - beta, y, incy, stridey, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasZgemvStridedBatched( - cublasHandle_t handle, cublasOperation_t trans, int m, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, - long long int strideA, /* purposely signed */ - const cuDoubleComplex *x, int incx, long long int stridex, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *y, int 
incy, long long int stridey, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, long long, const cuDoubleComplex *, int, - long long, const cuDoubleComplex *, cuDoubleComplex *, int, long long, - int); - static auto func_ptr = LoadSymbol("cublasZgemvStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, alpha, A, lda, strideA, x, incx, stridex, - beta, y, incy, stridey, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasSgemm_v2( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *B, int ldb, - const float *beta, /* host or device pointer */ - float *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const float *, const float *, int, const float *, int, const float *, - float *, int); - static auto func_ptr = LoadSymbol("cublasSgemm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasDgemm_v2( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *B, int ldb, - const double *beta, /* host or device pointer */ - double *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const double *, const double *, int, const double *, int, const double *, - double *, int); - static auto func_ptr = LoadSymbol("cublasDgemm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, 
lda, B, ldb, beta, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCgemm_v2( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, int, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgemm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCgemm3m( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, int, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgemm3m"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCgemm3mEx( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const cuComplex *alpha, const void *A, - cudaDataType Atype, int lda, const void *B, cudaDataType Btype, int ldb, - const cuComplex *beta, void *C, cudaDataType Ctype, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, 
cublasOperation_t, int, int, int, - const cuComplex *, const void *, cudaDataType, int, const void *, - cudaDataType, int, const cuComplex *, void *, cudaDataType, int); - static auto func_ptr = LoadSymbol("cublasCgemm3mEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B, - Btype, ldb, beta, C, Ctype, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZgemm_v2( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cublasZgemm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI -cublasZgemm3m(cublasHandle_t handle, cublasOperation_t transa, - cublasOperation_t transb, int m, int n, int k, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, - int ldb, const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cublasZgemm3m"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, 
transb, m, n, k, alpha, A, lda, B, ldb, beta, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasSgemmEx( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const float *alpha, /* host or device pointer */ - const void *A, cudaDataType Atype, int lda, const void *B, - cudaDataType Btype, int ldb, const float *beta, /* host or device pointer */ - void *C, cudaDataType Ctype, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const float *, const void *, cudaDataType, int, const void *, - cudaDataType, int, const float *, void *, cudaDataType, int); - static auto func_ptr = LoadSymbol("cublasSgemmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B, - Btype, ldb, beta, C, Ctype, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasGemmEx( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const void *alpha, /* host or device pointer */ - const void *A, cudaDataType Atype, int lda, const void *B, - cudaDataType Btype, int ldb, const void *beta, /* host or device pointer */ - void *C, cudaDataType Ctype, int ldc, cublasComputeType_t computeType, - cublasGemmAlgo_t algo) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const void *, const void *, cudaDataType, int, const void *, cudaDataType, - int, const void *, void *, cudaDataType, int, cublasComputeType_t, - cublasGemmAlgo_t); - static auto func_ptr = LoadSymbol("cublasGemmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B, - Btype, ldb, beta, C, Ctype, ldc, computeType, algo); -} - -cublasStatus_t CUBLASWINAPI cublasCgemmEx( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int 
n, int k, const cuComplex *alpha, const void *A, - cudaDataType Atype, int lda, const void *B, cudaDataType Btype, int ldb, - const cuComplex *beta, void *C, cudaDataType Ctype, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuComplex *, const void *, cudaDataType, int, const void *, - cudaDataType, int, const cuComplex *, void *, cudaDataType, int); - static auto func_ptr = LoadSymbol("cublasCgemmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B, - Btype, ldb, beta, C, Ctype, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasUint8gemmBias( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - cublasOperation_t transc, int m, int n, int k, const unsigned char *A, - int A_bias, int lda, const unsigned char *B, int B_bias, int ldb, - unsigned char *C, int C_bias, int ldc, int C_mult, int C_shift) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, cublasOperation_t, - int, int, int, const unsigned char *, int, int, const unsigned char *, - int, int, unsigned char *, int, int, int, int); - static auto func_ptr = LoadSymbol("cublasUint8gemmBias"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, transc, m, n, k, A, A_bias, lda, B, - B_bias, ldb, C, C_bias, ldc, C_mult, C_shift); -} - -cublasStatus_t CUBLASWINAPI cublasSsyrk_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *beta, /* host or device pointer */ - float *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const float *, const float *, int, const float *, float *, int); - static auto func_ptr = 
LoadSymbol("cublasSsyrk_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasDsyrk_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *beta, /* host or device pointer */ - double *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const double *, const double *, int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDsyrk_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCsyrk_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, cuComplex *, - int); - static auto func_ptr = LoadSymbol("cublasCsyrk_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZsyrk_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - 
const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZsyrk_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCsyrkEx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuComplex *alpha, /* host or device pointer */ - const void *A, cudaDataType Atype, int lda, - const cuComplex *beta, /* host or device pointer */ - void *C, cudaDataType Ctype, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuComplex *, const void *, cudaDataType, int, const cuComplex *, - void *, cudaDataType, int); - static auto func_ptr = LoadSymbol("cublasCsyrkEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C, - Ctype, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCsyrk3mEx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuComplex *alpha, const void *A, cudaDataType Atype, - int lda, const cuComplex *beta, void *C, cudaDataType Ctype, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuComplex *, const void *, cudaDataType, int, const cuComplex *, - void *, cudaDataType, int); - static auto func_ptr = LoadSymbol("cublasCsyrk3mEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C, - Ctype, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCherk_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const float *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const float *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = 
cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const float *, const cuComplex *, int, const float *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCherk_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZherk_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const double *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, - const double *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const double *, const cuDoubleComplex *, int, const double *, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZherk_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCherkEx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const float *alpha, /* host or device pointer */ - const void *A, cudaDataType Atype, int lda, - const float *beta, /* host or device pointer */ - void *C, cudaDataType Ctype, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const float *, const void *, cudaDataType, int, const float *, void *, - cudaDataType, int); - static auto func_ptr = LoadSymbol("cublasCherkEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C, - Ctype, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCherk3mEx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const float *alpha, const void *A, cudaDataType Atype, - int lda, const float 
*beta, void *C, cudaDataType Ctype, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const float *, const void *, cudaDataType, int, const float *, void *, - cudaDataType, int); - static auto func_ptr = LoadSymbol("cublasCherk3mEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C, - Ctype, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasSsyr2k_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *B, int ldb, - const float *beta, /* host or device pointer */ - float *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const float *, const float *, int, const float *, int, const float *, - float *, int); - static auto func_ptr = LoadSymbol("cublasSsyr2k_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasDsyr2k_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *B, int ldb, - const double *beta, /* host or device pointer */ - double *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const double *, const double *, int, const double *, int, const double *, - double *, int); - static auto func_ptr = LoadSymbol("cublasDsyr2k_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCsyr2k_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t 
trans, - int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, int, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsyr2k_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZsyr2k_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cublasZsyr2k_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCher2k_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, - const float *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, int, - const float *, cuComplex *, int); - 
static auto func_ptr = LoadSymbol("cublasCher2k_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZher2k_v2( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - const double *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const double *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZher2k_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasSsyrkx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *B, int ldb, - const float *beta, /* host or device pointer */ - float *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const float *, const float *, int, const float *, int, const float *, - float *, int); - static auto func_ptr = LoadSymbol("cublasSsyrkx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasDsyrkx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *B, int ldb, - const double *beta, /* host or device pointer */ - double *C, int ldc) { 
- using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const double *, const double *, int, const double *, int, const double *, - double *, int); - static auto func_ptr = LoadSymbol("cublasDsyrkx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCsyrkx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, int, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsyrkx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZsyrkx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cublasZsyrkx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCherkx( - 
cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, - const float *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, int, - const float *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCherkx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZherkx( - cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, - int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - const double *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const double *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZherkx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasSsymm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, - int n, const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *B, int ldb, - const float *beta, /* host or device pointer */ - float *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, - const float *, const float *, int, const float *, int, const float *, - float *, int); - static auto 
func_ptr = LoadSymbol("cublasSsymm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasDsymm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, - int n, const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *B, int ldb, - const double *beta, /* host or device pointer */ - double *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, - const double *, const double *, int, const double *, int, const double *, - double *, int); - static auto func_ptr = LoadSymbol("cublasDsymm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCsymm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, - int n, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, int, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsymm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZsymm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, - int n, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using 
FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cublasZsymm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasChemm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, - int n, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, int, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasChemm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZhemm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, - int n, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cublasZhemm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, - ldc); -} - 
-cublasStatus_t CUBLASWINAPI cublasStrsm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const float *alpha, /* host or device pointer */ - const float *A, int lda, float *B, int ldb) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const float *, const float *, int, float *, - int); - static auto func_ptr = LoadSymbol("cublasStrsm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb); -} - -cublasStatus_t CUBLASWINAPI cublasDtrsm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const double *alpha, /* host or device pointer */ - const double *A, int lda, double *B, int ldb) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const double *, const double *, int, double *, - int); - static auto func_ptr = LoadSymbol("cublasDtrsm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb); -} - -cublasStatus_t CUBLASWINAPI cublasCtrsm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, cuComplex *B, int ldb) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const cuComplex *, const cuComplex *, int, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtrsm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, 
side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb); -} - -cublasStatus_t CUBLASWINAPI cublasZtrsm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, cuDoubleComplex *B, int ldb) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtrsm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb); -} - -cublasStatus_t CUBLASWINAPI cublasStrmm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *B, int ldb, float *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const float *, const float *, int, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStrmm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasDtrmm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *B, int ldb, double *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const double 
*, const double *, int, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtrmm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCtrmm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *B, int ldb, cuComplex *C, - int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const cuComplex *, const cuComplex *, int, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtrmm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZtrmm_v2( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtrmm_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, - C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasSgemmBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const 
float *alpha, /* host or device pointer */ - const float *const Aarray[], int lda, const float *const Barray[], int ldb, - const float *beta, /* host or device pointer */ - float *const Carray[], int ldc, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const float *, const float *const[], int, const float *const[], int, - const float *, float *const[], int, int); - static auto func_ptr = LoadSymbol("cublasSgemmBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray, - ldb, beta, Carray, ldc, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasDgemmBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const double *alpha, /* host or device pointer */ - const double *const Aarray[], int lda, const double *const Barray[], - int ldb, const double *beta, /* host or device pointer */ - double *const Carray[], int ldc, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const double *, const double *const[], int, const double *const[], int, - const double *, double *const[], int, int); - static auto func_ptr = LoadSymbol("cublasDgemmBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray, - ldb, beta, Carray, ldc, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasCgemmBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *const Aarray[], int lda, const cuComplex *const Barray[], - int ldb, const cuComplex *beta, /* host or device pointer */ - cuComplex *const Carray[], int ldc, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - 
cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuComplex *, const cuComplex *const[], int, - const cuComplex *const[], int, const cuComplex *, cuComplex *const[], int, - int); - static auto func_ptr = LoadSymbol("cublasCgemmBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray, - ldb, beta, Carray, ldc, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasCgemm3mBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *const Aarray[], int lda, const cuComplex *const Barray[], - int ldb, const cuComplex *beta, /* host or device pointer */ - cuComplex *const Carray[], int ldc, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuComplex *, const cuComplex *const[], int, - const cuComplex *const[], int, const cuComplex *, cuComplex *const[], int, - int); - static auto func_ptr = LoadSymbol("cublasCgemm3mBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray, - ldb, beta, Carray, ldc, batchCount); -} - -cublasStatus_t CUBLASWINAPI -cublasZgemmBatched(cublasHandle_t handle, cublasOperation_t transa, - cublasOperation_t transb, int m, int n, int k, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *const Aarray[], int lda, - const cuDoubleComplex *const Barray[], int ldb, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *const Carray[], int ldc, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuDoubleComplex *, const cuDoubleComplex *const[], int, - const cuDoubleComplex *const[], int, const 
cuDoubleComplex *, - cuDoubleComplex *const[], int, int); - static auto func_ptr = LoadSymbol("cublasZgemmBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray, - ldb, beta, Carray, ldc, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const void *alpha, /* host or device pointer */ - const void *const Aarray[], cudaDataType Atype, int lda, - const void *const Barray[], cudaDataType Btype, int ldb, - const void *beta, /* host or device pointer */ - void *const Carray[], cudaDataType Ctype, int ldc, int batchCount, - cublasComputeType_t computeType, cublasGemmAlgo_t algo) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const void *, const void *const[], cudaDataType, int, const void *const[], - cudaDataType, int, const void *, void *const[], cudaDataType, int, int, - cublasComputeType_t, cublasGemmAlgo_t); - static auto func_ptr = LoadSymbol("cublasGemmBatchedEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, Atype, lda, - Barray, Btype, ldb, beta, Carray, Ctype, ldc, batchCount, - computeType, algo); -} - -cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const void *alpha, /* host or device pointer */ - const void *A, cudaDataType Atype, int lda, - long long int strideA, /* purposely signed */ - const void *B, cudaDataType Btype, int ldb, long long int strideB, - const void *beta, /* host or device pointer */ - void *C, cudaDataType Ctype, int ldc, long long int strideC, int batchCount, - cublasComputeType_t computeType, cublasGemmAlgo_t algo) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, 
cublasOperation_t, cublasOperation_t, int, int, int, - const void *, const void *, cudaDataType, int, long long, const void *, - cudaDataType, int, long long, const void *, void *, cudaDataType, int, - long long, int, cublasComputeType_t, cublasGemmAlgo_t); - static auto func_ptr = LoadSymbol("cublasGemmStridedBatchedEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, - strideA, B, Btype, ldb, strideB, beta, C, Ctype, ldc, strideC, - batchCount, computeType, algo); -} - -cublasStatus_t CUBLASWINAPI cublasSgemmStridedBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const float *alpha, /* host or device pointer */ - const float *A, int lda, long long int strideA, /* purposely signed */ - const float *B, int ldb, long long int strideB, - const float *beta, /* host or device pointer */ - float *C, int ldc, long long int strideC, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const float *, const float *, int, long long, const float *, int, - long long, const float *, float *, int, long long, int); - static auto func_ptr = LoadSymbol("cublasSgemmStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, - ldb, strideB, beta, C, ldc, strideC, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasDgemmStridedBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const double *alpha, /* host or device pointer */ - const double *A, int lda, long long int strideA, /* purposely signed */ - const double *B, int ldb, long long int strideB, - const double *beta, /* host or device pointer */ - double *C, int ldc, long long int strideC, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - 
cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const double *, const double *, int, long long, const double *, int, - long long, const double *, double *, int, long long, int); - static auto func_ptr = LoadSymbol("cublasDgemmStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, - ldb, strideB, beta, C, ldc, strideC, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasCgemmStridedBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, long long int strideA, /* purposely signed */ - const cuComplex *B, int ldb, long long int strideB, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc, long long int strideC, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuComplex *, const cuComplex *, int, long long, const cuComplex *, - int, long long, const cuComplex *, cuComplex *, int, long long, int); - static auto func_ptr = LoadSymbol("cublasCgemmStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, - ldb, strideB, beta, C, ldc, strideC, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasCgemm3mStridedBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, long long int strideA, /* purposely signed */ - const cuComplex *B, int ldb, long long int strideB, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc, long long int strideC, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, 
int, int, int, - const cuComplex *, const cuComplex *, int, long long, const cuComplex *, - int, long long, const cuComplex *, cuComplex *, int, long long, int); - static auto func_ptr = LoadSymbol("cublasCgemm3mStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, - ldb, strideB, beta, C, ldc, strideC, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasZgemmStridedBatched( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, int k, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, - long long int strideA, /* purposely signed */ - const cuDoubleComplex *B, int ldb, long long int strideB, - const cuDoubleComplex *beta, /* host or device poi */ - cuDoubleComplex *C, int ldc, long long int strideC, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, long long, - const cuDoubleComplex *, int, long long, const cuDoubleComplex *, - cuDoubleComplex *, int, long long, int); - static auto func_ptr = LoadSymbol("cublasZgemmStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, - ldb, strideB, beta, C, ldc, strideC, batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasSgeam( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *beta, /* host or device pointer */ - const float *B, int ldb, float *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, - const float *, const float *, int, const float *, const float *, int, - float *, int); - static auto func_ptr = 
LoadSymbol("cublasSgeam"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasDgeam( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *beta, /* host or device pointer */ - const double *B, int ldb, double *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, - const double *, const double *, int, const double *, const double *, int, - double *, int); - static auto func_ptr = LoadSymbol("cublasDgeam"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCgeam( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, - const cuComplex *beta, /* host or device pointer */ - const cuComplex *B, int ldb, cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, - const cuComplex *, const cuComplex *, int, const cuComplex *, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgeam"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZgeam( - cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, - int m, int n, const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, - const cuDoubleComplex *beta, /* host or device pointer */ - const cuDoubleComplex *B, int ldb, cuDoubleComplex *C, int ldc) { - 
using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, - const cuDoubleComplex *, const cuDoubleComplex *, int, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cublasZgeam"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, - ldc); -} - -cublasStatus_t CUBLASWINAPI cublasSgetrfBatched( - cublasHandle_t handle, int n, float *const A[], /*Device pointer*/ - int lda, int *P, /*Device Pointer*/ - int *info, /*Device Pointer*/ - int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, float *const[], int, int *, int *, int); - static auto func_ptr = LoadSymbol("cublasSgetrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, P, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasDgetrfBatched( - cublasHandle_t handle, int n, double *const A[], /*Device pointer*/ - int lda, int *P, /*Device Pointer*/ - int *info, /*Device Pointer*/ - int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, double *const[], int, int *, int *, int); - static auto func_ptr = LoadSymbol("cublasDgetrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, P, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasCgetrfBatched( - cublasHandle_t handle, int n, cuComplex *const A[], /*Device pointer*/ - int lda, int *P, /*Device Pointer*/ - int *info, /*Device Pointer*/ - int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, cuComplex *const[], int, int *, int *, int); - static auto func_ptr = LoadSymbol("cublasCgetrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, P, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasZgetrfBatched( - 
cublasHandle_t handle, int n, cuDoubleComplex *const A[], /*Device pointer*/ - int lda, int *P, /*Device Pointer*/ - int *info, /*Device Pointer*/ - int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, cuDoubleComplex *const[], int, int *, int *, int); - static auto func_ptr = LoadSymbol("cublasZgetrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, P, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasSgetriBatched( - cublasHandle_t handle, int n, const float *const A[], /*Device pointer*/ - int lda, const int *P, /*Device pointer*/ - float *const C[], /*Device pointer*/ - int ldc, int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const float *const[], int, const int *, - float *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasSgetriBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, P, C, ldc, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasDgetriBatched( - cublasHandle_t handle, int n, const double *const A[], /*Device pointer*/ - int lda, const int *P, /*Device pointer*/ - double *const C[], /*Device pointer*/ - int ldc, int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const double *const[], int, const int *, - double *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasDgetriBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, P, C, ldc, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasCgetriBatched( - cublasHandle_t handle, int n, const cuComplex *const A[], /*Device pointer*/ - int lda, const int *P, /*Device pointer*/ - cuComplex *const C[], /*Device pointer*/ - int ldc, int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuComplex *const[], int, const int *, - 
cuComplex *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasCgetriBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, P, C, ldc, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasZgetriBatched(cublasHandle_t handle, int n, - const cuDoubleComplex *const A[], /*Device pointer*/ - int lda, const int *P, /*Device pointer*/ - cuDoubleComplex *const C[], /*Device pointer*/ - int ldc, int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *const[], int, const int *, - cuDoubleComplex *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasZgetriBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, P, C, ldc, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasSgetrsBatched( - cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, - const float *const Aarray[], int lda, const int *devIpiv, - float *const Barray[], int ldb, int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const float *const[], int, - const int *, float *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasSgetrsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb, - info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasDgetrsBatched( - cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, - const double *const Aarray[], int lda, const int *devIpiv, - double *const Barray[], int ldb, int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const double *const[], int, - const int *, double *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasDgetrsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb, - info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasCgetrsBatched( - cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, - const cuComplex *const Aarray[], int lda, const int *devIpiv, - cuComplex *const Barray[], int ldb, int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, const cuComplex *const[], - int, const int *, cuComplex *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasCgetrsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb, - info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasZgetrsBatched( - cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, - const cuDoubleComplex *const Aarray[], int lda, const int *devIpiv, - cuDoubleComplex *const Barray[], int ldb, int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, - const cuDoubleComplex *const[], int, const int *, - cuDoubleComplex *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasZgetrsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb, - info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasStrsmBatched( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const float *alpha, /*Host or Device Pointer*/ - const float *const A[], int lda, float *const B[], int ldb, - int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const float *, const float *const[], int, - float *const[], int, int); - static auto func_ptr = LoadSymbol("cublasStrsmBatched"); - 
if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, - batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasDtrsmBatched( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const double *alpha, /*Host or Device Pointer*/ - const double *const A[], int lda, double *const B[], int ldb, - int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const double *, const double *const[], int, - double *const[], int, int); - static auto func_ptr = LoadSymbol("cublasDtrsmBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, - batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasCtrsmBatched( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const cuComplex *alpha, /*Host or Device Pointer*/ - const cuComplex *const A[], int lda, cuComplex *const B[], int ldb, - int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const cuComplex *, const cuComplex *const[], - int, cuComplex *const[], int, int); - static auto func_ptr = LoadSymbol("cublasCtrsmBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, - batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasZtrsmBatched( - cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, cublasDiagType_t diag, int m, int n, - const cuDoubleComplex *alpha, /*Host or Device Pointer*/ - const cuDoubleComplex *const A[], int lda, cuDoubleComplex *const B[], - int 
ldb, int batchCount) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - cublasDiagType_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *const[], int, cuDoubleComplex *const[], int, int); - static auto func_ptr = LoadSymbol("cublasZtrsmBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, - batchCount); -} - -cublasStatus_t CUBLASWINAPI cublasSmatinvBatched( - cublasHandle_t handle, int n, const float *const A[], /*Device pointer*/ - int lda, float *const Ainv[], /*Device pointer*/ - int lda_inv, int *info, /*Device Pointer*/ - int batchSize) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const float *const[], - int, float *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasSmatinvBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, Ainv, lda_inv, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasDmatinvBatched( - cublasHandle_t handle, int n, const double *const A[], /*Device pointer*/ - int lda, double *const Ainv[], /*Device pointer*/ - int lda_inv, int *info, /*Device Pointer*/ - int batchSize) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const double *const[], - int, double *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasDmatinvBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, Ainv, lda_inv, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasCmatinvBatched( - cublasHandle_t handle, int n, const cuComplex *const A[], /*Device pointer*/ - int lda, cuComplex *const Ainv[], /*Device pointer*/ - int lda_inv, int *info, /*Device Pointer*/ - int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuComplex *const[], int, cuComplex *const[], - int, int 
*, int); - static auto func_ptr = LoadSymbol("cublasCmatinvBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, Ainv, lda_inv, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasZmatinvBatched(cublasHandle_t handle, int n, - const cuDoubleComplex *const A[], /*Device pointer*/ - int lda, cuDoubleComplex *const Ainv[], /*Device pointer*/ - int lda_inv, int *info, /*Device Pointer*/ - int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, const cuDoubleComplex *const[], int, - cuDoubleComplex *const[], int, int *, int); - static auto func_ptr = LoadSymbol("cublasZmatinvBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, Ainv, lda_inv, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasSgeqrfBatched(cublasHandle_t handle, int m, int n, - float *const Aarray[], /*Device pointer*/ - int lda, float *const TauArray[], /*Device pointer*/ - int *info, int batchSize) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, int, float *const[], - int, float *const[], int *, int); - static auto func_ptr = LoadSymbol("cublasSgeqrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, Aarray, lda, TauArray, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasDgeqrfBatched(cublasHandle_t handle, int m, int n, - double *const Aarray[], /*Device pointer*/ - int lda, double *const TauArray[], /*Device pointer*/ - int *info, int batchSize) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, int, double *const[], - int, double *const[], int *, int); - static auto func_ptr = LoadSymbol("cublasDgeqrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, Aarray, lda, TauArray, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasCgeqrfBatched(cublasHandle_t handle, int m, int n, - cuComplex *const Aarray[], /*Device 
pointer*/ - int lda, cuComplex *const TauArray[], /*Device pointer*/ - int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, int, cuComplex *const[], int, cuComplex *const[], - int *, int); - static auto func_ptr = LoadSymbol("cublasCgeqrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, Aarray, lda, TauArray, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasZgeqrfBatched( - cublasHandle_t handle, int m, int n, - cuDoubleComplex *const Aarray[], /*Device pointer*/ - int lda, cuDoubleComplex *const TauArray[], /*Device pointer*/ - int *info, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, int, int, cuDoubleComplex *const[], int, - cuDoubleComplex *const[], int *, int); - static auto func_ptr = LoadSymbol("cublasZgeqrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, Aarray, lda, TauArray, info, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasSgelsBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - int nrhs, float *const Aarray[], /*Device pointer*/ - int lda, float *const Carray[], /*Device pointer*/ - int ldc, int *info, int *devInfoArray, /*Device pointer*/ - int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, int, float *const[], int, - float *const[], int, int *, int *, int); - static auto func_ptr = LoadSymbol("cublasSgelsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info, - devInfoArray, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasDgelsBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - int nrhs, double *const Aarray[], /*Device pointer*/ - int lda, double *const Carray[], /*Device pointer*/ - int ldc, int *info, int *devInfoArray, /*Device pointer*/ - int batchSize) { - using FuncPtr = 
cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, int, double *const[], int, - double *const[], int, int *, int *, int); - static auto func_ptr = LoadSymbol("cublasDgelsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info, - devInfoArray, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasCgelsBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - int nrhs, cuComplex *const Aarray[], /*Device pointer*/ - int lda, cuComplex *const Carray[], /*Device pointer*/ - int ldc, int *info, int *devInfoArray, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, int, cuComplex *const[], int, - cuComplex *const[], int, int *, int *, int); - static auto func_ptr = LoadSymbol("cublasCgelsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info, - devInfoArray, batchSize); -} - -cublasStatus_t CUBLASWINAPI -cublasZgelsBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n, - int nrhs, cuDoubleComplex *const Aarray[], /*Device pointer*/ - int lda, cuDoubleComplex *const Carray[], /*Device pointer*/ - int ldc, int *info, int *devInfoArray, int batchSize) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasOperation_t, int, int, int, - cuDoubleComplex *const[], int, cuDoubleComplex *const[], int, int *, - int *, int); - static auto func_ptr = LoadSymbol("cublasZgelsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info, - devInfoArray, batchSize); -} - -cublasStatus_t CUBLASWINAPI cublasSdgmm(cublasHandle_t handle, - cublasSideMode_t mode, int m, int n, - const float *A, int lda, const float *x, - int incx, float *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, 
cublasSideMode_t, int, int, const float *, int, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSdgmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, m, n, A, lda, x, incx, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasDdgmm(cublasHandle_t handle, - cublasSideMode_t mode, int m, int n, - const double *A, int lda, - const double *x, int incx, double *C, - int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, int, int, const double *, int, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDdgmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, m, n, A, lda, x, incx, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasCdgmm(cublasHandle_t handle, - cublasSideMode_t mode, int m, int n, - const cuComplex *A, int lda, - const cuComplex *x, int incx, - cuComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, int, int, const cuComplex *, int, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCdgmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, m, n, A, lda, x, incx, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasZdgmm(cublasHandle_t handle, - cublasSideMode_t mode, int m, int n, - const cuDoubleComplex *A, int lda, - const cuDoubleComplex *x, int incx, - cuDoubleComplex *C, int ldc) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasSideMode_t, int, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZdgmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, m, n, A, lda, x, incx, C, ldc); -} - -cublasStatus_t CUBLASWINAPI cublasStpttr(cublasHandle_t handle, - cublasFillMode_t uplo, int n, - const float *AP, float *A, int lda) { - 
using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasStpttr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, AP, A, lda); -} - -cublasStatus_t CUBLASWINAPI cublasDtpttr(cublasHandle_t handle, - cublasFillMode_t uplo, int n, - const double *AP, double *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cublasDtpttr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, AP, A, lda); -} - -cublasStatus_t CUBLASWINAPI cublasCtpttr(cublasHandle_t handle, - cublasFillMode_t uplo, int n, - const cuComplex *AP, cuComplex *A, - int lda) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtpttr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, AP, A, lda); -} - -cublasStatus_t CUBLASWINAPI cublasZtpttr(cublasHandle_t handle, - cublasFillMode_t uplo, int n, - const cuDoubleComplex *AP, - cuDoubleComplex *A, int lda) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtpttr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, AP, A, lda); -} - -cublasStatus_t CUBLASWINAPI cublasStrttp(cublasHandle_t handle, - cublasFillMode_t uplo, int n, - const float *A, int lda, float *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const float *, int, float *); - static auto func_ptr = LoadSymbol("cublasStrttp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, AP); -} - 
-cublasStatus_t CUBLASWINAPI cublasDtrttp(cublasHandle_t handle, - cublasFillMode_t uplo, int n, - const double *A, int lda, double *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const double *, int, double *); - static auto func_ptr = LoadSymbol("cublasDtrttp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, AP); -} - -cublasStatus_t CUBLASWINAPI cublasCtrttp(cublasHandle_t handle, - cublasFillMode_t uplo, int n, - const cuComplex *A, int lda, - cuComplex *AP) { - using FuncPtr = - cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, - const cuComplex *, int, cuComplex *); - static auto func_ptr = LoadSymbol("cublasCtrttp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, AP); -} - -cublasStatus_t CUBLASWINAPI cublasZtrttp(cublasHandle_t handle, - cublasFillMode_t uplo, int n, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *AP) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( - cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, int, - cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZtrttp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, AP); -} - -cublasStatus CUBLASWINAPI cublasInit(void) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(); - static auto func_ptr = LoadSymbol("cublasInit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -cublasStatus CUBLASWINAPI cublasShutdown(void) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(); - static auto func_ptr = LoadSymbol("cublasShutdown"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -cublasStatus CUBLASWINAPI cublasGetError(void) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(); - static auto func_ptr = LoadSymbol("cublasGetError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - 
-cublasStatus CUBLASWINAPI cublasGetVersion(int *version) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int *); - static auto func_ptr = LoadSymbol("cublasGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(version); -} - -cublasStatus CUBLASWINAPI cublasAlloc(int n, int elemSize, void **devicePtr) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, void **); - static auto func_ptr = LoadSymbol("cublasAlloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(n, elemSize, devicePtr); -} - -cublasStatus CUBLASWINAPI cublasFree(void *devicePtr) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(void *); - static auto func_ptr = LoadSymbol("cublasFree"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devicePtr); -} - -cublasStatus CUBLASWINAPI cublasSetKernelStream(cudaStream_t stream) { - using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cublasSetKernelStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -float CUBLASWINAPI cublasSnrm2(int n, const float *x, int incx) { - using FuncPtr = float(CUBLASWINAPI *)(int, const float *, int); - static auto func_ptr = LoadSymbol("cublasSnrm2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSnrm2"); - return func_ptr(n, x, incx); -} - -double CUBLASWINAPI cublasDnrm2(int n, const double *x, int incx) { - using FuncPtr = double(CUBLASWINAPI *)(int, const double *, int); - static auto func_ptr = LoadSymbol("cublasDnrm2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDnrm2"); - return func_ptr(n, x, incx); -} - -float CUBLASWINAPI cublasScnrm2(int n, const cuComplex *x, int incx) { - using FuncPtr = float(CUBLASWINAPI *)(int, const cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasScnrm2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasScnrm2"); - return func_ptr(n, x, incx); -} - -double CUBLASWINAPI cublasDznrm2(int n, const cuDoubleComplex *x, 
int incx) { - using FuncPtr = double(CUBLASWINAPI *)(int, const cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasDznrm2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDznrm2"); - return func_ptr(n, x, incx); -} - -float CUBLASWINAPI cublasSdot(int n, const float *x, int incx, const float *y, - int incy) { - using FuncPtr = - float(CUBLASWINAPI *)(int, const float *, int, const float *, int); - static auto func_ptr = LoadSymbol("cublasSdot"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSdot"); - return func_ptr(n, x, incx, y, incy); -} - -double CUBLASWINAPI cublasDdot(int n, const double *x, int incx, - const double *y, int incy) { - using FuncPtr = - double(CUBLASWINAPI *)(int, const double *, int, const double *, int); - static auto func_ptr = LoadSymbol("cublasDdot"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDdot"); - return func_ptr(n, x, incx, y, incy); -} - -cuComplex CUBLASWINAPI cublasCdotu(int n, const cuComplex *x, int incx, - const cuComplex *y, int incy) { - using FuncPtr = cuComplex(CUBLASWINAPI *)(int, const cuComplex *, int, - const cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCdotu"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCdotu"); - return func_ptr(n, x, incx, y, incy); -} - -cuComplex CUBLASWINAPI cublasCdotc(int n, const cuComplex *x, int incx, - const cuComplex *y, int incy) { - using FuncPtr = cuComplex(CUBLASWINAPI *)(int, const cuComplex *, int, - const cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCdotc"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCdotc"); - return func_ptr(n, x, incx, y, incy); -} - -cuDoubleComplex CUBLASWINAPI cublasZdotu(int n, const cuDoubleComplex *x, - int incx, const cuDoubleComplex *y, - int incy) { - using FuncPtr = cuDoubleComplex(CUBLASWINAPI *)( - int, const cuDoubleComplex *, int, const cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZdotu"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZdotu"); - return 
func_ptr(n, x, incx, y, incy); -} - -cuDoubleComplex CUBLASWINAPI cublasZdotc(int n, const cuDoubleComplex *x, - int incx, const cuDoubleComplex *y, - int incy) { - using FuncPtr = cuDoubleComplex(CUBLASWINAPI *)( - int, const cuDoubleComplex *, int, const cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZdotc"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZdotc"); - return func_ptr(n, x, incx, y, incy); -} - -void CUBLASWINAPI cublasSscal(int n, float alpha, float *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSscal"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSscal"); - return func_ptr(n, alpha, x, incx); -} - -void CUBLASWINAPI cublasDscal(int n, double alpha, double *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDscal"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDscal"); - return func_ptr(n, alpha, x, incx); -} - -void CUBLASWINAPI cublasCscal(int n, cuComplex alpha, cuComplex *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCscal"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCscal"); - return func_ptr(n, alpha, x, incx); -} - -void CUBLASWINAPI cublasZscal(int n, cuDoubleComplex alpha, cuDoubleComplex *x, - int incx) { - using FuncPtr = - void(CUBLASWINAPI *)(int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZscal"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZscal"); - return func_ptr(n, alpha, x, incx); -} - -void CUBLASWINAPI cublasCsscal(int n, float alpha, cuComplex *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(int, float, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsscal"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCsscal"); - return func_ptr(n, alpha, x, incx); -} - -void CUBLASWINAPI cublasZdscal(int n, 
double alpha, cuDoubleComplex *x, - int incx) { - using FuncPtr = void(CUBLASWINAPI *)(int, double, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZdscal"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZdscal"); - return func_ptr(n, alpha, x, incx); -} - -void CUBLASWINAPI cublasSaxpy(int n, float alpha, const float *x, int incx, - float *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(int, float, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSaxpy"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSaxpy"); - return func_ptr(n, alpha, x, incx, y, incy); -} - -void CUBLASWINAPI cublasDaxpy(int n, double alpha, const double *x, int incx, - double *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(int, double, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDaxpy"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDaxpy"); - return func_ptr(n, alpha, x, incx, y, incy); -} - -void CUBLASWINAPI cublasCaxpy(int n, cuComplex alpha, const cuComplex *x, - int incx, cuComplex *y, int incy) { - using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex, const cuComplex *, int, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCaxpy"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCaxpy"); - return func_ptr(n, alpha, x, incx, y, incy); -} - -void CUBLASWINAPI cublasZaxpy(int n, cuDoubleComplex alpha, - const cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(int, cuDoubleComplex, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZaxpy"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZaxpy"); - return func_ptr(n, alpha, x, incx, y, incy); -} - -void CUBLASWINAPI cublasScopy(int n, const float *x, int incx, float *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)(int, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasScopy"); - if 
(!func_ptr) LogFatalSymbolNotFound("cublasScopy"); - return func_ptr(n, x, incx, y, incy); -} - -void CUBLASWINAPI cublasDcopy(int n, const double *x, int incx, double *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)(int, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDcopy"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDcopy"); - return func_ptr(n, x, incx, y, incy); -} - -void CUBLASWINAPI cublasCcopy(int n, const cuComplex *x, int incx, cuComplex *y, - int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCcopy"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCcopy"); - return func_ptr(n, x, incx, y, incy); -} - -void CUBLASWINAPI cublasZcopy(int n, const cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy) { - using FuncPtr = void(CUBLASWINAPI *)(int, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZcopy"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZcopy"); - return func_ptr(n, x, incx, y, incy); -} - -void CUBLASWINAPI cublasSswap(int n, float *x, int incx, float *y, int incy) { - using FuncPtr = void(CUBLASWINAPI *)(int, float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSswap"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSswap"); - return func_ptr(n, x, incx, y, incy); -} - -void CUBLASWINAPI cublasDswap(int n, double *x, int incx, double *y, int incy) { - using FuncPtr = void(CUBLASWINAPI *)(int, double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDswap"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDswap"); - return func_ptr(n, x, incx, y, incy); -} - -void CUBLASWINAPI cublasCswap(int n, cuComplex *x, int incx, cuComplex *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCswap"); - if (!func_ptr) 
LogFatalSymbolNotFound("cublasCswap"); - return func_ptr(n, x, incx, y, incy); -} - -void CUBLASWINAPI cublasZswap(int n, cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(int, cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZswap"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZswap"); - return func_ptr(n, x, incx, y, incy); -} - -int CUBLASWINAPI cublasIsamax(int n, const float *x, int incx) { - using FuncPtr = int(CUBLASWINAPI *)(int, const float *, int); - static auto func_ptr = LoadSymbol("cublasIsamax"); - if (!func_ptr) LogFatalSymbolNotFound("cublasIsamax"); - return func_ptr(n, x, incx); -} - -int CUBLASWINAPI cublasIdamax(int n, const double *x, int incx) { - using FuncPtr = int(CUBLASWINAPI *)(int, const double *, int); - static auto func_ptr = LoadSymbol("cublasIdamax"); - if (!func_ptr) LogFatalSymbolNotFound("cublasIdamax"); - return func_ptr(n, x, incx); -} - -int CUBLASWINAPI cublasIcamax(int n, const cuComplex *x, int incx) { - using FuncPtr = int(CUBLASWINAPI *)(int, const cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasIcamax"); - if (!func_ptr) LogFatalSymbolNotFound("cublasIcamax"); - return func_ptr(n, x, incx); -} - -int CUBLASWINAPI cublasIzamax(int n, const cuDoubleComplex *x, int incx) { - using FuncPtr = int(CUBLASWINAPI *)(int, const cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasIzamax"); - if (!func_ptr) LogFatalSymbolNotFound("cublasIzamax"); - return func_ptr(n, x, incx); -} - -int CUBLASWINAPI cublasIsamin(int n, const float *x, int incx) { - using FuncPtr = int(CUBLASWINAPI *)(int, const float *, int); - static auto func_ptr = LoadSymbol("cublasIsamin"); - if (!func_ptr) LogFatalSymbolNotFound("cublasIsamin"); - return func_ptr(n, x, incx); -} - -int CUBLASWINAPI cublasIdamin(int n, const double *x, int incx) { - using FuncPtr = int(CUBLASWINAPI *)(int, const double *, int); - static auto func_ptr 
= LoadSymbol("cublasIdamin"); - if (!func_ptr) LogFatalSymbolNotFound("cublasIdamin"); - return func_ptr(n, x, incx); -} - -int CUBLASWINAPI cublasIcamin(int n, const cuComplex *x, int incx) { - using FuncPtr = int(CUBLASWINAPI *)(int, const cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasIcamin"); - if (!func_ptr) LogFatalSymbolNotFound("cublasIcamin"); - return func_ptr(n, x, incx); -} - -int CUBLASWINAPI cublasIzamin(int n, const cuDoubleComplex *x, int incx) { - using FuncPtr = int(CUBLASWINAPI *)(int, const cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasIzamin"); - if (!func_ptr) LogFatalSymbolNotFound("cublasIzamin"); - return func_ptr(n, x, incx); -} - -float CUBLASWINAPI cublasSasum(int n, const float *x, int incx) { - using FuncPtr = float(CUBLASWINAPI *)(int, const float *, int); - static auto func_ptr = LoadSymbol("cublasSasum"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSasum"); - return func_ptr(n, x, incx); -} - -double CUBLASWINAPI cublasDasum(int n, const double *x, int incx) { - using FuncPtr = double(CUBLASWINAPI *)(int, const double *, int); - static auto func_ptr = LoadSymbol("cublasDasum"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDasum"); - return func_ptr(n, x, incx); -} - -float CUBLASWINAPI cublasScasum(int n, const cuComplex *x, int incx) { - using FuncPtr = float(CUBLASWINAPI *)(int, const cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasScasum"); - if (!func_ptr) LogFatalSymbolNotFound("cublasScasum"); - return func_ptr(n, x, incx); -} - -double CUBLASWINAPI cublasDzasum(int n, const cuDoubleComplex *x, int incx) { - using FuncPtr = double(CUBLASWINAPI *)(int, const cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasDzasum"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDzasum"); - return func_ptr(n, x, incx); -} - -void CUBLASWINAPI cublasSrot(int n, float *x, int incx, float *y, int incy, - float sc, float ss) { - using FuncPtr = - void(CUBLASWINAPI *)(int, 
float *, int, float *, int, float, float); - static auto func_ptr = LoadSymbol("cublasSrot"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSrot"); - return func_ptr(n, x, incx, y, incy, sc, ss); -} - -void CUBLASWINAPI cublasDrot(int n, double *x, int incx, double *y, int incy, - double sc, double ss) { - using FuncPtr = - void(CUBLASWINAPI *)(int, double *, int, double *, int, double, double); - static auto func_ptr = LoadSymbol("cublasDrot"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDrot"); - return func_ptr(n, x, incx, y, incy, sc, ss); -} - -void CUBLASWINAPI cublasCrot(int n, cuComplex *x, int incx, cuComplex *y, - int incy, float c, cuComplex s) { - using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex *, int, cuComplex *, int, - float, cuComplex); - static auto func_ptr = LoadSymbol("cublasCrot"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCrot"); - return func_ptr(n, x, incx, y, incy, c, s); -} - -void CUBLASWINAPI cublasZrot(int n, cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy, double sc, - cuDoubleComplex cs) { - using FuncPtr = - void(CUBLASWINAPI *)(int, cuDoubleComplex *, int, cuDoubleComplex *, int, - double, cuDoubleComplex); - static auto func_ptr = LoadSymbol("cublasZrot"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZrot"); - return func_ptr(n, x, incx, y, incy, sc, cs); -} - -void CUBLASWINAPI cublasCsrot(int n, cuComplex *x, int incx, cuComplex *y, - int incy, float c, float s) { - using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex *, int, cuComplex *, int, - float, float); - static auto func_ptr = LoadSymbol("cublasCsrot"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCsrot"); - return func_ptr(n, x, incx, y, incy, c, s); -} - -void CUBLASWINAPI cublasZdrot(int n, cuDoubleComplex *x, int incx, - cuDoubleComplex *y, int incy, double c, - double s) { - using FuncPtr = void(CUBLASWINAPI *)(int, cuDoubleComplex *, int, - cuDoubleComplex *, int, double, double); - static auto func_ptr = LoadSymbol("cublasZdrot"); - if 
(!func_ptr) LogFatalSymbolNotFound("cublasZdrot"); - return func_ptr(n, x, incx, y, incy, c, s); -} - -void CUBLASWINAPI cublasSrotg(float *sa, float *sb, float *sc, float *ss) { - using FuncPtr = void(CUBLASWINAPI *)(float *, float *, float *, float *); - static auto func_ptr = LoadSymbol("cublasSrotg"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSrotg"); - return func_ptr(sa, sb, sc, ss); -} - -void CUBLASWINAPI cublasDrotg(double *sa, double *sb, double *sc, double *ss) { - using FuncPtr = void(CUBLASWINAPI *)(double *, double *, double *, double *); - static auto func_ptr = LoadSymbol("cublasDrotg"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDrotg"); - return func_ptr(sa, sb, sc, ss); -} - -void CUBLASWINAPI cublasCrotg(cuComplex *ca, cuComplex cb, float *sc, - cuComplex *cs) { - using FuncPtr = - void(CUBLASWINAPI *)(cuComplex *, cuComplex, float *, cuComplex *); - static auto func_ptr = LoadSymbol("cublasCrotg"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCrotg"); - return func_ptr(ca, cb, sc, cs); -} - -void CUBLASWINAPI cublasZrotg(cuDoubleComplex *ca, cuDoubleComplex cb, - double *sc, cuDoubleComplex *cs) { - using FuncPtr = void(CUBLASWINAPI *)(cuDoubleComplex *, cuDoubleComplex, - double *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZrotg"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZrotg"); - return func_ptr(ca, cb, sc, cs); -} - -void CUBLASWINAPI cublasSrotm(int n, float *x, int incx, float *y, int incy, - const float *sparam) { - using FuncPtr = - void(CUBLASWINAPI *)(int, float *, int, float *, int, const float *); - static auto func_ptr = LoadSymbol("cublasSrotm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSrotm"); - return func_ptr(n, x, incx, y, incy, sparam); -} - -void CUBLASWINAPI cublasDrotm(int n, double *x, int incx, double *y, int incy, - const double *sparam) { - using FuncPtr = - void(CUBLASWINAPI *)(int, double *, int, double *, int, const double *); - static auto func_ptr = 
LoadSymbol("cublasDrotm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDrotm"); - return func_ptr(n, x, incx, y, incy, sparam); -} - -void CUBLASWINAPI cublasSrotmg(float *sd1, float *sd2, float *sx1, - const float *sy1, float *sparam) { - using FuncPtr = - void(CUBLASWINAPI *)(float *, float *, float *, const float *, float *); - static auto func_ptr = LoadSymbol("cublasSrotmg"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSrotmg"); - return func_ptr(sd1, sd2, sx1, sy1, sparam); -} - -void CUBLASWINAPI cublasDrotmg(double *sd1, double *sd2, double *sx1, - const double *sy1, double *sparam) { - using FuncPtr = void(CUBLASWINAPI *)(double *, double *, double *, - const double *, double *); - static auto func_ptr = LoadSymbol("cublasDrotmg"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDrotmg"); - return func_ptr(sd1, sd2, sx1, sy1, sparam); -} - -void CUBLASWINAPI cublasSgemv(char trans, int m, int n, float alpha, - const float *A, int lda, const float *x, int incx, - float beta, float *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, int, float, const float *, int, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSgemv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSgemv"); - return func_ptr(trans, m, n, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasDgemv(char trans, int m, int n, double alpha, - const double *A, int lda, const double *x, - int incx, double beta, double *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, int, double, const double *, int, - const double *, int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDgemv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDgemv"); - return func_ptr(trans, m, n, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasCgemv(char trans, int m, int n, cuComplex alpha, - const cuComplex *A, int lda, const cuComplex *x, - int incx, cuComplex beta, cuComplex *y, - int incy) { 
- using FuncPtr = - void(CUBLASWINAPI *)(char, int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgemv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCgemv"); - return func_ptr(trans, m, n, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasZgemv(char trans, int m, int n, cuDoubleComplex alpha, - const cuDoubleComplex *A, int lda, - const cuDoubleComplex *x, int incx, - cuDoubleComplex beta, cuDoubleComplex *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZgemv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZgemv"); - return func_ptr(trans, m, n, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasSgbmv(char trans, int m, int n, int kl, int ku, - float alpha, const float *A, int lda, - const float *x, int incx, float beta, float *y, - int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, int, int, int, float, const float *, int, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSgbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSgbmv"); - return func_ptr(trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasDgbmv(char trans, int m, int n, int kl, int ku, - double alpha, const double *A, int lda, - const double *x, int incx, double beta, double *y, - int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, int, int, int, double, const double *, - int, const double *, int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDgbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDgbmv"); - return func_ptr(trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasCgbmv(char trans, int m, int n, int kl, 
int ku, - cuComplex alpha, const cuComplex *A, int lda, - const cuComplex *x, int incx, cuComplex beta, - cuComplex *y, int incy) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, int, int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCgbmv"); - return func_ptr(trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasZgbmv(char trans, int m, int n, int kl, int ku, - cuDoubleComplex alpha, const cuDoubleComplex *A, - int lda, const cuDoubleComplex *x, int incx, - cuDoubleComplex beta, cuDoubleComplex *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, int, int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZgbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZgbmv"); - return func_ptr(trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasStrmv(char uplo, char trans, char diag, int n, - const float *A, int lda, float *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const float *, - int, float *, int); - static auto func_ptr = LoadSymbol("cublasStrmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasStrmv"); - return func_ptr(uplo, trans, diag, n, A, lda, x, incx); -} - -void CUBLASWINAPI cublasDtrmv(char uplo, char trans, char diag, int n, - const double *A, int lda, double *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const double *, - int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtrmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDtrmv"); - return func_ptr(uplo, trans, diag, n, A, lda, x, incx); -} - -void CUBLASWINAPI cublasCtrmv(char uplo, char trans, char diag, int n, - const cuComplex *A, int lda, cuComplex *x, - 
int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const cuComplex *, - int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtrmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCtrmv"); - return func_ptr(uplo, trans, diag, n, A, lda, x, incx); -} - -void CUBLASWINAPI cublasZtrmv(char uplo, char trans, char diag, int n, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *x, int incx) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, char, int, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtrmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZtrmv"); - return func_ptr(uplo, trans, diag, n, A, lda, x, incx); -} - -void CUBLASWINAPI cublasStbmv(char uplo, char trans, char diag, int n, int k, - const float *A, int lda, float *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, int, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasStbmv"); - return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); -} - -void CUBLASWINAPI cublasDtbmv(char uplo, char trans, char diag, int n, int k, - const double *A, int lda, double *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, int, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDtbmv"); - return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); -} - -void CUBLASWINAPI cublasCtbmv(char uplo, char trans, char diag, int n, int k, - const cuComplex *A, int lda, cuComplex *x, - int incx) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, char, int, int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCtbmv"); - return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); -} - -void CUBLASWINAPI 
cublasZtbmv(char uplo, char trans, char diag, int n, int k, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *x, int incx) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, char, int, int, const cuDoubleComplex *, - int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZtbmv"); - return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); -} - -void CUBLASWINAPI cublasStpmv(char uplo, char trans, char diag, int n, - const float *AP, float *x, int incx) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, char, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasStpmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasStpmv"); - return func_ptr(uplo, trans, diag, n, AP, x, incx); -} - -void CUBLASWINAPI cublasDtpmv(char uplo, char trans, char diag, int n, - const double *AP, double *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const double *, - double *, int); - static auto func_ptr = LoadSymbol("cublasDtpmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDtpmv"); - return func_ptr(uplo, trans, diag, n, AP, x, incx); -} - -void CUBLASWINAPI cublasCtpmv(char uplo, char trans, char diag, int n, - const cuComplex *AP, cuComplex *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtpmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCtpmv"); - return func_ptr(uplo, trans, diag, n, AP, x, incx); -} - -void CUBLASWINAPI cublasZtpmv(char uplo, char trans, char diag, int n, - const cuDoubleComplex *AP, cuDoubleComplex *x, - int incx) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, char, int, const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtpmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZtpmv"); - return func_ptr(uplo, trans, diag, n, AP, x, incx); -} - -void 
CUBLASWINAPI cublasStrsv(char uplo, char trans, char diag, int n, - const float *A, int lda, float *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const float *, - int, float *, int); - static auto func_ptr = LoadSymbol("cublasStrsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasStrsv"); - return func_ptr(uplo, trans, diag, n, A, lda, x, incx); -} - -void CUBLASWINAPI cublasDtrsv(char uplo, char trans, char diag, int n, - const double *A, int lda, double *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const double *, - int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtrsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDtrsv"); - return func_ptr(uplo, trans, diag, n, A, lda, x, incx); -} - -void CUBLASWINAPI cublasCtrsv(char uplo, char trans, char diag, int n, - const cuComplex *A, int lda, cuComplex *x, - int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const cuComplex *, - int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtrsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCtrsv"); - return func_ptr(uplo, trans, diag, n, A, lda, x, incx); -} - -void CUBLASWINAPI cublasZtrsv(char uplo, char trans, char diag, int n, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *x, int incx) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, char, int, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtrsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZtrsv"); - return func_ptr(uplo, trans, diag, n, A, lda, x, incx); -} - -void CUBLASWINAPI cublasStpsv(char uplo, char trans, char diag, int n, - const float *AP, float *x, int incx) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, char, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cublasStpsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasStpsv"); - return func_ptr(uplo, trans, diag, n, AP, x, incx); -} - 
-void CUBLASWINAPI cublasDtpsv(char uplo, char trans, char diag, int n, - const double *AP, double *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const double *, - double *, int); - static auto func_ptr = LoadSymbol("cublasDtpsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDtpsv"); - return func_ptr(uplo, trans, diag, n, AP, x, incx); -} - -void CUBLASWINAPI cublasCtpsv(char uplo, char trans, char diag, int n, - const cuComplex *AP, cuComplex *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtpsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCtpsv"); - return func_ptr(uplo, trans, diag, n, AP, x, incx); -} - -void CUBLASWINAPI cublasZtpsv(char uplo, char trans, char diag, int n, - const cuDoubleComplex *AP, cuDoubleComplex *x, - int incx) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, char, int, const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtpsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZtpsv"); - return func_ptr(uplo, trans, diag, n, AP, x, incx); -} - -void CUBLASWINAPI cublasStbsv(char uplo, char trans, char diag, int n, int k, - const float *A, int lda, float *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, int, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStbsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasStbsv"); - return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); -} - -void CUBLASWINAPI cublasDtbsv(char uplo, char trans, char diag, int n, int k, - const double *A, int lda, double *x, int incx) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, int, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtbsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDtbsv"); - return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); -} 
- -void CUBLASWINAPI cublasCtbsv(char uplo, char trans, char diag, int n, int k, - const cuComplex *A, int lda, cuComplex *x, - int incx) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, char, int, int, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtbsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCtbsv"); - return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); -} - -void CUBLASWINAPI cublasZtbsv(char uplo, char trans, char diag, int n, int k, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *x, int incx) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, char, int, int, const cuDoubleComplex *, - int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtbsv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZtbsv"); - return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); -} - -void CUBLASWINAPI cublasSsymv(char uplo, int n, float alpha, const float *A, - int lda, const float *x, int incx, float beta, - float *y, int incy) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const float *, int, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSsymv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSsymv"); - return func_ptr(uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasDsymv(char uplo, int n, double alpha, const double *A, - int lda, const double *x, int incx, double beta, - double *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, double, const double *, int, - const double *, int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDsymv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDsymv"); - return func_ptr(uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasChemv(char uplo, int n, cuComplex alpha, - const cuComplex *A, int lda, const cuComplex *x, - int incx, cuComplex beta, cuComplex *y, - int incy) { - using FuncPtr = - 
void(CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasChemv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasChemv"); - return func_ptr(uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasZhemv(char uplo, int n, cuDoubleComplex alpha, - const cuDoubleComplex *A, int lda, - const cuDoubleComplex *x, int incx, - cuDoubleComplex beta, cuDoubleComplex *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZhemv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZhemv"); - return func_ptr(uplo, n, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasSsbmv(char uplo, int n, int k, float alpha, - const float *A, int lda, const float *x, int incx, - float beta, float *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, int, float, const float *, int, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSsbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSsbmv"); - return func_ptr(uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasDsbmv(char uplo, int n, int k, double alpha, - const double *A, int lda, const double *x, - int incx, double beta, double *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, int, double, const double *, int, - const double *, int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDsbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDsbmv"); - return func_ptr(uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasChbmv(char uplo, int n, int k, cuComplex alpha, - const cuComplex *A, int lda, const cuComplex *x, - int incx, cuComplex beta, cuComplex *y, - int incy) { - using 
FuncPtr = - void(CUBLASWINAPI *)(char, int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasChbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasChbmv"); - return func_ptr(uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasZhbmv(char uplo, int n, int k, cuDoubleComplex alpha, - const cuDoubleComplex *A, int lda, - const cuDoubleComplex *x, int incx, - cuDoubleComplex beta, cuDoubleComplex *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZhbmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZhbmv"); - return func_ptr(uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasSspmv(char uplo, int n, float alpha, const float *AP, - const float *x, int incx, float beta, float *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const float *, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSspmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSspmv"); - return func_ptr(uplo, n, alpha, AP, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasDspmv(char uplo, int n, double alpha, const double *AP, - const double *x, int incx, double beta, double *y, - int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, double, const double *, const double *, - int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDspmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDspmv"); - return func_ptr(uplo, n, alpha, AP, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasChpmv(char uplo, int n, cuComplex alpha, - const cuComplex *AP, const cuComplex *x, int incx, - cuComplex beta, cuComplex *y, int incy) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, 
cuComplex, const cuComplex *, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasChpmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasChpmv"); - return func_ptr(uplo, n, alpha, AP, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasZhpmv(char uplo, int n, cuDoubleComplex alpha, - const cuDoubleComplex *AP, - const cuDoubleComplex *x, int incx, - cuDoubleComplex beta, cuDoubleComplex *y, - int incy) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, cuDoubleComplex, const cuDoubleComplex *, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZhpmv"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZhpmv"); - return func_ptr(uplo, n, alpha, AP, x, incx, beta, y, incy); -} - -void CUBLASWINAPI cublasSger(int m, int n, float alpha, const float *x, - int incx, const float *y, int incy, float *A, - int lda) { - using FuncPtr = void(CUBLASWINAPI *)(int, int, float, const float *, int, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSger"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSger"); - return func_ptr(m, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasDger(int m, int n, double alpha, const double *x, - int incx, const double *y, int incy, double *A, - int lda) { - using FuncPtr = void(CUBLASWINAPI *)(int, int, double, const double *, int, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDger"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDger"); - return func_ptr(m, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasCgeru(int m, int n, cuComplex alpha, const cuComplex *x, - int incx, const cuComplex *y, int incy, - cuComplex *A, int lda) { - using FuncPtr = - void(CUBLASWINAPI *)(int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgeru"); - if (!func_ptr) 
LogFatalSymbolNotFound("cublasCgeru"); - return func_ptr(m, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasCgerc(int m, int n, cuComplex alpha, const cuComplex *x, - int incx, const cuComplex *y, int incy, - cuComplex *A, int lda) { - using FuncPtr = - void(CUBLASWINAPI *)(int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgerc"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCgerc"); - return func_ptr(m, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasZgeru(int m, int n, cuDoubleComplex alpha, - const cuDoubleComplex *x, int incx, - const cuDoubleComplex *y, int incy, - cuDoubleComplex *A, int lda) { - using FuncPtr = void(CUBLASWINAPI *)( - int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZgeru"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZgeru"); - return func_ptr(m, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasZgerc(int m, int n, cuDoubleComplex alpha, - const cuDoubleComplex *x, int incx, - const cuDoubleComplex *y, int incy, - cuDoubleComplex *A, int lda) { - using FuncPtr = void(CUBLASWINAPI *)( - int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZgerc"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZgerc"); - return func_ptr(m, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasSsyr(char uplo, int n, float alpha, const float *x, - int incx, float *A, int lda) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, float, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSsyr"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSsyr"); - return func_ptr(uplo, n, alpha, x, incx, A, lda); -} - -void CUBLASWINAPI cublasDsyr(char uplo, int n, double alpha, const 
double *x, - int incx, double *A, int lda) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, double, const double *, int, - double *, int); - static auto func_ptr = LoadSymbol("cublasDsyr"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDsyr"); - return func_ptr(uplo, n, alpha, x, incx, A, lda); -} - -void CUBLASWINAPI cublasCher(char uplo, int n, float alpha, const cuComplex *x, - int incx, cuComplex *A, int lda) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const cuComplex *, int, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCher"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCher"); - return func_ptr(uplo, n, alpha, x, incx, A, lda); -} - -void CUBLASWINAPI cublasZher(char uplo, int n, double alpha, - const cuDoubleComplex *x, int incx, - cuDoubleComplex *A, int lda) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, double, const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZher"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZher"); - return func_ptr(uplo, n, alpha, x, incx, A, lda); -} - -void CUBLASWINAPI cublasSspr(char uplo, int n, float alpha, const float *x, - int incx, float *AP) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, float, const float *, int, float *); - static auto func_ptr = LoadSymbol("cublasSspr"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSspr"); - return func_ptr(uplo, n, alpha, x, incx, AP); -} - -void CUBLASWINAPI cublasDspr(char uplo, int n, double alpha, const double *x, - int incx, double *AP) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, double, const double *, int, double *); - static auto func_ptr = LoadSymbol("cublasDspr"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDspr"); - return func_ptr(uplo, n, alpha, x, incx, AP); -} - -void CUBLASWINAPI cublasChpr(char uplo, int n, float alpha, const cuComplex *x, - int incx, cuComplex *AP) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const cuComplex *, int, - 
cuComplex *); - static auto func_ptr = LoadSymbol("cublasChpr"); - if (!func_ptr) LogFatalSymbolNotFound("cublasChpr"); - return func_ptr(uplo, n, alpha, x, incx, AP); -} - -void CUBLASWINAPI cublasZhpr(char uplo, int n, double alpha, - const cuDoubleComplex *x, int incx, - cuDoubleComplex *AP) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, double, const cuDoubleComplex *, int, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cublasZhpr"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZhpr"); - return func_ptr(uplo, n, alpha, x, incx, AP); -} - -void CUBLASWINAPI cublasSsyr2(char uplo, int n, float alpha, const float *x, - int incx, const float *y, int incy, float *A, - int lda) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const float *, int, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasSsyr2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSsyr2"); - return func_ptr(uplo, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasDsyr2(char uplo, int n, double alpha, const double *x, - int incx, const double *y, int incy, double *A, - int lda) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, double, const double *, int, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDsyr2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDsyr2"); - return func_ptr(uplo, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasCher2(char uplo, int n, cuComplex alpha, - const cuComplex *x, int incx, const cuComplex *y, - int incy, cuComplex *A, int lda) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCher2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCher2"); - return func_ptr(uplo, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasZher2(char uplo, int n, cuDoubleComplex alpha, - const cuDoubleComplex *x, int 
incx, - const cuDoubleComplex *y, int incy, - cuDoubleComplex *A, int lda) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZher2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZher2"); - return func_ptr(uplo, n, alpha, x, incx, y, incy, A, lda); -} - -void CUBLASWINAPI cublasSspr2(char uplo, int n, float alpha, const float *x, - int incx, const float *y, int incy, float *AP) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const float *, int, - const float *, int, float *); - static auto func_ptr = LoadSymbol("cublasSspr2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSspr2"); - return func_ptr(uplo, n, alpha, x, incx, y, incy, AP); -} - -void CUBLASWINAPI cublasDspr2(char uplo, int n, double alpha, const double *x, - int incx, const double *y, int incy, double *AP) { - using FuncPtr = void(CUBLASWINAPI *)(char, int, double, const double *, int, - const double *, int, double *); - static auto func_ptr = LoadSymbol("cublasDspr2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDspr2"); - return func_ptr(uplo, n, alpha, x, incx, y, incy, AP); -} - -void CUBLASWINAPI cublasChpr2(char uplo, int n, cuComplex alpha, - const cuComplex *x, int incx, const cuComplex *y, - int incy, cuComplex *AP) { - using FuncPtr = - void(CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex *); - static auto func_ptr = LoadSymbol("cublasChpr2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasChpr2"); - return func_ptr(uplo, n, alpha, x, incx, y, incy, AP); -} - -void CUBLASWINAPI cublasZhpr2(char uplo, int n, cuDoubleComplex alpha, - const cuDoubleComplex *x, int incx, - const cuDoubleComplex *y, int incy, - cuDoubleComplex *AP) { - using FuncPtr = void(CUBLASWINAPI *)( - char, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex 
*); - static auto func_ptr = LoadSymbol("cublasZhpr2"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZhpr2"); - return func_ptr(uplo, n, alpha, x, incx, y, incy, AP); -} - -void CUBLASWINAPI cublasSgemm(char transa, char transb, int m, int n, int k, - float alpha, const float *A, int lda, - const float *B, int ldb, float beta, float *C, - int ldc) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, int, int, int, float, const float *, int, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSgemm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSgemm"); - return func_ptr(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasDgemm(char transa, char transb, int m, int n, int k, - double alpha, const double *A, int lda, - const double *B, int ldb, double beta, double *C, - int ldc) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, int, int, int, double, const double *, - int, const double *, int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDgemm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDgemm"); - return func_ptr(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasCgemm(char transa, char transb, int m, int n, int k, - cuComplex alpha, const cuComplex *A, int lda, - const cuComplex *B, int ldb, cuComplex beta, - cuComplex *C, int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCgemm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCgemm"); - return func_ptr(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasZgemm(char transa, char transb, int m, int n, int k, - cuDoubleComplex alpha, const cuDoubleComplex *A, - int lda, const cuDoubleComplex *B, int ldb, - cuDoubleComplex beta, cuDoubleComplex *C, - int ldc) { - 
using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZgemm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZgemm"); - return func_ptr(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasSsyrk(char uplo, char trans, int n, int k, float alpha, - const float *A, int lda, float beta, float *C, - int ldc) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, int, int, float, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSsyrk"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSsyrk"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -void CUBLASWINAPI cublasDsyrk(char uplo, char trans, int n, int k, double alpha, - const double *A, int lda, double beta, double *C, - int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, double, const double *, int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDsyrk"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDsyrk"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -void CUBLASWINAPI cublasCsyrk(char uplo, char trans, int n, int k, - cuComplex alpha, const cuComplex *A, int lda, - cuComplex beta, cuComplex *C, int ldc) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, int, int, cuComplex, const cuComplex *, - int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsyrk"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCsyrk"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -void CUBLASWINAPI cublasZsyrk(char uplo, char trans, int n, int k, - cuDoubleComplex alpha, const cuDoubleComplex *A, - int lda, cuDoubleComplex beta, cuDoubleComplex *C, - int ldc) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, int, int, cuDoubleComplex, - 
const cuDoubleComplex *, int, - cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZsyrk"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZsyrk"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -void CUBLASWINAPI cublasCherk(char uplo, char trans, int n, int k, float alpha, - const cuComplex *A, int lda, float beta, - cuComplex *C, int ldc) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, int, int, float, const cuComplex *, int, - float, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCherk"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCherk"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -void CUBLASWINAPI cublasZherk(char uplo, char trans, int n, int k, double alpha, - const cuDoubleComplex *A, int lda, double beta, - cuDoubleComplex *C, int ldc) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, int, int, double, - const cuDoubleComplex *, int, double, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZherk"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZherk"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); -} - -void CUBLASWINAPI cublasSsyr2k(char uplo, char trans, int n, int k, float alpha, - const float *A, int lda, const float *B, int ldb, - float beta, float *C, int ldc) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, int, int, float, const float *, int, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSsyr2k"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSsyr2k"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasDsyr2k(char uplo, char trans, int n, int k, - double alpha, const double *A, int lda, - const double *B, int ldb, double beta, double *C, - int ldc) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, int, int, double, const double *, int, - const double *, int, double, double *, int); - 
static auto func_ptr = LoadSymbol("cublasDsyr2k"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDsyr2k"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasCsyr2k(char uplo, char trans, int n, int k, - cuComplex alpha, const cuComplex *A, int lda, - const cuComplex *B, int ldb, cuComplex beta, - cuComplex *C, int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsyr2k"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCsyr2k"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasZsyr2k(char uplo, char trans, int n, int k, - cuDoubleComplex alpha, const cuDoubleComplex *A, - int lda, const cuDoubleComplex *B, int ldb, - cuDoubleComplex beta, cuDoubleComplex *C, - int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZsyr2k"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZsyr2k"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasCher2k(char uplo, char trans, int n, int k, - cuComplex alpha, const cuComplex *A, int lda, - const cuComplex *B, int ldb, float beta, - cuComplex *C, int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, float, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCher2k"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCher2k"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasZher2k(char uplo, char trans, int n, int k, - cuDoubleComplex alpha, const cuDoubleComplex *A, - int lda, const 
cuDoubleComplex *B, int ldb, - double beta, cuDoubleComplex *C, int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, double, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZher2k"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZher2k"); - return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasSsymm(char side, char uplo, int m, int n, float alpha, - const float *A, int lda, const float *B, int ldb, - float beta, float *C, int ldc) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, int, int, float, const float *, int, - const float *, int, float, float *, int); - static auto func_ptr = LoadSymbol("cublasSsymm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasSsymm"); - return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasDsymm(char side, char uplo, int m, int n, double alpha, - const double *A, int lda, const double *B, - int ldb, double beta, double *C, int ldc) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, int, int, double, const double *, int, - const double *, int, double, double *, int); - static auto func_ptr = LoadSymbol("cublasDsymm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDsymm"); - return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasCsymm(char side, char uplo, int m, int n, - cuComplex alpha, const cuComplex *A, int lda, - const cuComplex *B, int ldb, cuComplex beta, - cuComplex *C, int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCsymm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCsymm"); - return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasZsymm(char side, 
char uplo, int m, int n, - cuDoubleComplex alpha, const cuDoubleComplex *A, - int lda, const cuDoubleComplex *B, int ldb, - cuDoubleComplex beta, cuDoubleComplex *C, - int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZsymm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZsymm"); - return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasChemm(char side, char uplo, int m, int n, - cuComplex alpha, const cuComplex *A, int lda, - const cuComplex *B, int ldb, cuComplex beta, - cuComplex *C, int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, cuComplex, const cuComplex *, int, - const cuComplex *, int, cuComplex, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasChemm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasChemm"); - return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasZhemm(char side, char uplo, int m, int n, - cuDoubleComplex alpha, const cuDoubleComplex *A, - int lda, const cuDoubleComplex *B, int ldb, - cuDoubleComplex beta, cuDoubleComplex *C, - int ldc) { - using FuncPtr = void(CUBLASWINAPI *)( - char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZhemm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZhemm"); - return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); -} - -void CUBLASWINAPI cublasStrsm(char side, char uplo, char transa, char diag, - int m, int n, float alpha, const float *A, - int lda, float *B, int ldb) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, float, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStrsm"); - 
if (!func_ptr) LogFatalSymbolNotFound("cublasStrsm"); - return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); -} - -void CUBLASWINAPI cublasDtrsm(char side, char uplo, char transa, char diag, - int m, int n, double alpha, const double *A, - int lda, double *B, int ldb) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, double, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtrsm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDtrsm"); - return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); -} - -void CUBLASWINAPI cublasCtrsm(char side, char uplo, char transa, char diag, - int m, int n, cuComplex alpha, const cuComplex *A, - int lda, cuComplex *B, int ldb) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, char, char, int, int, cuComplex, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtrsm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCtrsm"); - return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); -} - -void CUBLASWINAPI cublasZtrsm(char side, char uplo, char transa, char diag, - int m, int n, cuDoubleComplex alpha, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *B, int ldb) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, - cuDoubleComplex, const cuDoubleComplex *, - int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtrsm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZtrsm"); - return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); -} - -void CUBLASWINAPI cublasStrmm(char side, char uplo, char transa, char diag, - int m, int n, float alpha, const float *A, - int lda, float *B, int ldb) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, float, - const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cublasStrmm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasStrmm"); - return 
func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); -} - -void CUBLASWINAPI cublasDtrmm(char side, char uplo, char transa, char diag, - int m, int n, double alpha, const double *A, - int lda, double *B, int ldb) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, double, - const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cublasDtrmm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasDtrmm"); - return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); -} - -void CUBLASWINAPI cublasCtrmm(char side, char uplo, char transa, char diag, - int m, int n, cuComplex alpha, const cuComplex *A, - int lda, cuComplex *B, int ldb) { - using FuncPtr = - void(CUBLASWINAPI *)(char, char, char, char, int, int, cuComplex, - const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cublasCtrmm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasCtrmm"); - return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); -} - -void CUBLASWINAPI cublasZtrmm(char side, char uplo, char transa, char diag, - int m, int n, cuDoubleComplex alpha, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *B, int ldb) { - using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, - cuDoubleComplex, const cuDoubleComplex *, - int, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cublasZtrmm"); - if (!func_ptr) LogFatalSymbolNotFound("cublasZtrmm"); - return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cublas_stub.cc b/third_party/xla/third_party/tsl/tsl/cuda/cublas_stub.cc index 2392e6cdb9ae1f..814d64d75d8d61 100644 --- a/third_party/xla/third_party/tsl/tsl/cuda/cublas_stub.cc +++ b/third_party/xla/third_party/tsl/tsl/cuda/cublas_stub.cc @@ -12,11 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ + +#include + #if CUBLAS_VER_MAJOR >= 11 #include "third_party/gpus/cuda/include/cublas_v2.h" #else #include "third_party/gpus/cuda/include/cublas.h" #endif + +#include "absl/container/flat_hash_set.h" #include "third_party/gpus/cuda/include/cuda.h" #include "tsl/platform/dso_loader.h" #include "tsl/platform/env.h" @@ -26,43 +31,216 @@ limitations under the License. namespace { // Returns DSO handle or null if loading the DSO fails. -void* GetDsoHandle() { -#ifdef PLATFORM_GOOGLE - return nullptr; -#else - static auto handle = []() -> void* { +void *GetDsoHandle() { + static auto handle = []() -> void * { auto handle_or = tsl::internal::DsoLoader::GetCublasDsoHandle(); if (!handle_or.ok()) return nullptr; return handle_or.value(); }(); return handle; -#endif } -template -T LoadSymbol(const char* symbol_name) { - void* symbol = nullptr; +void *LoadSymbol(const char *symbol_name) { + void *symbol = nullptr; if (auto handle = GetDsoHandle()) { tsl::Env::Default() ->GetSymbolFromLibrary(handle, symbol_name, &symbol) .IgnoreError(); } - return reinterpret_cast(symbol); + return symbol; } -void LogFatalSymbolNotFound(const char* symbol_name) { - LOG(FATAL) << symbol_name << " symbol not found."; +const char *kSymbols[] = { +#include "tsl/cuda/cublas.inc" +}; + +constexpr size_t kNumSymbols = sizeof(kSymbols) / sizeof(const char *); + +absl::flat_hash_set const &FatalErrorSymbols() { + static auto *syms = new absl::flat_hash_set{ + "cublasGetCudartVersion", + "cublasXerbla", + "cublasSnrm2", + "cublasDnrm2", + "cublasScnrm2", + "cublasDznrm2", + "cublasSdot", + "cublasDdot", + "cublasCdotu", + "cublasCdotc", + "cublasZdotu", + "cublasZdotc", + "cublasSscal", + "cublasDscal", + "cublasCscal", + "cublasZscal", + "cublasCsscal", + "cublasZdscal", + "cublasSaxpy", + "cublasDaxpy", + "cublasCaxpy", + 
"cublasZaxpy", + "cublasScopy", + "cublasDcopy", + "cublasCcopy", + "cublasZcopy", + "cublasSswap", + "cublasDswap", + "cublasCswap", + "cublasZswap", + "cublasIsamax", + "cublasIdamax", + "cublasIcamax", + "cublasIzamax", + "cublasIsamin", + "cublasIdamin", + "cublasIcamin", + "cublasIzamin", + "cublasSasum", + "cublasDasum", + "cublasScasum", + "cublasDzasum", + "cublasSrot", + "cublasDrot", + "cublasCrot", + "cublasZrot", + "cublasCsrot", + "cublasZdrot", + "cublasSrotg", + "cublasDrotg", + "cublasCrotg", + "cublasZrotg", + "cublasSrotm", + "cublasDrotm", + "cublasSrotmg", + "cublasDrotmg", + "cublasSgemv", + "cublasDgemv", + "cublasCgemv", + "cublasZgemv", + "cublasSgbmv", + "cublasDgbmv", + "cublasCgbmv", + "cublasZgbmv", + "cublasStrmv", + "cublasDtrmv", + "cublasCtrmv", + "cublasZtrmv", + "cublasStbmv", + "cublasDtbmv", + "cublasCtbmv", + "cublasZtbmv", + "cublasStpmv", + "cublasDtpmv", + "cublasCtpmv", + "cublasZtpmv", + "cublasStrsv", + "cublasDtrsv", + "cublasCtrsv", + "cublasZtrsv", + "cublasStpsv", + "cublasDtpsv", + "cublasCtpsv", + "cublasZtpsv", + "cublasStbsv", + "cublasDtbsv", + "cublasCtbsv", + "cublasZtbsv", + "cublasSsymv", + "cublasDsymv", + "cublasChemv", + "cublasZhemv", + "cublasSsbmv", + "cublasDsbmv", + "cublasChbmv", + "cublasZhbmv", + "cublasSspmv", + "cublasDspmv", + "cublasChpmv", + "cublasZhpmv", + "cublasSger", + "cublasDger", + "cublasCgeru", + "cublasCgerc", + "cublasZgeru", + "cublasZgerc", + "cublasSsyr", + "cublasDsyr", + "cublasCher", + "cublasZher", + "cublasSspr", + "cublasDspr", + "cublasChpr", + "cublasZhpr", + "cublasSsyr2", + "cublasDsyr2", + "cublasCher2", + "cublasZher2", + "cublasSspr2", + "cublasDspr2", + "cublasChpr2", + "cublasZhpr2", + "cublasSgemm", + "cublasDgemm", + "cublasCgemm", + "cublasZgemm", + "cublasSsyrk", + "cublasDsyrk", + "cublasCsyrk", + "cublasZsyrk", + "cublasCherk", + "cublasZherk", + "cublasSsyr2k", + "cublasDsyr2k", + "cublasCsyr2k", + "cublasZsyr2k", + "cublasCher2k", + "cublasZher2k", + 
"cublasSsymm", + "cublasDsymm", + "cublasCsymm", + "cublasZsymm", + "cublasChemm", + "cublasZhemm", + "cublasStrsm", + "cublasDtrsm", + "cublasCtrsm", + "cublasZtrsm", + "cublasStrmm", + "cublasDtrmm", + "cublasCtrmm", + "cublasZtrmm", + }; + return *syms; } -cublasStatus_t GetSymbolNotFoundError() { return CUBLAS_STATUS_INTERNAL_ERROR; } } // namespace -#if CUDA_VERSION < 10010 -#include "tsl/cuda/cublas_10_0.inc" -#elif CUDA_VERSION < 10020 -#include "tsl/cuda/cublas_10_1.inc" -#elif CUDA_VERSION < 11000 -#include "tsl/cuda/cublas_10_2.inc" -#else -#include "tsl/cuda/cublas_11_0.inc" -#endif +extern "C" { + +static void CublasLogFatalSymbolNotFound(const char *symbol_name) { + LOG(FATAL) << symbol_name << " symbol not found."; +} + +static cublasStatus_t CublasGetSymbolNotFoundError() { + return CUBLAS_STATUS_INTERNAL_ERROR; +} + +extern void *_cublas_tramp_table[]; + +void _cublas_tramp_resolve(int i) { + CHECK_LE(0, i); + CHECK_LT(i, kNumSymbols); + void *p = LoadSymbol(kSymbols[i]); + if (!p) { + const auto &fatal_error_symbols = FatalErrorSymbols(); + if (fatal_error_symbols.find(kSymbols[i]) != fatal_error_symbols.end()) { + p = reinterpret_cast(&CublasLogFatalSymbolNotFound); + } else { + p = reinterpret_cast(&CublasGetSymbolNotFoundError); + } + } + _cublas_tramp_table[i] = p; +} + +} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cuda.symbols b/third_party/xla/third_party/tsl/tsl/cuda/cuda.symbols new file mode 100644 index 00000000000000..558d11cafdbc99 --- /dev/null +++ b/third_party/xla/third_party/tsl/tsl/cuda/cuda.symbols @@ -0,0 +1,583 @@ +cuArray3DCreate +cuArray3DCreate_v2 +cuArray3DGetDescriptor +cuArray3DGetDescriptor_v2 +cuArrayCreate +cuArrayCreate_v2 +cuArrayDestroy +cuArrayGetDescriptor +cuArrayGetDescriptor_v2 +cuArrayGetMemoryRequirements +cuArrayGetPlane +cuArrayGetSparseProperties +cuCtxAttach +cuCtxCreate +cuCtxCreate_v2 +cuCtxCreate_v3 +cuCtxDestroy +cuCtxDestroy_v2 +cuCtxDetach +cuCtxDisablePeerAccess 
+cuCtxEnablePeerAccess +cuCtxGetApiVersion +cuCtxGetCacheConfig +cuCtxGetCurrent +cuCtxGetDevice +cuCtxGetExecAffinity +cuCtxGetFlags +cuCtxGetId +cuCtxGetLimit +cuCtxGetSharedMemConfig +cuCtxGetStreamPriorityRange +cuCtxPopCurrent +cuCtxPopCurrent_v2 +cuCtxPushCurrent +cuCtxPushCurrent_v2 +cuCtxResetPersistingL2Cache +cuCtxSetCacheConfig +cuCtxSetCurrent +cuCtxSetLimit +cuCtxSetSharedMemConfig +cuCtxSynchronize +cuDestroyExternalMemory +cuDestroyExternalSemaphore +cuDeviceCanAccessPeer +cuDeviceComputeCapability +cuDeviceGet +cuDeviceGetAttribute +cuDeviceGetByPCIBusId +cuDeviceGetCount +cuDeviceGetDefaultMemPool +cuDeviceGetExecAffinitySupport +cuDeviceGetGraphMemAttribute +cuDeviceGetLuid +cuDeviceGetMemPool +cuDeviceGetName +cuDeviceGetNvSciSyncAttributes +cuDeviceGetP2PAttribute +cuDeviceGetPCIBusId +cuDeviceGetProperties +cuDeviceGetTexture1DLinearMaxWidth +cuDeviceGetUuid +cuDeviceGetUuid_v2 +cuDeviceGraphMemTrim +cuDevicePrimaryCtxGetState +cuDevicePrimaryCtxRelease +cuDevicePrimaryCtxRelease_v2 +cuDevicePrimaryCtxReset +cuDevicePrimaryCtxReset_v2 +cuDevicePrimaryCtxRetain +cuDevicePrimaryCtxSetFlags +cuDevicePrimaryCtxSetFlags_v2 +cuDeviceSetGraphMemAttribute +cuDeviceSetMemPool +cuDeviceTotalMem +cuDeviceTotalMem_v2 +cuDriverGetVersion +cuEGLApiInit +cuEGLStreamConsumerAcquireFrame +cuEGLStreamConsumerConnect +cuEGLStreamConsumerConnectWithFlags +cuEGLStreamConsumerDisconnect +cuEGLStreamConsumerReleaseFrame +cuEGLStreamProducerConnect +cuEGLStreamProducerDisconnect +cuEGLStreamProducerPresentFrame +cuEGLStreamProducerReturnFrame +cuEventCreate +cuEventDestroy +cuEventDestroy_v2 +cuEventElapsedTime +cuEventQuery +cuEventRecord +cuEventRecordWithFlags +cuEventRecordWithFlags_ptsz +cuEventRecord_ptsz +cuEventSynchronize +cuExternalMemoryGetMappedBuffer +cuExternalMemoryGetMappedMipmappedArray +cuFlushGPUDirectRDMAWrites +cuFuncGetAttribute +cuFuncGetModule +cuFuncSetAttribute +cuFuncSetBlockShape +cuFuncSetCacheConfig +cuFuncSetSharedMemConfig 
+cuFuncSetSharedSize +cuGLCtxCreate +cuGLCtxCreate_v2 +cuGLGetDevices +cuGLGetDevices_v2 +cuGLInit +cuGLMapBufferObject +cuGLMapBufferObjectAsync +cuGLMapBufferObjectAsync_v2 +cuGLMapBufferObjectAsync_v2_ptsz +cuGLMapBufferObject_v2 +cuGLMapBufferObject_v2_ptds +cuGLRegisterBufferObject +cuGLSetBufferObjectMapFlags +cuGLUnmapBufferObject +cuGLUnmapBufferObjectAsync +cuGLUnregisterBufferObject +cuGetErrorName +cuGetErrorString +cuGetExportTable +cuGetProcAddress +cuGetProcAddress_v2 +cuGraphAddBatchMemOpNode +cuGraphAddChildGraphNode +cuGraphAddDependencies +cuGraphAddEmptyNode +cuGraphAddEventRecordNode +cuGraphAddEventWaitNode +cuGraphAddExternalSemaphoresSignalNode +cuGraphAddExternalSemaphoresWaitNode +cuGraphAddHostNode +cuGraphAddKernelNode +cuGraphAddKernelNode_v2 +cuGraphAddMemAllocNode +cuGraphAddMemFreeNode +cuGraphAddMemcpyNode +cuGraphAddMemsetNode +cuGraphBatchMemOpNodeGetParams +cuGraphBatchMemOpNodeSetParams +cuGraphChildGraphNodeGetGraph +cuGraphClone +cuGraphCreate +cuGraphDebugDotPrint +cuGraphDestroy +cuGraphDestroyNode +cuGraphEventRecordNodeGetEvent +cuGraphEventRecordNodeSetEvent +cuGraphEventWaitNodeGetEvent +cuGraphEventWaitNodeSetEvent +cuGraphExecBatchMemOpNodeSetParams +cuGraphExecChildGraphNodeSetParams +cuGraphExecDestroy +cuGraphExecEventRecordNodeSetEvent +cuGraphExecEventWaitNodeSetEvent +cuGraphExecExternalSemaphoresSignalNodeSetParams +cuGraphExecExternalSemaphoresWaitNodeSetParams +cuGraphExecGetFlags +cuGraphExecHostNodeSetParams +cuGraphExecKernelNodeSetParams +cuGraphExecKernelNodeSetParams_v2 +cuGraphExecMemcpyNodeSetParams +cuGraphExecMemsetNodeSetParams +cuGraphExecUpdate +cuGraphExecUpdate_v2 +cuGraphExternalSemaphoresSignalNodeGetParams +cuGraphExternalSemaphoresSignalNodeSetParams +cuGraphExternalSemaphoresWaitNodeGetParams +cuGraphExternalSemaphoresWaitNodeSetParams +cuGraphGetEdges +cuGraphGetNodes +cuGraphGetRootNodes +cuGraphHostNodeGetParams +cuGraphHostNodeSetParams +cuGraphInstantiate +cuGraphInstantiateWithFlags 
+cuGraphInstantiateWithParams +cuGraphInstantiateWithParams_ptsz +cuGraphInstantiate_v2 +cuGraphKernelNodeCopyAttributes +cuGraphKernelNodeGetAttribute +cuGraphKernelNodeGetParams +cuGraphKernelNodeGetParams_v2 +cuGraphKernelNodeSetAttribute +cuGraphKernelNodeSetParams +cuGraphKernelNodeSetParams_v2 +cuGraphLaunch +cuGraphLaunch_ptsz +cuGraphMemAllocNodeGetParams +cuGraphMemFreeNodeGetParams +cuGraphMemcpyNodeGetParams +cuGraphMemcpyNodeSetParams +cuGraphMemsetNodeGetParams +cuGraphMemsetNodeSetParams +cuGraphNodeFindInClone +cuGraphNodeGetDependencies +cuGraphNodeGetDependentNodes +cuGraphNodeGetEnabled +cuGraphNodeGetType +cuGraphNodeSetEnabled +cuGraphReleaseUserObject +cuGraphRemoveDependencies +cuGraphRetainUserObject +cuGraphUpload +cuGraphUpload_ptsz +cuGraphicsEGLRegisterImage +cuGraphicsGLRegisterBuffer +cuGraphicsGLRegisterImage +cuGraphicsMapResources +cuGraphicsMapResources_ptsz +cuGraphicsResourceGetMappedEglFrame +cuGraphicsResourceGetMappedMipmappedArray +cuGraphicsResourceGetMappedPointer +cuGraphicsResourceGetMappedPointer_v2 +cuGraphicsResourceSetMapFlags +cuGraphicsResourceSetMapFlags_v2 +cuGraphicsSubResourceGetMappedArray +cuGraphicsUnmapResources +cuGraphicsUnmapResources_ptsz +cuGraphicsUnregisterResource +cuGraphicsVDPAURegisterOutputSurface +cuGraphicsVDPAURegisterVideoSurface +cuImportExternalMemory +cuImportExternalSemaphore +cuInit +cuIpcCloseMemHandle +cuIpcGetEventHandle +cuIpcGetMemHandle +cuIpcOpenEventHandle +cuIpcOpenMemHandle +cuIpcOpenMemHandle_v2 +cuKernelGetAttribute +cuKernelGetFunction +cuKernelSetAttribute +cuKernelSetCacheConfig +cuLaunch +cuLaunchCooperativeKernel +cuLaunchCooperativeKernelMultiDevice +cuLaunchCooperativeKernel_ptsz +cuLaunchGrid +cuLaunchGridAsync +cuLaunchHostFunc +cuLaunchHostFunc_ptsz +cuLaunchKernel +cuLaunchKernelEx +cuLaunchKernelEx_ptsz +cuLaunchKernel_ptsz +cuLibraryGetGlobal +cuLibraryGetKernel +cuLibraryGetManaged +cuLibraryGetModule +cuLibraryGetUnifiedFunction +cuLibraryLoadData 
+cuLibraryLoadFromFile +cuLibraryUnload +cuLinkAddData +cuLinkAddData_v2 +cuLinkAddFile +cuLinkAddFile_v2 +cuLinkComplete +cuLinkCreate +cuLinkCreate_v2 +cuLinkDestroy +cuMemAddressFree +cuMemAddressReserve +cuMemAdvise +cuMemAlloc +cuMemAllocAsync +cuMemAllocAsync_ptsz +cuMemAllocFromPoolAsync +cuMemAllocFromPoolAsync_ptsz +cuMemAllocHost +cuMemAllocHost_v2 +cuMemAllocManaged +cuMemAllocPitch +cuMemAllocPitch_v2 +cuMemAlloc_v2 +cuMemCreate +cuMemExportToShareableHandle +cuMemFree +cuMemFreeAsync +cuMemFreeAsync_ptsz +cuMemFreeHost +cuMemFree_v2 +cuMemGetAccess +cuMemGetAddressRange +cuMemGetAddressRange_v2 +cuMemGetAllocationGranularity +cuMemGetAllocationPropertiesFromHandle +cuMemGetAttribute +cuMemGetAttribute_v2 +cuMemGetHandleForAddressRange +cuMemGetInfo +cuMemGetInfo_v2 +cuMemHostAlloc +cuMemHostGetDevicePointer +cuMemHostGetDevicePointer_v2 +cuMemHostGetFlags +cuMemHostRegister +cuMemHostRegister_v2 +cuMemHostUnregister +cuMemImportFromShareableHandle +cuMemMap +cuMemMapArrayAsync +cuMemMapArrayAsync_ptsz +cuMemPoolCreate +cuMemPoolDestroy +cuMemPoolExportPointer +cuMemPoolExportToShareableHandle +cuMemPoolGetAccess +cuMemPoolGetAttribute +cuMemPoolImportFromShareableHandle +cuMemPoolImportPointer +cuMemPoolSetAccess +cuMemPoolSetAttribute +cuMemPoolTrimTo +cuMemPrefetchAsync +cuMemPrefetchAsync_ptsz +cuMemRangeGetAttribute +cuMemRangeGetAttributes +cuMemRelease +cuMemRetainAllocationHandle +cuMemSetAccess +cuMemUnmap +cuMemcpy +cuMemcpy2D +cuMemcpy2DAsync +cuMemcpy2DAsync_v2 +cuMemcpy2DAsync_v2_ptsz +cuMemcpy2DUnaligned +cuMemcpy2DUnaligned_v2 +cuMemcpy2DUnaligned_v2_ptds +cuMemcpy2D_v2 +cuMemcpy2D_v2_ptds +cuMemcpy3D +cuMemcpy3DAsync +cuMemcpy3DAsync_v2 +cuMemcpy3DAsync_v2_ptsz +cuMemcpy3DPeer +cuMemcpy3DPeerAsync +cuMemcpy3DPeerAsync_ptsz +cuMemcpy3DPeer_ptds +cuMemcpy3D_v2 +cuMemcpy3D_v2_ptds +cuMemcpyAsync +cuMemcpyAsync_ptsz +cuMemcpyAtoA +cuMemcpyAtoA_v2 +cuMemcpyAtoA_v2_ptds +cuMemcpyAtoD +cuMemcpyAtoD_v2 +cuMemcpyAtoD_v2_ptds +cuMemcpyAtoH 
+cuMemcpyAtoHAsync +cuMemcpyAtoHAsync_v2 +cuMemcpyAtoHAsync_v2_ptsz +cuMemcpyAtoH_v2 +cuMemcpyAtoH_v2_ptds +cuMemcpyDtoA +cuMemcpyDtoA_v2 +cuMemcpyDtoA_v2_ptds +cuMemcpyDtoD +cuMemcpyDtoDAsync +cuMemcpyDtoDAsync_v2 +cuMemcpyDtoDAsync_v2_ptsz +cuMemcpyDtoD_v2 +cuMemcpyDtoD_v2_ptds +cuMemcpyDtoH +cuMemcpyDtoHAsync +cuMemcpyDtoHAsync_v2 +cuMemcpyDtoHAsync_v2_ptsz +cuMemcpyDtoH_v2 +cuMemcpyDtoH_v2_ptds +cuMemcpyHtoA +cuMemcpyHtoAAsync +cuMemcpyHtoAAsync_v2 +cuMemcpyHtoAAsync_v2_ptsz +cuMemcpyHtoA_v2 +cuMemcpyHtoA_v2_ptds +cuMemcpyHtoD +cuMemcpyHtoDAsync +cuMemcpyHtoDAsync_v2 +cuMemcpyHtoDAsync_v2_ptsz +cuMemcpyHtoD_v2 +cuMemcpyHtoD_v2_ptds +cuMemcpyPeer +cuMemcpyPeerAsync +cuMemcpyPeerAsync_ptsz +cuMemcpyPeer_ptds +cuMemcpy_ptds +cuMemsetD16 +cuMemsetD16Async +cuMemsetD16Async_ptsz +cuMemsetD16_v2 +cuMemsetD16_v2_ptds +cuMemsetD2D16 +cuMemsetD2D16Async +cuMemsetD2D16Async_ptsz +cuMemsetD2D16_v2 +cuMemsetD2D16_v2_ptds +cuMemsetD2D32 +cuMemsetD2D32Async +cuMemsetD2D32Async_ptsz +cuMemsetD2D32_v2 +cuMemsetD2D32_v2_ptds +cuMemsetD2D8 +cuMemsetD2D8Async +cuMemsetD2D8Async_ptsz +cuMemsetD2D8_v2 +cuMemsetD2D8_v2_ptds +cuMemsetD32 +cuMemsetD32Async +cuMemsetD32Async_ptsz +cuMemsetD32_v2 +cuMemsetD32_v2_ptds +cuMemsetD8 +cuMemsetD8Async +cuMemsetD8Async_ptsz +cuMemsetD8_v2 +cuMemsetD8_v2_ptds +cuMipmappedArrayCreate +cuMipmappedArrayDestroy +cuMipmappedArrayGetLevel +cuMipmappedArrayGetMemoryRequirements +cuMipmappedArrayGetSparseProperties +cuModuleGetFunction +cuModuleGetGlobal +cuModuleGetGlobal_v2 +cuModuleGetLoadingMode +cuModuleGetSurfRef +cuModuleGetTexRef +cuModuleLoad +cuModuleLoadData +cuModuleLoadDataEx +cuModuleLoadFatBinary +cuModuleUnload +cuOccupancyAvailableDynamicSMemPerBlock +cuOccupancyMaxActiveBlocksPerMultiprocessor +cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags +cuOccupancyMaxActiveClusters +cuOccupancyMaxPotentialBlockSize +cuOccupancyMaxPotentialBlockSizeWithFlags +cuOccupancyMaxPotentialClusterSize +cuParamSetSize +cuParamSetTexRef +cuParamSetf 
+cuParamSeti +cuParamSetv +cuPointerGetAttribute +cuPointerGetAttributes +cuPointerSetAttribute +cuProfilerInitialize +cuProfilerStart +cuProfilerStop +cuSignalExternalSemaphoresAsync +cuSignalExternalSemaphoresAsync_ptsz +cuStreamAddCallback +cuStreamAddCallback_ptsz +cuStreamAttachMemAsync +cuStreamAttachMemAsync_ptsz +cuStreamBatchMemOp +cuStreamBatchMemOp_ptsz +cuStreamBatchMemOp_v2 +cuStreamBatchMemOp_v2_ptsz +cuStreamBeginCapture +cuStreamBeginCapture_ptsz +cuStreamBeginCapture_v2 +cuStreamBeginCapture_v2_ptsz +cuStreamCopyAttributes +cuStreamCopyAttributes_ptsz +cuStreamCreate +cuStreamCreateWithPriority +cuStreamDestroy +cuStreamDestroy_v2 +cuStreamEndCapture +cuStreamEndCapture_ptsz +cuStreamGetAttribute +cuStreamGetAttribute_ptsz +cuStreamGetCaptureInfo +cuStreamGetCaptureInfo_ptsz +cuStreamGetCaptureInfo_v2 +cuStreamGetCaptureInfo_v2_ptsz +cuStreamGetCtx +cuStreamGetCtx_ptsz +cuStreamGetFlags +cuStreamGetFlags_ptsz +cuStreamGetId +cuStreamGetId_ptsz +cuStreamGetPriority +cuStreamGetPriority_ptsz +cuStreamIsCapturing +cuStreamIsCapturing_ptsz +cuStreamQuery +cuStreamQuery_ptsz +cuStreamSetAttribute +cuStreamSetAttribute_ptsz +cuStreamSynchronize +cuStreamSynchronize_ptsz +cuStreamUpdateCaptureDependencies +cuStreamUpdateCaptureDependencies_ptsz +cuStreamWaitEvent +cuStreamWaitEvent_ptsz +cuStreamWaitValue32 +cuStreamWaitValue32_ptsz +cuStreamWaitValue32_v2 +cuStreamWaitValue32_v2_ptsz +cuStreamWaitValue64 +cuStreamWaitValue64_ptsz +cuStreamWaitValue64_v2 +cuStreamWaitValue64_v2_ptsz +cuStreamWriteValue32 +cuStreamWriteValue32_ptsz +cuStreamWriteValue32_v2 +cuStreamWriteValue32_v2_ptsz +cuStreamWriteValue64 +cuStreamWriteValue64_ptsz +cuStreamWriteValue64_v2 +cuStreamWriteValue64_v2_ptsz +cuSurfObjectCreate +cuSurfObjectDestroy +cuSurfObjectGetResourceDesc +cuSurfRefGetArray +cuSurfRefSetArray +cuTensorMapEncodeIm2col +cuTensorMapEncodeTiled +cuTensorMapReplaceAddress +cuTexObjectCreate +cuTexObjectDestroy +cuTexObjectGetResourceDesc 
+cuTexObjectGetResourceViewDesc +cuTexObjectGetTextureDesc +cuTexRefCreate +cuTexRefDestroy +cuTexRefGetAddress +cuTexRefGetAddressMode +cuTexRefGetAddress_v2 +cuTexRefGetArray +cuTexRefGetBorderColor +cuTexRefGetFilterMode +cuTexRefGetFlags +cuTexRefGetFormat +cuTexRefGetMaxAnisotropy +cuTexRefGetMipmapFilterMode +cuTexRefGetMipmapLevelBias +cuTexRefGetMipmapLevelClamp +cuTexRefGetMipmappedArray +cuTexRefSetAddress +cuTexRefSetAddress2D +cuTexRefSetAddress2D_v2 +cuTexRefSetAddress2D_v3 +cuTexRefSetAddressMode +cuTexRefSetAddress_v2 +cuTexRefSetArray +cuTexRefSetBorderColor +cuTexRefSetFilterMode +cuTexRefSetFlags +cuTexRefSetFormat +cuTexRefSetMaxAnisotropy +cuTexRefSetMipmapFilterMode +cuTexRefSetMipmapLevelBias +cuTexRefSetMipmapLevelClamp +cuTexRefSetMipmappedArray +cuThreadExchangeStreamCaptureMode +cuUserObjectCreate +cuUserObjectRelease +cuUserObjectRetain +cuVDPAUCtxCreate +cuVDPAUCtxCreate_v2 +cuVDPAUGetDevice +cuWaitExternalSemaphoresAsync +cuWaitExternalSemaphoresAsync_ptsz +cudbgApiAttach +cudbgApiDetach +cudbgApiInit +cudbgGetAPI +cudbgGetAPIVersion +cudbgMain +cudbgReportDriverApiError +cudbgReportDriverInternalError diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cuda_10_0.inc b/third_party/xla/third_party/tsl/tsl/cuda/cuda_10_0.inc deleted file mode 100644 index 6f26cfb92d1d85..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cuda_10_0.inc +++ /dev/null @@ -1,2133 +0,0 @@ -// Auto-generated, do not edit. 
- -extern "C" { - -CUresult CUDAAPI cuGetErrorString(CUresult error, const char **pStr) { - using FuncPtr = CUresult(CUDAAPI *)(CUresult, const char **); - static auto func_ptr = LoadSymbol("cuGetErrorString"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(error, pStr); -} - -CUresult CUDAAPI cuGetErrorName(CUresult error, const char **pStr) { - using FuncPtr = CUresult(CUDAAPI *)(CUresult, const char **); - static auto func_ptr = LoadSymbol("cuGetErrorName"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(error, pStr); -} - -CUresult CUDAAPI cuInit(unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(unsigned int); - static auto func_ptr = LoadSymbol("cuInit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(Flags); -} - -CUresult CUDAAPI cuDriverGetVersion(int *driverVersion) { - using FuncPtr = CUresult(CUDAAPI *)(int *); - static auto func_ptr = LoadSymbol("cuDriverGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(driverVersion); -} - -CUresult CUDAAPI cuDeviceGet(CUdevice *device, int ordinal) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice *, int); - static auto func_ptr = LoadSymbol("cuDeviceGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, ordinal); -} - -CUresult CUDAAPI cuDeviceGetCount(int *count) { - using FuncPtr = CUresult(CUDAAPI *)(int *); - static auto func_ptr = LoadSymbol("cuDeviceGetCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count); -} - -CUresult CUDAAPI cuDeviceGetName(char *name, int len, CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(char *, int, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetName"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(name, len, dev); -} - -CUresult CUDAAPI cuDeviceGetUuid(CUuuid *uuid, CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUuuid *, CUdevice); - static auto func_ptr = 
LoadSymbol("cuDeviceGetUuid"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(uuid, dev); -} - -CUresult CUDAAPI cuDeviceTotalMem(size_t *bytes, CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(size_t *, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceTotalMem_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(bytes, dev); -} - -CUresult CUDAAPI cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, - CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(int *, CUdevice_attribute, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pi, attrib, dev); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuDeviceGetProperties(CUdevprop *prop, - CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevprop *, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(prop, dev); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuDeviceComputeCapability(int *major, - int *minor, - CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(int *, int *, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceComputeCapability"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(major, minor, dev); -} - -CUresult CUDAAPI cuDevicePrimaryCtxRetain(CUcontext *pctx, CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext *, CUdevice); - static auto func_ptr = LoadSymbol("cuDevicePrimaryCtxRetain"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pctx, dev); -} - -CUresult CUDAAPI cuDevicePrimaryCtxRelease(CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice); - static auto func_ptr = LoadSymbol("cuDevicePrimaryCtxRelease"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev); -} - -CUresult CUDAAPI cuDevicePrimaryCtxSetFlags(CUdevice dev, unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI 
*)(CUdevice, unsigned int); - static auto func_ptr = LoadSymbol("cuDevicePrimaryCtxSetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev, flags); -} - -CUresult CUDAAPI cuDevicePrimaryCtxGetState(CUdevice dev, unsigned int *flags, - int *active) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice, unsigned int *, int *); - static auto func_ptr = LoadSymbol("cuDevicePrimaryCtxGetState"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev, flags, active); -} - -CUresult CUDAAPI cuDevicePrimaryCtxReset(CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice); - static auto func_ptr = LoadSymbol("cuDevicePrimaryCtxReset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev); -} - -CUresult CUDAAPI cuCtxCreate(CUcontext *pctx, unsigned int flags, - CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext *, unsigned int, CUdevice); - static auto func_ptr = LoadSymbol("cuCtxCreate_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pctx, flags, dev); -} - -CUresult CUDAAPI cuCtxDestroy(CUcontext ctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuCtxDestroy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx); -} - -CUresult CUDAAPI cuCtxPushCurrent(CUcontext ctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuCtxPushCurrent_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx); -} - -CUresult CUDAAPI cuCtxPopCurrent(CUcontext *pctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext *); - static auto func_ptr = LoadSymbol("cuCtxPopCurrent_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pctx); -} - -CUresult CUDAAPI cuCtxSetCurrent(CUcontext ctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuCtxSetCurrent"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(ctx); -} - -CUresult CUDAAPI cuCtxGetCurrent(CUcontext *pctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext *); - static auto func_ptr = LoadSymbol("cuCtxGetCurrent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pctx); -} - -CUresult CUDAAPI cuCtxGetDevice(CUdevice *device) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice *); - static auto func_ptr = LoadSymbol("cuCtxGetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -CUresult CUDAAPI cuCtxGetFlags(unsigned int *flags) { - using FuncPtr = CUresult(CUDAAPI *)(unsigned int *); - static auto func_ptr = LoadSymbol("cuCtxGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -CUresult CUDAAPI cuCtxSynchronize(void) { - using FuncPtr = CUresult(CUDAAPI *)(); - static auto func_ptr = LoadSymbol("cuCtxSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -CUresult CUDAAPI cuCtxSetLimit(CUlimit limit, size_t value) { - using FuncPtr = CUresult(CUDAAPI *)(CUlimit, size_t); - static auto func_ptr = LoadSymbol("cuCtxSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -CUresult CUDAAPI cuCtxGetLimit(size_t *pvalue, CUlimit limit) { - using FuncPtr = CUresult(CUDAAPI *)(size_t *, CUlimit); - static auto func_ptr = LoadSymbol("cuCtxGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pvalue, limit); -} - -CUresult CUDAAPI cuCtxGetCacheConfig(CUfunc_cache *pconfig) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunc_cache *); - static auto func_ptr = LoadSymbol("cuCtxGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pconfig); -} - -CUresult CUDAAPI cuCtxSetCacheConfig(CUfunc_cache config) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunc_cache); - static auto func_ptr = LoadSymbol("cuCtxSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(config); -} - -CUresult CUDAAPI cuCtxGetSharedMemConfig(CUsharedconfig *pConfig) { - using FuncPtr = CUresult(CUDAAPI *)(CUsharedconfig *); - static auto func_ptr = LoadSymbol("cuCtxGetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pConfig); -} - -CUresult CUDAAPI cuCtxSetSharedMemConfig(CUsharedconfig config) { - using FuncPtr = CUresult(CUDAAPI *)(CUsharedconfig); - static auto func_ptr = LoadSymbol("cuCtxSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config); -} - -CUresult CUDAAPI cuCtxGetApiVersion(CUcontext ctx, unsigned int *version) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext, unsigned int *); - static auto func_ptr = LoadSymbol("cuCtxGetApiVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx, version); -} - -CUresult CUDAAPI cuCtxGetStreamPriorityRange(int *leastPriority, - int *greatestPriority) { - using FuncPtr = CUresult(CUDAAPI *)(int *, int *); - static auto func_ptr = LoadSymbol("cuCtxGetStreamPriorityRange"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(leastPriority, greatestPriority); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuCtxAttach(CUcontext *pctx, - unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext *, unsigned int); - static auto func_ptr = LoadSymbol("cuCtxAttach"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pctx, flags); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuCtxDetach(CUcontext ctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuCtxDetach"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx); -} - -CUresult CUDAAPI cuModuleLoad(CUmodule *module, const char *fname) { - using FuncPtr = CUresult(CUDAAPI *)(CUmodule *, const char *); - static auto func_ptr = LoadSymbol("cuModuleLoad"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(module, fname); -} - 
-CUresult CUDAAPI cuModuleLoadData(CUmodule *module, const void *image) { - using FuncPtr = CUresult(CUDAAPI *)(CUmodule *, const void *); - static auto func_ptr = LoadSymbol("cuModuleLoadData"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(module, image); -} - -CUresult CUDAAPI cuModuleLoadDataEx(CUmodule *module, const void *image, - unsigned int numOptions, - CUjit_option *options, - void **optionValues) { - using FuncPtr = CUresult(CUDAAPI *)(CUmodule *, const void *, unsigned int, - CUjit_option *, void **); - static auto func_ptr = LoadSymbol("cuModuleLoadDataEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(module, image, numOptions, options, optionValues); -} - -CUresult CUDAAPI cuModuleLoadFatBinary(CUmodule *module, const void *fatCubin) { - using FuncPtr = CUresult(CUDAAPI *)(CUmodule *, const void *); - static auto func_ptr = LoadSymbol("cuModuleLoadFatBinary"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(module, fatCubin); -} - -CUresult CUDAAPI cuModuleUnload(CUmodule hmod) { - using FuncPtr = CUresult(CUDAAPI *)(CUmodule); - static auto func_ptr = LoadSymbol("cuModuleUnload"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hmod); -} - -CUresult CUDAAPI cuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, - const char *name) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction *, CUmodule, const char *); - static auto func_ptr = LoadSymbol("cuModuleGetFunction"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, hmod, name); -} - -CUresult CUDAAPI cuModuleGetGlobal(CUdeviceptr *dptr, size_t *bytes, - CUmodule hmod, const char *name) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr *, size_t *, CUmodule, const char *); - static auto func_ptr = LoadSymbol("cuModuleGetGlobal_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr, bytes, hmod, name); -} - -CUresult CUDAAPI cuModuleGetTexRef(CUtexref *pTexRef, 
CUmodule hmod, - const char *name) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref *, CUmodule, const char *); - static auto func_ptr = LoadSymbol("cuModuleGetTexRef"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexRef, hmod, name); -} - -CUresult CUDAAPI cuModuleGetSurfRef(CUsurfref *pSurfRef, CUmodule hmod, - const char *name) { - using FuncPtr = CUresult(CUDAAPI *)(CUsurfref *, CUmodule, const char *); - static auto func_ptr = LoadSymbol("cuModuleGetSurfRef"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pSurfRef, hmod, name); -} - -CUresult CUDAAPI cuLinkCreate(unsigned int numOptions, CUjit_option *options, - void **optionValues, CUlinkState *stateOut) { - using FuncPtr = - CUresult(CUDAAPI *)(unsigned int, CUjit_option *, void **, CUlinkState *); - static auto func_ptr = LoadSymbol("cuLinkCreate_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numOptions, options, optionValues, stateOut); -} - -CUresult CUDAAPI cuLinkAddData(CUlinkState state, CUjitInputType type, - void *data, size_t size, const char *name, - unsigned int numOptions, CUjit_option *options, - void **optionValues) { - using FuncPtr = - CUresult(CUDAAPI *)(CUlinkState, CUjitInputType, void *, size_t, - const char *, unsigned int, CUjit_option *, void **); - static auto func_ptr = LoadSymbol("cuLinkAddData_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(state, type, data, size, name, numOptions, options, - optionValues); -} - -CUresult CUDAAPI cuLinkAddFile(CUlinkState state, CUjitInputType type, - const char *path, unsigned int numOptions, - CUjit_option *options, void **optionValues) { - using FuncPtr = CUresult(CUDAAPI *)(CUlinkState, CUjitInputType, const char *, - unsigned int, CUjit_option *, void **); - static auto func_ptr = LoadSymbol("cuLinkAddFile_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(state, type, path, numOptions, options, optionValues); -} - 
-CUresult CUDAAPI cuLinkComplete(CUlinkState state, void **cubinOut, - size_t *sizeOut) { - using FuncPtr = CUresult(CUDAAPI *)(CUlinkState, void **, size_t *); - static auto func_ptr = LoadSymbol("cuLinkComplete"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(state, cubinOut, sizeOut); -} - -CUresult CUDAAPI cuLinkDestroy(CUlinkState state) { - using FuncPtr = CUresult(CUDAAPI *)(CUlinkState); - static auto func_ptr = LoadSymbol("cuLinkDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(state); -} - -CUresult CUDAAPI cuMemGetInfo(size_t *free, size_t *total) { - using FuncPtr = CUresult(CUDAAPI *)(size_t *, size_t *); - static auto func_ptr = LoadSymbol("cuMemGetInfo_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(free, total); -} - -CUresult CUDAAPI cuMemAlloc(CUdeviceptr *dptr, size_t bytesize) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t); - static auto func_ptr = LoadSymbol("cuMemAlloc_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr, bytesize); -} - -CUresult CUDAAPI cuMemAllocPitch(CUdeviceptr *dptr, size_t *pPitch, - size_t WidthInBytes, size_t Height, - unsigned int ElementSizeBytes) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t *, size_t, size_t, - unsigned int); - static auto func_ptr = LoadSymbol("cuMemAllocPitch_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr, pPitch, WidthInBytes, Height, ElementSizeBytes); -} - -CUresult CUDAAPI cuMemFree(CUdeviceptr dptr) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr); - static auto func_ptr = LoadSymbol("cuMemFree_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr); -} - -CUresult CUDAAPI cuMemGetAddressRange(CUdeviceptr *pbase, size_t *psize, - CUdeviceptr dptr) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t *, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuMemGetAddressRange_v2"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pbase, psize, dptr); -} - -CUresult CUDAAPI cuMemAllocHost(void **pp, size_t bytesize) { - using FuncPtr = CUresult(CUDAAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cuMemAllocHost_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pp, bytesize); -} - -CUresult CUDAAPI cuMemFreeHost(void *p) { - using FuncPtr = CUresult(CUDAAPI *)(void *); - static auto func_ptr = LoadSymbol("cuMemFreeHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -CUresult CUDAAPI cuMemHostAlloc(void **pp, size_t bytesize, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cuMemHostAlloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pp, bytesize, Flags); -} - -CUresult CUDAAPI cuMemHostGetDevicePointer(CUdeviceptr *pdptr, void *p, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, void *, unsigned int); - static auto func_ptr = LoadSymbol("cuMemHostGetDevicePointer_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pdptr, p, Flags); -} - -CUresult CUDAAPI cuMemHostGetFlags(unsigned int *pFlags, void *p) { - using FuncPtr = CUresult(CUDAAPI *)(unsigned int *, void *); - static auto func_ptr = LoadSymbol("cuMemHostGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFlags, p); -} - -CUresult CUDAAPI cuMemAllocManaged(CUdeviceptr *dptr, size_t bytesize, - unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cuMemAllocManaged"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr, bytesize, flags); -} - -CUresult CUDAAPI cuDeviceGetByPCIBusId(CUdevice *dev, const char *pciBusId) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice *, const char *); - static auto func_ptr = 
LoadSymbol("cuDeviceGetByPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev, pciBusId); -} - -CUresult CUDAAPI cuDeviceGetPCIBusId(char *pciBusId, int len, CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(char *, int, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pciBusId, len, dev); -} - -CUresult CUDAAPI cuIpcGetEventHandle(CUipcEventHandle *pHandle, CUevent event) { - using FuncPtr = CUresult(CUDAAPI *)(CUipcEventHandle *, CUevent); - static auto func_ptr = LoadSymbol("cuIpcGetEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHandle, event); -} - -CUresult CUDAAPI cuIpcOpenEventHandle(CUevent *phEvent, - CUipcEventHandle handle) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent *, CUipcEventHandle); - static auto func_ptr = LoadSymbol("cuIpcOpenEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phEvent, handle); -} - -CUresult CUDAAPI cuIpcGetMemHandle(CUipcMemHandle *pHandle, CUdeviceptr dptr) { - using FuncPtr = CUresult(CUDAAPI *)(CUipcMemHandle *, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuIpcGetMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHandle, dptr); -} - -CUresult CUDAAPI cuIpcOpenMemHandle(CUdeviceptr *pdptr, CUipcMemHandle handle, - unsigned int Flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr *, CUipcMemHandle, unsigned int); - static auto func_ptr = LoadSymbol("cuIpcOpenMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pdptr, handle, Flags); -} - -CUresult CUDAAPI cuIpcCloseMemHandle(CUdeviceptr dptr) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr); - static auto func_ptr = LoadSymbol("cuIpcCloseMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr); -} - -CUresult CUDAAPI cuMemHostRegister(void *p, size_t bytesize, - unsigned int 
Flags) { - using FuncPtr = CUresult(CUDAAPI *)(void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cuMemHostRegister_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, bytesize, Flags); -} - -CUresult CUDAAPI cuMemHostUnregister(void *p) { - using FuncPtr = CUresult(CUDAAPI *)(void *); - static auto func_ptr = LoadSymbol("cuMemHostUnregister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -CUresult CUDAAPI cuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemcpy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, ByteCount); -} - -CUresult CUDAAPI cuMemcpyPeer(CUdeviceptr dstDevice, CUcontext dstContext, - CUdeviceptr srcDevice, CUcontext srcContext, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUcontext, CUdeviceptr, - CUcontext, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstContext, srcDevice, srcContext, ByteCount); -} - -CUresult CUDAAPI cuMemcpyHtoD(CUdeviceptr dstDevice, const void *srcHost, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, const void *, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyHtoD_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, srcHost, ByteCount); -} - -CUresult CUDAAPI cuMemcpyDtoH(void *dstHost, CUdeviceptr srcDevice, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(void *, CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyDtoH_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstHost, srcDevice, ByteCount); -} - -CUresult CUDAAPI cuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, 
CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyDtoD_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, srcDevice, ByteCount); -} - -CUresult CUDAAPI cuMemcpyDtoA(CUarray dstArray, size_t dstOffset, - CUdeviceptr srcDevice, size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray, size_t, CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyDtoA_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstArray, dstOffset, srcDevice, ByteCount); -} - -CUresult CUDAAPI cuMemcpyAtoD(CUdeviceptr dstDevice, CUarray srcArray, - size_t srcOffset, size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUarray, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyAtoD_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, srcArray, srcOffset, ByteCount); -} - -CUresult CUDAAPI cuMemcpyHtoA(CUarray dstArray, size_t dstOffset, - const void *srcHost, size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray, size_t, const void *, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyHtoA_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstArray, dstOffset, srcHost, ByteCount); -} - -CUresult CUDAAPI cuMemcpyAtoH(void *dstHost, CUarray srcArray, size_t srcOffset, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(void *, CUarray, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyAtoH_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstHost, srcArray, srcOffset, ByteCount); -} - -CUresult CUDAAPI cuMemcpyAtoA(CUarray dstArray, size_t dstOffset, - CUarray srcArray, size_t srcOffset, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray, size_t, CUarray, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyAtoA_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstArray, dstOffset, srcArray, srcOffset, 
ByteCount); -} - -CUresult CUDAAPI cuMemcpy2D(const CUDA_MEMCPY2D *pCopy) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY2D *); - static auto func_ptr = LoadSymbol("cuMemcpy2D_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy); -} - -CUresult CUDAAPI cuMemcpy2DUnaligned(const CUDA_MEMCPY2D *pCopy) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY2D *); - static auto func_ptr = LoadSymbol("cuMemcpy2DUnaligned_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy); -} - -CUresult CUDAAPI cuMemcpy3D(const CUDA_MEMCPY3D *pCopy) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY3D *); - static auto func_ptr = LoadSymbol("cuMemcpy3D_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy); -} - -CUresult CUDAAPI cuMemcpy3DPeer(const CUDA_MEMCPY3D_PEER *pCopy) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY3D_PEER *); - static auto func_ptr = LoadSymbol("cuMemcpy3DPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy); -} - -CUresult CUDAAPI cuMemcpyAsync(CUdeviceptr dst, CUdeviceptr src, - size_t ByteCount, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, CUdeviceptr, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpyPeerAsync(CUdeviceptr dstDevice, CUcontext dstContext, - CUdeviceptr srcDevice, CUcontext srcContext, - size_t ByteCount, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUcontext, CUdeviceptr, - CUcontext, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstContext, srcDevice, srcContext, ByteCount, - hStream); -} - -CUresult CUDAAPI cuMemcpyHtoDAsync(CUdeviceptr dstDevice, const void *srcHost, - size_t ByteCount, 
CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, const void *, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyHtoDAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, srcHost, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpyDtoHAsync(void *dstHost, CUdeviceptr srcDevice, - size_t ByteCount, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(void *, CUdeviceptr, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyDtoHAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstHost, srcDevice, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpyDtoDAsync(CUdeviceptr dstDevice, CUdeviceptr srcDevice, - size_t ByteCount, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, CUdeviceptr, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyDtoDAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, srcDevice, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpyHtoAAsync(CUarray dstArray, size_t dstOffset, - const void *srcHost, size_t ByteCount, - CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUarray, size_t, const void *, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyHtoAAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstArray, dstOffset, srcHost, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpyAtoHAsync(void *dstHost, CUarray srcArray, - size_t srcOffset, size_t ByteCount, - CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(void *, CUarray, size_t, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyAtoHAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstHost, srcArray, srcOffset, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpy2DAsync(const CUDA_MEMCPY2D *pCopy, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY2D *, CUstream); - 
static auto func_ptr = LoadSymbol("cuMemcpy2DAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy, hStream); -} - -CUresult CUDAAPI cuMemcpy3DAsync(const CUDA_MEMCPY3D *pCopy, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY3D *, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpy3DAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy, hStream); -} - -CUresult CUDAAPI cuMemcpy3DPeerAsync(const CUDA_MEMCPY3D_PEER *pCopy, - CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY3D_PEER *, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpy3DPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy, hStream); -} - -CUresult CUDAAPI cuMemsetD8(CUdeviceptr dstDevice, unsigned char uc, size_t N) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, unsigned char, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD8_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, uc, N); -} - -CUresult CUDAAPI cuMemsetD16(CUdeviceptr dstDevice, unsigned short us, - size_t N) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, unsigned short, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD16_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, us, N); -} - -CUresult CUDAAPI cuMemsetD32(CUdeviceptr dstDevice, unsigned int ui, size_t N) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, unsigned int, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD32_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, ui, N); -} - -CUresult CUDAAPI cuMemsetD2D8(CUdeviceptr dstDevice, size_t dstPitch, - unsigned char uc, size_t Width, size_t Height) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned char, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD2D8_v2"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, uc, Width, Height); -} - -CUresult CUDAAPI cuMemsetD2D16(CUdeviceptr dstDevice, size_t dstPitch, - unsigned short us, size_t Width, size_t Height) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned short, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD2D16_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, us, Width, Height); -} - -CUresult CUDAAPI cuMemsetD2D32(CUdeviceptr dstDevice, size_t dstPitch, - unsigned int ui, size_t Width, size_t Height) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned int, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD2D32_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, ui, Width, Height); -} - -CUresult CUDAAPI cuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, - size_t N, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, unsigned char, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD8Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, uc, N, hStream); -} - -CUresult CUDAAPI cuMemsetD16Async(CUdeviceptr dstDevice, unsigned short us, - size_t N, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, unsigned short, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD16Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, us, N, hStream); -} - -CUresult CUDAAPI cuMemsetD32Async(CUdeviceptr dstDevice, unsigned int ui, - size_t N, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, unsigned int, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD32Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, ui, N, hStream); -} - -CUresult CUDAAPI cuMemsetD2D8Async(CUdeviceptr dstDevice, size_t dstPitch, - 
unsigned char uc, size_t Width, - size_t Height, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned char, - size_t, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD2D8Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, uc, Width, Height, hStream); -} - -CUresult CUDAAPI cuMemsetD2D16Async(CUdeviceptr dstDevice, size_t dstPitch, - unsigned short us, size_t Width, - size_t Height, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned short, - size_t, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD2D16Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, us, Width, Height, hStream); -} - -CUresult CUDAAPI cuMemsetD2D32Async(CUdeviceptr dstDevice, size_t dstPitch, - unsigned int ui, size_t Width, - size_t Height, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned int, size_t, - size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD2D32Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, ui, Width, Height, hStream); -} - -CUresult CUDAAPI cuArrayCreate(CUarray *pHandle, - const CUDA_ARRAY_DESCRIPTOR *pAllocateArray) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray *, const CUDA_ARRAY_DESCRIPTOR *); - static auto func_ptr = LoadSymbol("cuArrayCreate_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHandle, pAllocateArray); -} - -CUresult CUDAAPI cuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR *pArrayDescriptor, - CUarray hArray) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_ARRAY_DESCRIPTOR *, CUarray); - static auto func_ptr = LoadSymbol("cuArrayGetDescriptor_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pArrayDescriptor, hArray); -} - -CUresult CUDAAPI cuArrayDestroy(CUarray hArray) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray); - static 
auto func_ptr = LoadSymbol("cuArrayDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hArray); -} - -CUresult CUDAAPI cuArray3DCreate( - CUarray *pHandle, const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray) { - using FuncPtr = - CUresult(CUDAAPI *)(CUarray *, const CUDA_ARRAY3D_DESCRIPTOR *); - static auto func_ptr = LoadSymbol("cuArray3DCreate_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHandle, pAllocateArray); -} - -CUresult CUDAAPI cuArray3DGetDescriptor( - CUDA_ARRAY3D_DESCRIPTOR *pArrayDescriptor, CUarray hArray) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_ARRAY3D_DESCRIPTOR *, CUarray); - static auto func_ptr = LoadSymbol("cuArray3DGetDescriptor_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pArrayDescriptor, hArray); -} - -CUresult CUDAAPI -cuMipmappedArrayCreate(CUmipmappedArray *pHandle, - const CUDA_ARRAY3D_DESCRIPTOR *pMipmappedArrayDesc, - unsigned int numMipmapLevels) { - using FuncPtr = CUresult(CUDAAPI *)( - CUmipmappedArray *, const CUDA_ARRAY3D_DESCRIPTOR *, unsigned int); - static auto func_ptr = LoadSymbol("cuMipmappedArrayCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHandle, pMipmappedArrayDesc, numMipmapLevels); -} - -CUresult CUDAAPI cuMipmappedArrayGetLevel(CUarray *pLevelArray, - CUmipmappedArray hMipmappedArray, - unsigned int level) { - using FuncPtr = - CUresult(CUDAAPI *)(CUarray *, CUmipmappedArray, unsigned int); - static auto func_ptr = LoadSymbol("cuMipmappedArrayGetLevel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pLevelArray, hMipmappedArray, level); -} - -CUresult CUDAAPI cuMipmappedArrayDestroy(CUmipmappedArray hMipmappedArray) { - using FuncPtr = CUresult(CUDAAPI *)(CUmipmappedArray); - static auto func_ptr = LoadSymbol("cuMipmappedArrayDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hMipmappedArray); -} - -CUresult CUDAAPI cuPointerGetAttribute(void *data, - 
CUpointer_attribute attribute, - CUdeviceptr ptr) { - using FuncPtr = CUresult(CUDAAPI *)(void *, CUpointer_attribute, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuPointerGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, attribute, ptr); -} - -CUresult CUDAAPI cuMemPrefetchAsync(CUdeviceptr devPtr, size_t count, - CUdevice dstDevice, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t, CUdevice, CUstream); - static auto func_ptr = LoadSymbol("cuMemPrefetchAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, dstDevice, hStream); -} - -CUresult CUDAAPI cuMemAdvise(CUdeviceptr devPtr, size_t count, - CUmem_advise advice, CUdevice device) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, size_t, CUmem_advise, CUdevice); - static auto func_ptr = LoadSymbol("cuMemAdvise"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, advice, device); -} - -CUresult CUDAAPI cuMemRangeGetAttribute(void *data, size_t dataSize, - CUmem_range_attribute attribute, - CUdeviceptr devPtr, size_t count) { - using FuncPtr = CUresult(CUDAAPI *)(void *, size_t, CUmem_range_attribute, - CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemRangeGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSize, attribute, devPtr, count); -} - -CUresult CUDAAPI cuMemRangeGetAttributes(void **data, size_t *dataSizes, - CUmem_range_attribute *attributes, - size_t numAttributes, - CUdeviceptr devPtr, size_t count) { - using FuncPtr = CUresult(CUDAAPI *)( - void **, size_t *, CUmem_range_attribute *, size_t, CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemRangeGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSizes, attributes, numAttributes, devPtr, count); -} - -CUresult CUDAAPI cuPointerSetAttribute(const void *value, - CUpointer_attribute 
attribute, - CUdeviceptr ptr) { - using FuncPtr = - CUresult(CUDAAPI *)(const void *, CUpointer_attribute, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuPointerSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attribute, ptr); -} - -CUresult CUDAAPI cuPointerGetAttributes(unsigned int numAttributes, - CUpointer_attribute *attributes, - void **data, CUdeviceptr ptr) { - using FuncPtr = CUresult(CUDAAPI *)(unsigned int, CUpointer_attribute *, - void **, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuPointerGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numAttributes, attributes, data, ptr); -} - -CUresult CUDAAPI cuStreamCreate(CUstream *phStream, unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream *, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phStream, Flags); -} - -CUresult CUDAAPI cuStreamCreateWithPriority(CUstream *phStream, - unsigned int flags, int priority) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream *, unsigned int, int); - static auto func_ptr = LoadSymbol("cuStreamCreateWithPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phStream, flags, priority); -} - -CUresult CUDAAPI cuStreamGetPriority(CUstream hStream, int *priority) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, int *); - static auto func_ptr = LoadSymbol("cuStreamGetPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, priority); -} - -CUresult CUDAAPI cuStreamGetFlags(CUstream hStream, unsigned int *flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, unsigned int *); - static auto func_ptr = LoadSymbol("cuStreamGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, flags); -} - -CUresult CUDAAPI cuStreamGetCtx(CUstream hStream, CUcontext *pctx) { - using FuncPtr = CUresult(CUDAAPI 
*)(CUstream, CUcontext *); - static auto func_ptr = LoadSymbol("cuStreamGetCtx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, pctx); -} - -CUresult CUDAAPI cuStreamWaitEvent(CUstream hStream, CUevent hEvent, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUevent, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamWaitEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, hEvent, Flags); -} - -CUresult CUDAAPI cuStreamAddCallback(CUstream hStream, - CUstreamCallback callback, void *userData, - unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUstreamCallback, void *, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamAddCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, callback, userData, flags); -} - -CUresult CUDAAPI cuStreamBeginCapture(CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream); - static auto func_ptr = LoadSymbol("cuStreamBeginCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream); -} - -CUresult CUDAAPI cuStreamEndCapture(CUstream hStream, CUgraph *phGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUgraph *); - static auto func_ptr = LoadSymbol("cuStreamEndCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, phGraph); -} - -CUresult CUDAAPI cuStreamIsCapturing(CUstream hStream, - CUstreamCaptureStatus *captureStatus) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUstreamCaptureStatus *); - static auto func_ptr = LoadSymbol("cuStreamIsCapturing"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, captureStatus); -} - -CUresult CUDAAPI cuStreamAttachMemAsync(CUstream hStream, CUdeviceptr dptr, - size_t length, unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUdeviceptr, size_t, unsigned int); - static auto func_ptr = 
LoadSymbol("cuStreamAttachMemAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, dptr, length, flags); -} - -CUresult CUDAAPI cuStreamQuery(CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream); - static auto func_ptr = LoadSymbol("cuStreamQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream); -} - -CUresult CUDAAPI cuStreamSynchronize(CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream); - static auto func_ptr = LoadSymbol("cuStreamSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream); -} - -CUresult CUDAAPI cuStreamDestroy(CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream); - static auto func_ptr = LoadSymbol("cuStreamDestroy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream); -} - -CUresult CUDAAPI cuEventCreate(CUevent *phEvent, unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent *, unsigned int); - static auto func_ptr = LoadSymbol("cuEventCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phEvent, Flags); -} - -CUresult CUDAAPI cuEventRecord(CUevent hEvent, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent, CUstream); - static auto func_ptr = LoadSymbol("cuEventRecord"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hEvent, hStream); -} - -CUresult CUDAAPI cuEventQuery(CUevent hEvent) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent); - static auto func_ptr = LoadSymbol("cuEventQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hEvent); -} - -CUresult CUDAAPI cuEventSynchronize(CUevent hEvent) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent); - static auto func_ptr = LoadSymbol("cuEventSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hEvent); -} - -CUresult CUDAAPI cuEventDestroy(CUevent hEvent) { - using FuncPtr = CUresult(CUDAAPI 
*)(CUevent); - static auto func_ptr = LoadSymbol("cuEventDestroy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hEvent); -} - -CUresult CUDAAPI cuEventElapsedTime(float *pMilliseconds, CUevent hStart, - CUevent hEnd) { - using FuncPtr = CUresult(CUDAAPI *)(float *, CUevent, CUevent); - static auto func_ptr = LoadSymbol("cuEventElapsedTime"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pMilliseconds, hStart, hEnd); -} - -CUresult CUDAAPI -cuImportExternalMemory(CUexternalMemory *extMem_out, - const CUDA_EXTERNAL_MEMORY_HANDLE_DESC *memHandleDesc) { - using FuncPtr = CUresult(CUDAAPI *)(CUexternalMemory *, - const CUDA_EXTERNAL_MEMORY_HANDLE_DESC *); - static auto func_ptr = LoadSymbol("cuImportExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem_out, memHandleDesc); -} - -CUresult CUDAAPI cuExternalMemoryGetMappedBuffer( - CUdeviceptr *devPtr, CUexternalMemory extMem, - const CUDA_EXTERNAL_MEMORY_BUFFER_DESC *bufferDesc) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, CUexternalMemory, - const CUDA_EXTERNAL_MEMORY_BUFFER_DESC *); - static auto func_ptr = LoadSymbol("cuExternalMemoryGetMappedBuffer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, extMem, bufferDesc); -} - -CUresult CUDAAPI cuExternalMemoryGetMappedMipmappedArray( - CUmipmappedArray *mipmap, CUexternalMemory extMem, - const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC *mipmapDesc) { - using FuncPtr = - CUresult(CUDAAPI *)(CUmipmappedArray *, CUexternalMemory, - const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC *); - static auto func_ptr = - LoadSymbol("cuExternalMemoryGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmap, extMem, mipmapDesc); -} - -CUresult CUDAAPI cuDestroyExternalMemory(CUexternalMemory extMem) { - using FuncPtr = CUresult(CUDAAPI *)(CUexternalMemory); - static auto func_ptr = LoadSymbol("cuDestroyExternalMemory"); 
- if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem); -} - -CUresult CUDAAPI cuImportExternalSemaphore( - CUexternalSemaphore *extSem_out, - const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC *semHandleDesc) { - using FuncPtr = CUresult(CUDAAPI *)( - CUexternalSemaphore *, const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC *); - static auto func_ptr = LoadSymbol("cuImportExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem_out, semHandleDesc); -} - -CUresult CUDAAPI cuSignalExternalSemaphoresAsync( - const CUexternalSemaphore *extSemArray, - const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS *paramsArray, - unsigned int numExtSems, CUstream stream) { - using FuncPtr = CUresult(CUDAAPI *)( - const CUexternalSemaphore *, - const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS *, unsigned int, CUstream); - static auto func_ptr = LoadSymbol("cuSignalExternalSemaphoresAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -CUresult CUDAAPI cuWaitExternalSemaphoresAsync( - const CUexternalSemaphore *extSemArray, - const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS *paramsArray, - unsigned int numExtSems, CUstream stream) { - using FuncPtr = CUresult(CUDAAPI *)( - const CUexternalSemaphore *, const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS *, - unsigned int, CUstream); - static auto func_ptr = LoadSymbol("cuWaitExternalSemaphoresAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -CUresult CUDAAPI cuDestroyExternalSemaphore(CUexternalSemaphore extSem) { - using FuncPtr = CUresult(CUDAAPI *)(CUexternalSemaphore); - static auto func_ptr = LoadSymbol("cuDestroyExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem); -} - -CUresult CUDAAPI cuStreamWaitValue32(CUstream stream, CUdeviceptr addr, - cuuint32_t value, unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI 
*)(CUstream, CUdeviceptr, cuuint32_t, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamWaitValue32"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, addr, value, flags); -} - -CUresult CUDAAPI cuStreamWaitValue64(CUstream stream, CUdeviceptr addr, - cuuint64_t value, unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUdeviceptr, cuuint64_t, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamWaitValue64"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, addr, value, flags); -} - -CUresult CUDAAPI cuStreamWriteValue32(CUstream stream, CUdeviceptr addr, - cuuint32_t value, unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUdeviceptr, cuuint32_t, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamWriteValue32"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, addr, value, flags); -} - -CUresult CUDAAPI cuStreamWriteValue64(CUstream stream, CUdeviceptr addr, - cuuint64_t value, unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUdeviceptr, cuuint64_t, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamWriteValue64"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, addr, value, flags); -} - -CUresult CUDAAPI cuStreamBatchMemOp(CUstream stream, unsigned int count, - CUstreamBatchMemOpParams *paramArray, - unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, unsigned int, - CUstreamBatchMemOpParams *, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamBatchMemOp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, count, paramArray, flags); -} - -CUresult CUDAAPI cuFuncGetAttribute(int *pi, CUfunction_attribute attrib, - CUfunction hfunc) { - using FuncPtr = CUresult(CUDAAPI *)(int *, CUfunction_attribute, CUfunction); - static auto func_ptr = LoadSymbol("cuFuncGetAttribute"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(pi, attrib, hfunc); -} - -CUresult CUDAAPI cuFuncSetAttribute(CUfunction hfunc, - CUfunction_attribute attrib, int value) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, CUfunction_attribute, int); - static auto func_ptr = LoadSymbol("cuFuncSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, attrib, value); -} - -CUresult CUDAAPI cuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, CUfunc_cache); - static auto func_ptr = LoadSymbol("cuFuncSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, config); -} - -CUresult CUDAAPI cuFuncSetSharedMemConfig(CUfunction hfunc, - CUsharedconfig config) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, CUsharedconfig); - static auto func_ptr = LoadSymbol("cuFuncSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, config); -} - -CUresult CUDAAPI cuLaunchKernel(CUfunction f, unsigned int gridDimX, - unsigned int gridDimY, unsigned int gridDimZ, - unsigned int blockDimX, unsigned int blockDimY, - unsigned int blockDimZ, - unsigned int sharedMemBytes, CUstream hStream, - void **kernelParams, void **extra) { - using FuncPtr = CUresult(CUDAAPI *)( - CUfunction, unsigned int, unsigned int, unsigned int, unsigned int, - unsigned int, unsigned int, unsigned int, CUstream, void **, void **); - static auto func_ptr = LoadSymbol("cuLaunchKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, - blockDimZ, sharedMemBytes, hStream, kernelParams, extra); -} - -CUresult CUDAAPI cuLaunchCooperativeKernel( - CUfunction f, unsigned int gridDimX, unsigned int gridDimY, - unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, - unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, - void **kernelParams) { - using 
FuncPtr = CUresult(CUDAAPI *)( - CUfunction, unsigned int, unsigned int, unsigned int, unsigned int, - unsigned int, unsigned int, unsigned int, CUstream, void **); - static auto func_ptr = LoadSymbol("cuLaunchCooperativeKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, - blockDimZ, sharedMemBytes, hStream, kernelParams); -} - -CUresult CUDAAPI cuLaunchCooperativeKernelMultiDevice( - CUDA_LAUNCH_PARAMS *launchParamsList, unsigned int numDevices, - unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUDA_LAUNCH_PARAMS *, unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cuLaunchCooperativeKernelMultiDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(launchParamsList, numDevices, flags); -} - -CUresult CUDAAPI cuLaunchHostFunc(CUstream hStream, CUhostFn fn, - void *userData) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUhostFn, void *); - static auto func_ptr = LoadSymbol("cuLaunchHostFunc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, fn, userData); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuFuncSetBlockShape(CUfunction hfunc, int x, - int y, int z) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, int, int); - static auto func_ptr = LoadSymbol("cuFuncSetBlockShape"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, x, y, z); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuFuncSetSharedSize(CUfunction hfunc, - unsigned int bytes) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, unsigned int); - static auto func_ptr = LoadSymbol("cuFuncSetSharedSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, bytes); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuParamSetSize(CUfunction hfunc, - unsigned int numbytes) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, unsigned int); - static auto func_ptr = LoadSymbol("cuParamSetSize"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, numbytes); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuParamSeti(CUfunction hfunc, int offset, - unsigned int value) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, unsigned int); - static auto func_ptr = LoadSymbol("cuParamSeti"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, offset, value); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuParamSetf(CUfunction hfunc, int offset, - float value) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, float); - static auto func_ptr = LoadSymbol("cuParamSetf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, offset, value); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuParamSetv(CUfunction hfunc, int offset, - void *ptr, - unsigned int numbytes) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, void *, unsigned int); - static auto func_ptr = LoadSymbol("cuParamSetv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, offset, ptr, numbytes); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuLaunch(CUfunction f) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction); - static auto func_ptr = LoadSymbol("cuLaunch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(f); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuLaunchGrid(CUfunction f, int grid_width, - int grid_height) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, int); - static auto func_ptr = LoadSymbol("cuLaunchGrid"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(f, grid_width, grid_height); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuLaunchGridAsync(CUfunction f, - int grid_width, - int grid_height, - CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, int, CUstream); - static auto func_ptr = LoadSymbol("cuLaunchGridAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(f, grid_width, grid_height, hStream); -} - 
-__CUDA_DEPRECATED CUresult CUDAAPI cuParamSetTexRef(CUfunction hfunc, - int texunit, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, CUtexref); - static auto func_ptr = LoadSymbol("cuParamSetTexRef"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, texunit, hTexRef); -} - -CUresult CUDAAPI cuGraphCreate(CUgraph *phGraph, unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph *, unsigned int); - static auto func_ptr = LoadSymbol("cuGraphCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraph, flags); -} - -CUresult CUDAAPI cuGraphAddKernelNode( - CUgraphNode *phGraphNode, CUgraph hGraph, CUgraphNode *dependencies, - size_t numDependencies, const CUDA_KERNEL_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, CUgraphNode *, - size_t, const CUDA_KERNEL_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphAddKernelNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - nodeParams); -} - -CUresult CUDAAPI cuGraphKernelNodeGetParams( - CUgraphNode hNode, CUDA_KERNEL_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_KERNEL_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphKernelNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphKernelNodeSetParams( - CUgraphNode hNode, const CUDA_KERNEL_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode, const CUDA_KERNEL_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphAddMemcpyNode(CUgraphNode *phGraphNode, CUgraph hGraph, - CUgraphNode *dependencies, - size_t numDependencies, - const CUDA_MEMCPY3D *copyParams, - 
CUcontext ctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, CUgraphNode *, - size_t, const CUDA_MEMCPY3D *, CUcontext); - static auto func_ptr = LoadSymbol("cuGraphAddMemcpyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - copyParams, ctx); -} - -CUresult CUDAAPI cuGraphMemcpyNodeGetParams(CUgraphNode hNode, - CUDA_MEMCPY3D *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_MEMCPY3D *); - static auto func_ptr = LoadSymbol("cuGraphMemcpyNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphMemcpyNodeSetParams(CUgraphNode hNode, - const CUDA_MEMCPY3D *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, const CUDA_MEMCPY3D *); - static auto func_ptr = LoadSymbol("cuGraphMemcpyNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphAddMemsetNode( - CUgraphNode *phGraphNode, CUgraph hGraph, CUgraphNode *dependencies, - size_t numDependencies, const CUDA_MEMSET_NODE_PARAMS *memsetParams, - CUcontext ctx) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, CUgraphNode *, size_t, - const CUDA_MEMSET_NODE_PARAMS *, CUcontext); - static auto func_ptr = LoadSymbol("cuGraphAddMemsetNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - memsetParams, ctx); -} - -CUresult CUDAAPI cuGraphMemsetNodeGetParams( - CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_MEMSET_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphMemsetNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphMemsetNodeSetParams( - CUgraphNode hNode, const CUDA_MEMSET_NODE_PARAMS 
*nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode, const CUDA_MEMSET_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphAddHostNode(CUgraphNode *phGraphNode, CUgraph hGraph, - CUgraphNode *dependencies, - size_t numDependencies, - const CUDA_HOST_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, CUgraphNode *, - size_t, const CUDA_HOST_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphAddHostNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - nodeParams); -} - -CUresult CUDAAPI cuGraphHostNodeGetParams(CUgraphNode hNode, - CUDA_HOST_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_HOST_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphHostNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphHostNodeSetParams( - CUgraphNode hNode, const CUDA_HOST_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode, const CUDA_HOST_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphHostNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphAddChildGraphNode(CUgraphNode *phGraphNode, - CUgraph hGraph, - CUgraphNode *dependencies, - size_t numDependencies, - CUgraph childGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, CUgraphNode *, - size_t, CUgraph); - static auto func_ptr = LoadSymbol("cuGraphAddChildGraphNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - childGraph); -} - -CUresult CUDAAPI cuGraphChildGraphNodeGetGraph(CUgraphNode hNode, - CUgraph 
*phGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraph *); - static auto func_ptr = LoadSymbol("cuGraphChildGraphNodeGetGraph"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, phGraph); -} - -CUresult CUDAAPI cuGraphAddEmptyNode(CUgraphNode *phGraphNode, CUgraph hGraph, - CUgraphNode *dependencies, - size_t numDependencies) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, CUgraphNode *, size_t); - static auto func_ptr = LoadSymbol("cuGraphAddEmptyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies); -} - -CUresult CUDAAPI cuGraphClone(CUgraph *phGraphClone, CUgraph originalGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph *, CUgraph); - static auto func_ptr = LoadSymbol("cuGraphClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphClone, originalGraph); -} - -CUresult CUDAAPI cuGraphNodeFindInClone(CUgraphNode *phNode, - CUgraphNode hOriginalNode, - CUgraph hClonedGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraphNode, CUgraph); - static auto func_ptr = LoadSymbol("cuGraphNodeFindInClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phNode, hOriginalNode, hClonedGraph); -} - -CUresult CUDAAPI cuGraphNodeGetType(CUgraphNode hNode, CUgraphNodeType *type) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraphNodeType *); - static auto func_ptr = LoadSymbol("cuGraphNodeGetType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, type); -} - -CUresult CUDAAPI cuGraphGetNodes(CUgraph hGraph, CUgraphNode *nodes, - size_t *numNodes) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph, CUgraphNode *, size_t *); - static auto func_ptr = LoadSymbol("cuGraphGetNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph, nodes, numNodes); -} - -CUresult CUDAAPI cuGraphGetRootNodes(CUgraph hGraph, CUgraphNode 
*rootNodes, - size_t *numRootNodes) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph, CUgraphNode *, size_t *); - static auto func_ptr = LoadSymbol("cuGraphGetRootNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph, rootNodes, numRootNodes); -} - -CUresult CUDAAPI cuGraphGetEdges(CUgraph hGraph, CUgraphNode *from, - CUgraphNode *to, size_t *numEdges) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraph, CUgraphNode *, CUgraphNode *, size_t *); - static auto func_ptr = LoadSymbol("cuGraphGetEdges"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph, from, to, numEdges); -} - -CUresult CUDAAPI cuGraphNodeGetDependencies(CUgraphNode hNode, - CUgraphNode *dependencies, - size_t *numDependencies) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraphNode *, size_t *); - static auto func_ptr = LoadSymbol("cuGraphNodeGetDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, dependencies, numDependencies); -} - -CUresult CUDAAPI cuGraphNodeGetDependentNodes(CUgraphNode hNode, - CUgraphNode *dependentNodes, - size_t *numDependentNodes) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraphNode *, size_t *); - static auto func_ptr = LoadSymbol("cuGraphNodeGetDependentNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, dependentNodes, numDependentNodes); -} - -CUresult CUDAAPI cuGraphAddDependencies(CUgraph hGraph, CUgraphNode *from, - CUgraphNode *to, - size_t numDependencies) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraph, CUgraphNode *, CUgraphNode *, size_t); - static auto func_ptr = LoadSymbol("cuGraphAddDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph, from, to, numDependencies); -} - -CUresult CUDAAPI cuGraphRemoveDependencies(CUgraph hGraph, CUgraphNode *from, - CUgraphNode *to, - size_t numDependencies) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraph, CUgraphNode *, CUgraphNode *, size_t); 
- static auto func_ptr = LoadSymbol("cuGraphRemoveDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph, from, to, numDependencies); -} - -CUresult CUDAAPI cuGraphDestroyNode(CUgraphNode hNode) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode); - static auto func_ptr = LoadSymbol("cuGraphDestroyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode); -} - -CUresult CUDAAPI cuGraphInstantiate(CUgraphExec *phGraphExec, CUgraph hGraph, - CUgraphNode *phErrorNode, char *logBuffer, - size_t bufferSize) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec *, CUgraph, CUgraphNode *, - char *, size_t); - static auto func_ptr = LoadSymbol("cuGraphInstantiate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphExec, hGraph, phErrorNode, logBuffer, bufferSize); -} - -CUresult CUDAAPI cuGraphLaunch(CUgraphExec hGraphExec, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUstream); - static auto func_ptr = LoadSymbol("cuGraphLaunch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hStream); -} - -CUresult CUDAAPI cuGraphExecDestroy(CUgraphExec hGraphExec) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec); - static auto func_ptr = LoadSymbol("cuGraphExecDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec); -} - -CUresult CUDAAPI cuGraphDestroy(CUgraph hGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph); - static auto func_ptr = LoadSymbol("cuGraphDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph); -} - -CUresult CUDAAPI cuOccupancyMaxActiveBlocksPerMultiprocessor( - int *numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize) { - using FuncPtr = CUresult(CUDAAPI *)(int *, CUfunction, int, size_t); - static auto func_ptr = - LoadSymbol("cuOccupancyMaxActiveBlocksPerMultiprocessor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(numBlocks, func, blockSize, dynamicSMemSize); -} - -CUresult CUDAAPI cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( - int *numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize, - unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(int *, CUfunction, int, size_t, unsigned int); - static auto func_ptr = LoadSymbol( - "cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize, flags); -} - -CUresult CUDAAPI cuOccupancyMaxPotentialBlockSize( - int *minGridSize, int *blockSize, CUfunction func, - CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, - int blockSizeLimit) { - using FuncPtr = CUresult(CUDAAPI *)(int *, int *, CUfunction, - CUoccupancyB2DSize, size_t, int); - static auto func_ptr = - LoadSymbol("cuOccupancyMaxPotentialBlockSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(minGridSize, blockSize, func, blockSizeToDynamicSMemSize, - dynamicSMemSize, blockSizeLimit); -} - -CUresult CUDAAPI cuOccupancyMaxPotentialBlockSizeWithFlags( - int *minGridSize, int *blockSize, CUfunction func, - CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, - int blockSizeLimit, unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)( - int *, int *, CUfunction, CUoccupancyB2DSize, size_t, int, unsigned int); - static auto func_ptr = - LoadSymbol("cuOccupancyMaxPotentialBlockSizeWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(minGridSize, blockSize, func, blockSizeToDynamicSMemSize, - dynamicSMemSize, blockSizeLimit, flags); -} - -CUresult CUDAAPI cuTexRefSetArray(CUtexref hTexRef, CUarray hArray, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUarray, unsigned int); - static auto func_ptr = LoadSymbol("cuTexRefSetArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, hArray, 
Flags); -} - -CUresult CUDAAPI cuTexRefSetMipmappedArray(CUtexref hTexRef, - CUmipmappedArray hMipmappedArray, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUmipmappedArray, unsigned int); - static auto func_ptr = LoadSymbol("cuTexRefSetMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, hMipmappedArray, Flags); -} - -CUresult CUDAAPI cuTexRefSetAddress(size_t *ByteOffset, CUtexref hTexRef, - CUdeviceptr dptr, size_t bytes) { - using FuncPtr = CUresult(CUDAAPI *)(size_t *, CUtexref, CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuTexRefSetAddress_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ByteOffset, hTexRef, dptr, bytes); -} - -CUresult CUDAAPI cuTexRefSetAddress2D(CUtexref hTexRef, - const CUDA_ARRAY_DESCRIPTOR *desc, - CUdeviceptr dptr, size_t Pitch) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, const CUDA_ARRAY_DESCRIPTOR *, - CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuTexRefSetAddress2D_v3"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, desc, dptr, Pitch); -} - -CUresult CUDAAPI cuTexRefSetFormat(CUtexref hTexRef, CUarray_format fmt, - int NumPackedComponents) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUarray_format, int); - static auto func_ptr = LoadSymbol("cuTexRefSetFormat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, fmt, NumPackedComponents); -} - -CUresult CUDAAPI cuTexRefSetAddressMode(CUtexref hTexRef, int dim, - CUaddress_mode am) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, int, CUaddress_mode); - static auto func_ptr = LoadSymbol("cuTexRefSetAddressMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, dim, am); -} - -CUresult CUDAAPI cuTexRefSetFilterMode(CUtexref hTexRef, CUfilter_mode fm) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUfilter_mode); - static auto func_ptr = 
LoadSymbol("cuTexRefSetFilterMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, fm); -} - -CUresult CUDAAPI cuTexRefSetMipmapFilterMode(CUtexref hTexRef, - CUfilter_mode fm) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUfilter_mode); - static auto func_ptr = LoadSymbol("cuTexRefSetMipmapFilterMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, fm); -} - -CUresult CUDAAPI cuTexRefSetMipmapLevelBias(CUtexref hTexRef, float bias) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, float); - static auto func_ptr = LoadSymbol("cuTexRefSetMipmapLevelBias"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, bias); -} - -CUresult CUDAAPI cuTexRefSetMipmapLevelClamp(CUtexref hTexRef, - float minMipmapLevelClamp, - float maxMipmapLevelClamp) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, float, float); - static auto func_ptr = LoadSymbol("cuTexRefSetMipmapLevelClamp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, minMipmapLevelClamp, maxMipmapLevelClamp); -} - -CUresult CUDAAPI cuTexRefSetMaxAnisotropy(CUtexref hTexRef, - unsigned int maxAniso) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, unsigned int); - static auto func_ptr = LoadSymbol("cuTexRefSetMaxAnisotropy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, maxAniso); -} - -CUresult CUDAAPI cuTexRefSetBorderColor(CUtexref hTexRef, float *pBorderColor) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, float *); - static auto func_ptr = LoadSymbol("cuTexRefSetBorderColor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, pBorderColor); -} - -CUresult CUDAAPI cuTexRefSetFlags(CUtexref hTexRef, unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, unsigned int); - static auto func_ptr = LoadSymbol("cuTexRefSetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, Flags); -} - 
-CUresult CUDAAPI cuTexRefGetAddress(CUdeviceptr *pdptr, CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetAddress_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pdptr, hTexRef); -} - -CUresult CUDAAPI cuTexRefGetArray(CUarray *phArray, CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phArray, hTexRef); -} - -CUresult CUDAAPI cuTexRefGetMipmappedArray(CUmipmappedArray *phMipmappedArray, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUmipmappedArray *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phMipmappedArray, hTexRef); -} - -CUresult CUDAAPI cuTexRefGetAddressMode(CUaddress_mode *pam, CUtexref hTexRef, - int dim) { - using FuncPtr = CUresult(CUDAAPI *)(CUaddress_mode *, CUtexref, int); - static auto func_ptr = LoadSymbol("cuTexRefGetAddressMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pam, hTexRef, dim); -} - -CUresult CUDAAPI cuTexRefGetFilterMode(CUfilter_mode *pfm, CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUfilter_mode *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetFilterMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pfm, hTexRef); -} - -CUresult CUDAAPI cuTexRefGetFormat(CUarray_format *pFormat, int *pNumChannels, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray_format *, int *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetFormat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFormat, pNumChannels, hTexRef); -} - -CUresult CUDAAPI cuTexRefGetMipmapFilterMode(CUfilter_mode *pfm, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI 
*)(CUfilter_mode *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetMipmapFilterMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pfm, hTexRef); -} - -CUresult CUDAAPI cuTexRefGetMipmapLevelBias(float *pbias, CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(float *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetMipmapLevelBias"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pbias, hTexRef); -} - -CUresult CUDAAPI cuTexRefGetMipmapLevelClamp(float *pminMipmapLevelClamp, - float *pmaxMipmapLevelClamp, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(float *, float *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetMipmapLevelClamp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pminMipmapLevelClamp, pmaxMipmapLevelClamp, hTexRef); -} - -CUresult CUDAAPI cuTexRefGetMaxAnisotropy(int *pmaxAniso, CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(int *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetMaxAnisotropy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pmaxAniso, hTexRef); -} - -CUresult CUDAAPI cuTexRefGetBorderColor(float *pBorderColor, CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(float *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetBorderColor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pBorderColor, hTexRef); -} - -CUresult CUDAAPI cuTexRefGetFlags(unsigned int *pFlags, CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(unsigned int *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFlags, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefCreate(CUtexref *pTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref *); - static auto func_ptr = LoadSymbol("cuTexRefCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(pTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefDestroy(CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef); -} - -CUresult CUDAAPI cuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUsurfref, CUarray, unsigned int); - static auto func_ptr = LoadSymbol("cuSurfRefSetArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hSurfRef, hArray, Flags); -} - -CUresult CUDAAPI cuSurfRefGetArray(CUarray *phArray, CUsurfref hSurfRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray *, CUsurfref); - static auto func_ptr = LoadSymbol("cuSurfRefGetArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phArray, hSurfRef); -} - -CUresult CUDAAPI -cuTexObjectCreate(CUtexObject *pTexObject, const CUDA_RESOURCE_DESC *pResDesc, - const CUDA_TEXTURE_DESC *pTexDesc, - const CUDA_RESOURCE_VIEW_DESC *pResViewDesc) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexObject *, const CUDA_RESOURCE_DESC *, - const CUDA_TEXTURE_DESC *, - const CUDA_RESOURCE_VIEW_DESC *); - static auto func_ptr = LoadSymbol("cuTexObjectCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexObject, pResDesc, pTexDesc, pResViewDesc); -} - -CUresult CUDAAPI cuTexObjectDestroy(CUtexObject texObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexObject); - static auto func_ptr = LoadSymbol("cuTexObjectDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texObject); -} - -CUresult CUDAAPI cuTexObjectGetResourceDesc(CUDA_RESOURCE_DESC *pResDesc, - CUtexObject texObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_RESOURCE_DESC *, CUtexObject); - static auto func_ptr = LoadSymbol("cuTexObjectGetResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, texObject); -} 
- -CUresult CUDAAPI cuTexObjectGetTextureDesc(CUDA_TEXTURE_DESC *pTexDesc, - CUtexObject texObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_TEXTURE_DESC *, CUtexObject); - static auto func_ptr = LoadSymbol("cuTexObjectGetTextureDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexDesc, texObject); -} - -CUresult CUDAAPI cuTexObjectGetResourceViewDesc( - CUDA_RESOURCE_VIEW_DESC *pResViewDesc, CUtexObject texObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_RESOURCE_VIEW_DESC *, CUtexObject); - static auto func_ptr = LoadSymbol("cuTexObjectGetResourceViewDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResViewDesc, texObject); -} - -CUresult CUDAAPI cuSurfObjectCreate(CUsurfObject *pSurfObject, - const CUDA_RESOURCE_DESC *pResDesc) { - using FuncPtr = - CUresult(CUDAAPI *)(CUsurfObject *, const CUDA_RESOURCE_DESC *); - static auto func_ptr = LoadSymbol("cuSurfObjectCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pSurfObject, pResDesc); -} - -CUresult CUDAAPI cuSurfObjectDestroy(CUsurfObject surfObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUsurfObject); - static auto func_ptr = LoadSymbol("cuSurfObjectDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfObject); -} - -CUresult CUDAAPI cuSurfObjectGetResourceDesc(CUDA_RESOURCE_DESC *pResDesc, - CUsurfObject surfObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_RESOURCE_DESC *, CUsurfObject); - static auto func_ptr = LoadSymbol("cuSurfObjectGetResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, surfObject); -} - -CUresult CUDAAPI cuDeviceCanAccessPeer(int *canAccessPeer, CUdevice dev, - CUdevice peerDev) { - using FuncPtr = CUresult(CUDAAPI *)(int *, CUdevice, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceCanAccessPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(canAccessPeer, dev, peerDev); -} - -CUresult CUDAAPI 
cuCtxEnablePeerAccess(CUcontext peerContext, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext, unsigned int); - static auto func_ptr = LoadSymbol("cuCtxEnablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerContext, Flags); -} - -CUresult CUDAAPI cuCtxDisablePeerAccess(CUcontext peerContext) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuCtxDisablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerContext); -} - -CUresult CUDAAPI cuDeviceGetP2PAttribute(int *value, - CUdevice_P2PAttribute attrib, - CUdevice srcDevice, - CUdevice dstDevice) { - using FuncPtr = - CUresult(CUDAAPI *)(int *, CUdevice_P2PAttribute, CUdevice, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetP2PAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attrib, srcDevice, dstDevice); -} - -CUresult CUDAAPI cuGraphicsUnregisterResource(CUgraphicsResource resource) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphicsResource); - static auto func_ptr = LoadSymbol("cuGraphicsUnregisterResource"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource); -} - -CUresult CUDAAPI cuGraphicsSubResourceGetMappedArray( - CUarray *pArray, CUgraphicsResource resource, unsigned int arrayIndex, - unsigned int mipLevel) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray *, CUgraphicsResource, - unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cuGraphicsSubResourceGetMappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pArray, resource, arrayIndex, mipLevel); -} - -CUresult CUDAAPI cuGraphicsResourceGetMappedMipmappedArray( - CUmipmappedArray *pMipmappedArray, CUgraphicsResource resource) { - using FuncPtr = CUresult(CUDAAPI *)(CUmipmappedArray *, CUgraphicsResource); - static auto func_ptr = - LoadSymbol("cuGraphicsResourceGetMappedMipmappedArray"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(pMipmappedArray, resource); -} - -CUresult CUDAAPI cuGraphicsResourceGetMappedPointer( - CUdeviceptr *pDevPtr, size_t *pSize, CUgraphicsResource resource) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr *, size_t *, CUgraphicsResource); - static auto func_ptr = - LoadSymbol("cuGraphicsResourceGetMappedPointer_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pDevPtr, pSize, resource); -} - -CUresult CUDAAPI cuGraphicsResourceSetMapFlags(CUgraphicsResource resource, - unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphicsResource, unsigned int); - static auto func_ptr = - LoadSymbol("cuGraphicsResourceSetMapFlags_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource, flags); -} - -CUresult CUDAAPI cuGraphicsMapResources(unsigned int count, - CUgraphicsResource *resources, - CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(unsigned int, CUgraphicsResource *, CUstream); - static auto func_ptr = LoadSymbol("cuGraphicsMapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, hStream); -} - -CUresult CUDAAPI cuGraphicsUnmapResources(unsigned int count, - CUgraphicsResource *resources, - CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(unsigned int, CUgraphicsResource *, CUstream); - static auto func_ptr = LoadSymbol("cuGraphicsUnmapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, hStream); -} - -CUresult CUDAAPI cuGetExportTable(const void **ppExportTable, - const CUuuid *pExportTableId) { - using FuncPtr = CUresult(CUDAAPI *)(const void **, const CUuuid *); - static auto func_ptr = LoadSymbol("cuGetExportTable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ppExportTable, pExportTableId); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cuda_10_1.inc 
b/third_party/xla/third_party/tsl/tsl/cuda/cuda_10_1.inc deleted file mode 100644 index d35035799a77da..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cuda_10_1.inc +++ /dev/null @@ -1,2166 +0,0 @@ -// Auto-generated, do not edit. - -extern "C" { - -CUresult CUDAAPI cuGetErrorString(CUresult error, const char **pStr) { - using FuncPtr = CUresult(CUDAAPI *)(CUresult, const char **); - static auto func_ptr = LoadSymbol("cuGetErrorString"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(error, pStr); -} - -CUresult CUDAAPI cuGetErrorName(CUresult error, const char **pStr) { - using FuncPtr = CUresult(CUDAAPI *)(CUresult, const char **); - static auto func_ptr = LoadSymbol("cuGetErrorName"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(error, pStr); -} - -CUresult CUDAAPI cuInit(unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(unsigned int); - static auto func_ptr = LoadSymbol("cuInit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(Flags); -} - -CUresult CUDAAPI cuDriverGetVersion(int *driverVersion) { - using FuncPtr = CUresult(CUDAAPI *)(int *); - static auto func_ptr = LoadSymbol("cuDriverGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(driverVersion); -} - -CUresult CUDAAPI cuDeviceGet(CUdevice *device, int ordinal) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice *, int); - static auto func_ptr = LoadSymbol("cuDeviceGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, ordinal); -} - -CUresult CUDAAPI cuDeviceGetCount(int *count) { - using FuncPtr = CUresult(CUDAAPI *)(int *); - static auto func_ptr = LoadSymbol("cuDeviceGetCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count); -} - -CUresult CUDAAPI cuDeviceGetName(char *name, int len, CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(char *, int, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetName"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(name, len, dev); -} - -CUresult CUDAAPI cuDeviceGetUuid(CUuuid *uuid, CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUuuid *, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetUuid"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(uuid, dev); -} - -CUresult CUDAAPI cuDeviceTotalMem(size_t *bytes, CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(size_t *, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceTotalMem_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(bytes, dev); -} - -CUresult CUDAAPI cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, - CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(int *, CUdevice_attribute, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pi, attrib, dev); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuDeviceGetProperties(CUdevprop *prop, - CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevprop *, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(prop, dev); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuDeviceComputeCapability(int *major, - int *minor, - CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(int *, int *, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceComputeCapability"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(major, minor, dev); -} - -CUresult CUDAAPI cuDevicePrimaryCtxRetain(CUcontext *pctx, CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext *, CUdevice); - static auto func_ptr = LoadSymbol("cuDevicePrimaryCtxRetain"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pctx, dev); -} - -CUresult CUDAAPI cuDevicePrimaryCtxRelease(CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice); - static auto func_ptr = 
LoadSymbol("cuDevicePrimaryCtxRelease"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev); -} - -CUresult CUDAAPI cuDevicePrimaryCtxSetFlags(CUdevice dev, unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice, unsigned int); - static auto func_ptr = LoadSymbol("cuDevicePrimaryCtxSetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev, flags); -} - -CUresult CUDAAPI cuDevicePrimaryCtxGetState(CUdevice dev, unsigned int *flags, - int *active) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice, unsigned int *, int *); - static auto func_ptr = LoadSymbol("cuDevicePrimaryCtxGetState"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev, flags, active); -} - -CUresult CUDAAPI cuDevicePrimaryCtxReset(CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice); - static auto func_ptr = LoadSymbol("cuDevicePrimaryCtxReset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev); -} - -CUresult CUDAAPI cuCtxCreate(CUcontext *pctx, unsigned int flags, - CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext *, unsigned int, CUdevice); - static auto func_ptr = LoadSymbol("cuCtxCreate_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pctx, flags, dev); -} - -CUresult CUDAAPI cuCtxDestroy(CUcontext ctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuCtxDestroy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx); -} - -CUresult CUDAAPI cuCtxPushCurrent(CUcontext ctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuCtxPushCurrent_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx); -} - -CUresult CUDAAPI cuCtxPopCurrent(CUcontext *pctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext *); - static auto func_ptr = LoadSymbol("cuCtxPopCurrent_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(pctx); -} - -CUresult CUDAAPI cuCtxSetCurrent(CUcontext ctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuCtxSetCurrent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx); -} - -CUresult CUDAAPI cuCtxGetCurrent(CUcontext *pctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext *); - static auto func_ptr = LoadSymbol("cuCtxGetCurrent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pctx); -} - -CUresult CUDAAPI cuCtxGetDevice(CUdevice *device) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice *); - static auto func_ptr = LoadSymbol("cuCtxGetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -CUresult CUDAAPI cuCtxGetFlags(unsigned int *flags) { - using FuncPtr = CUresult(CUDAAPI *)(unsigned int *); - static auto func_ptr = LoadSymbol("cuCtxGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -CUresult CUDAAPI cuCtxSynchronize(void) { - using FuncPtr = CUresult(CUDAAPI *)(); - static auto func_ptr = LoadSymbol("cuCtxSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -CUresult CUDAAPI cuCtxSetLimit(CUlimit limit, size_t value) { - using FuncPtr = CUresult(CUDAAPI *)(CUlimit, size_t); - static auto func_ptr = LoadSymbol("cuCtxSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -CUresult CUDAAPI cuCtxGetLimit(size_t *pvalue, CUlimit limit) { - using FuncPtr = CUresult(CUDAAPI *)(size_t *, CUlimit); - static auto func_ptr = LoadSymbol("cuCtxGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pvalue, limit); -} - -CUresult CUDAAPI cuCtxGetCacheConfig(CUfunc_cache *pconfig) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunc_cache *); - static auto func_ptr = LoadSymbol("cuCtxGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pconfig); -} - 
-CUresult CUDAAPI cuCtxSetCacheConfig(CUfunc_cache config) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunc_cache); - static auto func_ptr = LoadSymbol("cuCtxSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config); -} - -CUresult CUDAAPI cuCtxGetSharedMemConfig(CUsharedconfig *pConfig) { - using FuncPtr = CUresult(CUDAAPI *)(CUsharedconfig *); - static auto func_ptr = LoadSymbol("cuCtxGetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pConfig); -} - -CUresult CUDAAPI cuCtxSetSharedMemConfig(CUsharedconfig config) { - using FuncPtr = CUresult(CUDAAPI *)(CUsharedconfig); - static auto func_ptr = LoadSymbol("cuCtxSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config); -} - -CUresult CUDAAPI cuCtxGetApiVersion(CUcontext ctx, unsigned int *version) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext, unsigned int *); - static auto func_ptr = LoadSymbol("cuCtxGetApiVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx, version); -} - -CUresult CUDAAPI cuCtxGetStreamPriorityRange(int *leastPriority, - int *greatestPriority) { - using FuncPtr = CUresult(CUDAAPI *)(int *, int *); - static auto func_ptr = LoadSymbol("cuCtxGetStreamPriorityRange"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(leastPriority, greatestPriority); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuCtxAttach(CUcontext *pctx, - unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext *, unsigned int); - static auto func_ptr = LoadSymbol("cuCtxAttach"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pctx, flags); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuCtxDetach(CUcontext ctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuCtxDetach"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx); -} - -CUresult CUDAAPI cuModuleLoad(CUmodule *module, 
const char *fname) { - using FuncPtr = CUresult(CUDAAPI *)(CUmodule *, const char *); - static auto func_ptr = LoadSymbol("cuModuleLoad"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(module, fname); -} - -CUresult CUDAAPI cuModuleLoadData(CUmodule *module, const void *image) { - using FuncPtr = CUresult(CUDAAPI *)(CUmodule *, const void *); - static auto func_ptr = LoadSymbol("cuModuleLoadData"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(module, image); -} - -CUresult CUDAAPI cuModuleLoadDataEx(CUmodule *module, const void *image, - unsigned int numOptions, - CUjit_option *options, - void **optionValues) { - using FuncPtr = CUresult(CUDAAPI *)(CUmodule *, const void *, unsigned int, - CUjit_option *, void **); - static auto func_ptr = LoadSymbol("cuModuleLoadDataEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(module, image, numOptions, options, optionValues); -} - -CUresult CUDAAPI cuModuleLoadFatBinary(CUmodule *module, const void *fatCubin) { - using FuncPtr = CUresult(CUDAAPI *)(CUmodule *, const void *); - static auto func_ptr = LoadSymbol("cuModuleLoadFatBinary"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(module, fatCubin); -} - -CUresult CUDAAPI cuModuleUnload(CUmodule hmod) { - using FuncPtr = CUresult(CUDAAPI *)(CUmodule); - static auto func_ptr = LoadSymbol("cuModuleUnload"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hmod); -} - -CUresult CUDAAPI cuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, - const char *name) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction *, CUmodule, const char *); - static auto func_ptr = LoadSymbol("cuModuleGetFunction"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, hmod, name); -} - -CUresult CUDAAPI cuModuleGetGlobal(CUdeviceptr *dptr, size_t *bytes, - CUmodule hmod, const char *name) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr *, size_t *, CUmodule, const 
char *); - static auto func_ptr = LoadSymbol("cuModuleGetGlobal_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr, bytes, hmod, name); -} - -CUresult CUDAAPI cuModuleGetTexRef(CUtexref *pTexRef, CUmodule hmod, - const char *name) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref *, CUmodule, const char *); - static auto func_ptr = LoadSymbol("cuModuleGetTexRef"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexRef, hmod, name); -} - -CUresult CUDAAPI cuModuleGetSurfRef(CUsurfref *pSurfRef, CUmodule hmod, - const char *name) { - using FuncPtr = CUresult(CUDAAPI *)(CUsurfref *, CUmodule, const char *); - static auto func_ptr = LoadSymbol("cuModuleGetSurfRef"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pSurfRef, hmod, name); -} - -CUresult CUDAAPI cuLinkCreate(unsigned int numOptions, CUjit_option *options, - void **optionValues, CUlinkState *stateOut) { - using FuncPtr = - CUresult(CUDAAPI *)(unsigned int, CUjit_option *, void **, CUlinkState *); - static auto func_ptr = LoadSymbol("cuLinkCreate_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numOptions, options, optionValues, stateOut); -} - -CUresult CUDAAPI cuLinkAddData(CUlinkState state, CUjitInputType type, - void *data, size_t size, const char *name, - unsigned int numOptions, CUjit_option *options, - void **optionValues) { - using FuncPtr = - CUresult(CUDAAPI *)(CUlinkState, CUjitInputType, void *, size_t, - const char *, unsigned int, CUjit_option *, void **); - static auto func_ptr = LoadSymbol("cuLinkAddData_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(state, type, data, size, name, numOptions, options, - optionValues); -} - -CUresult CUDAAPI cuLinkAddFile(CUlinkState state, CUjitInputType type, - const char *path, unsigned int numOptions, - CUjit_option *options, void **optionValues) { - using FuncPtr = CUresult(CUDAAPI *)(CUlinkState, CUjitInputType, const char *, - 
unsigned int, CUjit_option *, void **); - static auto func_ptr = LoadSymbol("cuLinkAddFile_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(state, type, path, numOptions, options, optionValues); -} - -CUresult CUDAAPI cuLinkComplete(CUlinkState state, void **cubinOut, - size_t *sizeOut) { - using FuncPtr = CUresult(CUDAAPI *)(CUlinkState, void **, size_t *); - static auto func_ptr = LoadSymbol("cuLinkComplete"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(state, cubinOut, sizeOut); -} - -CUresult CUDAAPI cuLinkDestroy(CUlinkState state) { - using FuncPtr = CUresult(CUDAAPI *)(CUlinkState); - static auto func_ptr = LoadSymbol("cuLinkDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(state); -} - -CUresult CUDAAPI cuMemGetInfo(size_t *free, size_t *total) { - using FuncPtr = CUresult(CUDAAPI *)(size_t *, size_t *); - static auto func_ptr = LoadSymbol("cuMemGetInfo_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(free, total); -} - -CUresult CUDAAPI cuMemAlloc(CUdeviceptr *dptr, size_t bytesize) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t); - static auto func_ptr = LoadSymbol("cuMemAlloc_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr, bytesize); -} - -CUresult CUDAAPI cuMemAllocPitch(CUdeviceptr *dptr, size_t *pPitch, - size_t WidthInBytes, size_t Height, - unsigned int ElementSizeBytes) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t *, size_t, size_t, - unsigned int); - static auto func_ptr = LoadSymbol("cuMemAllocPitch_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr, pPitch, WidthInBytes, Height, ElementSizeBytes); -} - -CUresult CUDAAPI cuMemFree(CUdeviceptr dptr) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr); - static auto func_ptr = LoadSymbol("cuMemFree_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr); -} - -CUresult CUDAAPI 
cuMemGetAddressRange(CUdeviceptr *pbase, size_t *psize, - CUdeviceptr dptr) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t *, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuMemGetAddressRange_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pbase, psize, dptr); -} - -CUresult CUDAAPI cuMemAllocHost(void **pp, size_t bytesize) { - using FuncPtr = CUresult(CUDAAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cuMemAllocHost_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pp, bytesize); -} - -CUresult CUDAAPI cuMemFreeHost(void *p) { - using FuncPtr = CUresult(CUDAAPI *)(void *); - static auto func_ptr = LoadSymbol("cuMemFreeHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -CUresult CUDAAPI cuMemHostAlloc(void **pp, size_t bytesize, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cuMemHostAlloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pp, bytesize, Flags); -} - -CUresult CUDAAPI cuMemHostGetDevicePointer(CUdeviceptr *pdptr, void *p, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, void *, unsigned int); - static auto func_ptr = LoadSymbol("cuMemHostGetDevicePointer_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pdptr, p, Flags); -} - -CUresult CUDAAPI cuMemHostGetFlags(unsigned int *pFlags, void *p) { - using FuncPtr = CUresult(CUDAAPI *)(unsigned int *, void *); - static auto func_ptr = LoadSymbol("cuMemHostGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFlags, p); -} - -CUresult CUDAAPI cuMemAllocManaged(CUdeviceptr *dptr, size_t bytesize, - unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cuMemAllocManaged"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(dptr, bytesize, flags); -} - -CUresult CUDAAPI cuDeviceGetByPCIBusId(CUdevice *dev, const char *pciBusId) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice *, const char *); - static auto func_ptr = LoadSymbol("cuDeviceGetByPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev, pciBusId); -} - -CUresult CUDAAPI cuDeviceGetPCIBusId(char *pciBusId, int len, CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(char *, int, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pciBusId, len, dev); -} - -CUresult CUDAAPI cuIpcGetEventHandle(CUipcEventHandle *pHandle, CUevent event) { - using FuncPtr = CUresult(CUDAAPI *)(CUipcEventHandle *, CUevent); - static auto func_ptr = LoadSymbol("cuIpcGetEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHandle, event); -} - -CUresult CUDAAPI cuIpcOpenEventHandle(CUevent *phEvent, - CUipcEventHandle handle) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent *, CUipcEventHandle); - static auto func_ptr = LoadSymbol("cuIpcOpenEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phEvent, handle); -} - -CUresult CUDAAPI cuIpcGetMemHandle(CUipcMemHandle *pHandle, CUdeviceptr dptr) { - using FuncPtr = CUresult(CUDAAPI *)(CUipcMemHandle *, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuIpcGetMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHandle, dptr); -} - -CUresult CUDAAPI cuIpcOpenMemHandle(CUdeviceptr *pdptr, CUipcMemHandle handle, - unsigned int Flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr *, CUipcMemHandle, unsigned int); - static auto func_ptr = LoadSymbol("cuIpcOpenMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pdptr, handle, Flags); -} - -CUresult CUDAAPI cuIpcCloseMemHandle(CUdeviceptr dptr) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr); - 
static auto func_ptr = LoadSymbol("cuIpcCloseMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr); -} - -CUresult CUDAAPI cuMemHostRegister(void *p, size_t bytesize, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cuMemHostRegister_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, bytesize, Flags); -} - -CUresult CUDAAPI cuMemHostUnregister(void *p) { - using FuncPtr = CUresult(CUDAAPI *)(void *); - static auto func_ptr = LoadSymbol("cuMemHostUnregister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -CUresult CUDAAPI cuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemcpy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, ByteCount); -} - -CUresult CUDAAPI cuMemcpyPeer(CUdeviceptr dstDevice, CUcontext dstContext, - CUdeviceptr srcDevice, CUcontext srcContext, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUcontext, CUdeviceptr, - CUcontext, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstContext, srcDevice, srcContext, ByteCount); -} - -CUresult CUDAAPI cuMemcpyHtoD(CUdeviceptr dstDevice, const void *srcHost, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, const void *, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyHtoD_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, srcHost, ByteCount); -} - -CUresult CUDAAPI cuMemcpyDtoH(void *dstHost, CUdeviceptr srcDevice, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(void *, CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyDtoH_v2"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(dstHost, srcDevice, ByteCount); -} - -CUresult CUDAAPI cuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyDtoD_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, srcDevice, ByteCount); -} - -CUresult CUDAAPI cuMemcpyDtoA(CUarray dstArray, size_t dstOffset, - CUdeviceptr srcDevice, size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray, size_t, CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyDtoA_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstArray, dstOffset, srcDevice, ByteCount); -} - -CUresult CUDAAPI cuMemcpyAtoD(CUdeviceptr dstDevice, CUarray srcArray, - size_t srcOffset, size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUarray, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyAtoD_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, srcArray, srcOffset, ByteCount); -} - -CUresult CUDAAPI cuMemcpyHtoA(CUarray dstArray, size_t dstOffset, - const void *srcHost, size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray, size_t, const void *, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyHtoA_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstArray, dstOffset, srcHost, ByteCount); -} - -CUresult CUDAAPI cuMemcpyAtoH(void *dstHost, CUarray srcArray, size_t srcOffset, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(void *, CUarray, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyAtoH_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstHost, srcArray, srcOffset, ByteCount); -} - -CUresult CUDAAPI cuMemcpyAtoA(CUarray dstArray, size_t dstOffset, - CUarray srcArray, size_t srcOffset, - size_t ByteCount) { - using FuncPtr = 
CUresult(CUDAAPI *)(CUarray, size_t, CUarray, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyAtoA_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstArray, dstOffset, srcArray, srcOffset, ByteCount); -} - -CUresult CUDAAPI cuMemcpy2D(const CUDA_MEMCPY2D *pCopy) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY2D *); - static auto func_ptr = LoadSymbol("cuMemcpy2D_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy); -} - -CUresult CUDAAPI cuMemcpy2DUnaligned(const CUDA_MEMCPY2D *pCopy) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY2D *); - static auto func_ptr = LoadSymbol("cuMemcpy2DUnaligned_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy); -} - -CUresult CUDAAPI cuMemcpy3D(const CUDA_MEMCPY3D *pCopy) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY3D *); - static auto func_ptr = LoadSymbol("cuMemcpy3D_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy); -} - -CUresult CUDAAPI cuMemcpy3DPeer(const CUDA_MEMCPY3D_PEER *pCopy) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY3D_PEER *); - static auto func_ptr = LoadSymbol("cuMemcpy3DPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy); -} - -CUresult CUDAAPI cuMemcpyAsync(CUdeviceptr dst, CUdeviceptr src, - size_t ByteCount, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, CUdeviceptr, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpyPeerAsync(CUdeviceptr dstDevice, CUcontext dstContext, - CUdeviceptr srcDevice, CUcontext srcContext, - size_t ByteCount, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUcontext, CUdeviceptr, - CUcontext, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyPeerAsync"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstContext, srcDevice, srcContext, ByteCount, - hStream); -} - -CUresult CUDAAPI cuMemcpyHtoDAsync(CUdeviceptr dstDevice, const void *srcHost, - size_t ByteCount, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, const void *, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyHtoDAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, srcHost, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpyDtoHAsync(void *dstHost, CUdeviceptr srcDevice, - size_t ByteCount, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(void *, CUdeviceptr, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyDtoHAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstHost, srcDevice, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpyDtoDAsync(CUdeviceptr dstDevice, CUdeviceptr srcDevice, - size_t ByteCount, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, CUdeviceptr, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyDtoDAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, srcDevice, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpyHtoAAsync(CUarray dstArray, size_t dstOffset, - const void *srcHost, size_t ByteCount, - CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUarray, size_t, const void *, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyHtoAAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstArray, dstOffset, srcHost, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpyAtoHAsync(void *dstHost, CUarray srcArray, - size_t srcOffset, size_t ByteCount, - CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(void *, CUarray, size_t, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyAtoHAsync_v2"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(dstHost, srcArray, srcOffset, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpy2DAsync(const CUDA_MEMCPY2D *pCopy, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY2D *, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpy2DAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy, hStream); -} - -CUresult CUDAAPI cuMemcpy3DAsync(const CUDA_MEMCPY3D *pCopy, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY3D *, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpy3DAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy, hStream); -} - -CUresult CUDAAPI cuMemcpy3DPeerAsync(const CUDA_MEMCPY3D_PEER *pCopy, - CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY3D_PEER *, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpy3DPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy, hStream); -} - -CUresult CUDAAPI cuMemsetD8(CUdeviceptr dstDevice, unsigned char uc, size_t N) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, unsigned char, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD8_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, uc, N); -} - -CUresult CUDAAPI cuMemsetD16(CUdeviceptr dstDevice, unsigned short us, - size_t N) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, unsigned short, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD16_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, us, N); -} - -CUresult CUDAAPI cuMemsetD32(CUdeviceptr dstDevice, unsigned int ui, size_t N) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, unsigned int, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD32_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, ui, N); -} - -CUresult CUDAAPI cuMemsetD2D8(CUdeviceptr dstDevice, 
size_t dstPitch, - unsigned char uc, size_t Width, size_t Height) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned char, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD2D8_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, uc, Width, Height); -} - -CUresult CUDAAPI cuMemsetD2D16(CUdeviceptr dstDevice, size_t dstPitch, - unsigned short us, size_t Width, size_t Height) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned short, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD2D16_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, us, Width, Height); -} - -CUresult CUDAAPI cuMemsetD2D32(CUdeviceptr dstDevice, size_t dstPitch, - unsigned int ui, size_t Width, size_t Height) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned int, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD2D32_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, ui, Width, Height); -} - -CUresult CUDAAPI cuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, - size_t N, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, unsigned char, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD8Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, uc, N, hStream); -} - -CUresult CUDAAPI cuMemsetD16Async(CUdeviceptr dstDevice, unsigned short us, - size_t N, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, unsigned short, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD16Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, us, N, hStream); -} - -CUresult CUDAAPI cuMemsetD32Async(CUdeviceptr dstDevice, unsigned int ui, - size_t N, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, unsigned int, size_t, 
CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD32Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, ui, N, hStream); -} - -CUresult CUDAAPI cuMemsetD2D8Async(CUdeviceptr dstDevice, size_t dstPitch, - unsigned char uc, size_t Width, - size_t Height, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned char, - size_t, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD2D8Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, uc, Width, Height, hStream); -} - -CUresult CUDAAPI cuMemsetD2D16Async(CUdeviceptr dstDevice, size_t dstPitch, - unsigned short us, size_t Width, - size_t Height, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned short, - size_t, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD2D16Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, us, Width, Height, hStream); -} - -CUresult CUDAAPI cuMemsetD2D32Async(CUdeviceptr dstDevice, size_t dstPitch, - unsigned int ui, size_t Width, - size_t Height, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned int, size_t, - size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD2D32Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, ui, Width, Height, hStream); -} - -CUresult CUDAAPI cuArrayCreate(CUarray *pHandle, - const CUDA_ARRAY_DESCRIPTOR *pAllocateArray) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray *, const CUDA_ARRAY_DESCRIPTOR *); - static auto func_ptr = LoadSymbol("cuArrayCreate_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHandle, pAllocateArray); -} - -CUresult CUDAAPI cuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR *pArrayDescriptor, - CUarray hArray) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_ARRAY_DESCRIPTOR *, CUarray); - static auto func_ptr = 
LoadSymbol("cuArrayGetDescriptor_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pArrayDescriptor, hArray); -} - -CUresult CUDAAPI cuArrayDestroy(CUarray hArray) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray); - static auto func_ptr = LoadSymbol("cuArrayDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hArray); -} - -CUresult CUDAAPI cuArray3DCreate( - CUarray *pHandle, const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray) { - using FuncPtr = - CUresult(CUDAAPI *)(CUarray *, const CUDA_ARRAY3D_DESCRIPTOR *); - static auto func_ptr = LoadSymbol("cuArray3DCreate_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHandle, pAllocateArray); -} - -CUresult CUDAAPI cuArray3DGetDescriptor( - CUDA_ARRAY3D_DESCRIPTOR *pArrayDescriptor, CUarray hArray) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_ARRAY3D_DESCRIPTOR *, CUarray); - static auto func_ptr = LoadSymbol("cuArray3DGetDescriptor_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pArrayDescriptor, hArray); -} - -CUresult CUDAAPI -cuMipmappedArrayCreate(CUmipmappedArray *pHandle, - const CUDA_ARRAY3D_DESCRIPTOR *pMipmappedArrayDesc, - unsigned int numMipmapLevels) { - using FuncPtr = CUresult(CUDAAPI *)( - CUmipmappedArray *, const CUDA_ARRAY3D_DESCRIPTOR *, unsigned int); - static auto func_ptr = LoadSymbol("cuMipmappedArrayCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHandle, pMipmappedArrayDesc, numMipmapLevels); -} - -CUresult CUDAAPI cuMipmappedArrayGetLevel(CUarray *pLevelArray, - CUmipmappedArray hMipmappedArray, - unsigned int level) { - using FuncPtr = - CUresult(CUDAAPI *)(CUarray *, CUmipmappedArray, unsigned int); - static auto func_ptr = LoadSymbol("cuMipmappedArrayGetLevel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pLevelArray, hMipmappedArray, level); -} - -CUresult CUDAAPI cuMipmappedArrayDestroy(CUmipmappedArray hMipmappedArray) { - using FuncPtr = 
CUresult(CUDAAPI *)(CUmipmappedArray); - static auto func_ptr = LoadSymbol("cuMipmappedArrayDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hMipmappedArray); -} - -CUresult CUDAAPI cuPointerGetAttribute(void *data, - CUpointer_attribute attribute, - CUdeviceptr ptr) { - using FuncPtr = CUresult(CUDAAPI *)(void *, CUpointer_attribute, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuPointerGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, attribute, ptr); -} - -CUresult CUDAAPI cuMemPrefetchAsync(CUdeviceptr devPtr, size_t count, - CUdevice dstDevice, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t, CUdevice, CUstream); - static auto func_ptr = LoadSymbol("cuMemPrefetchAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, dstDevice, hStream); -} - -CUresult CUDAAPI cuMemAdvise(CUdeviceptr devPtr, size_t count, - CUmem_advise advice, CUdevice device) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, size_t, CUmem_advise, CUdevice); - static auto func_ptr = LoadSymbol("cuMemAdvise"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, advice, device); -} - -CUresult CUDAAPI cuMemRangeGetAttribute(void *data, size_t dataSize, - CUmem_range_attribute attribute, - CUdeviceptr devPtr, size_t count) { - using FuncPtr = CUresult(CUDAAPI *)(void *, size_t, CUmem_range_attribute, - CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemRangeGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSize, attribute, devPtr, count); -} - -CUresult CUDAAPI cuMemRangeGetAttributes(void **data, size_t *dataSizes, - CUmem_range_attribute *attributes, - size_t numAttributes, - CUdeviceptr devPtr, size_t count) { - using FuncPtr = CUresult(CUDAAPI *)( - void **, size_t *, CUmem_range_attribute *, size_t, CUdeviceptr, size_t); - static auto func_ptr = 
LoadSymbol("cuMemRangeGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSizes, attributes, numAttributes, devPtr, count); -} - -CUresult CUDAAPI cuPointerSetAttribute(const void *value, - CUpointer_attribute attribute, - CUdeviceptr ptr) { - using FuncPtr = - CUresult(CUDAAPI *)(const void *, CUpointer_attribute, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuPointerSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attribute, ptr); -} - -CUresult CUDAAPI cuPointerGetAttributes(unsigned int numAttributes, - CUpointer_attribute *attributes, - void **data, CUdeviceptr ptr) { - using FuncPtr = CUresult(CUDAAPI *)(unsigned int, CUpointer_attribute *, - void **, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuPointerGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numAttributes, attributes, data, ptr); -} - -CUresult CUDAAPI cuStreamCreate(CUstream *phStream, unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream *, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phStream, Flags); -} - -CUresult CUDAAPI cuStreamCreateWithPriority(CUstream *phStream, - unsigned int flags, int priority) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream *, unsigned int, int); - static auto func_ptr = LoadSymbol("cuStreamCreateWithPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phStream, flags, priority); -} - -CUresult CUDAAPI cuStreamGetPriority(CUstream hStream, int *priority) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, int *); - static auto func_ptr = LoadSymbol("cuStreamGetPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, priority); -} - -CUresult CUDAAPI cuStreamGetFlags(CUstream hStream, unsigned int *flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, unsigned int *); - 
static auto func_ptr = LoadSymbol("cuStreamGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, flags); -} - -CUresult CUDAAPI cuStreamGetCtx(CUstream hStream, CUcontext *pctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUcontext *); - static auto func_ptr = LoadSymbol("cuStreamGetCtx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, pctx); -} - -CUresult CUDAAPI cuStreamWaitEvent(CUstream hStream, CUevent hEvent, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUevent, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamWaitEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, hEvent, Flags); -} - -CUresult CUDAAPI cuStreamAddCallback(CUstream hStream, - CUstreamCallback callback, void *userData, - unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUstreamCallback, void *, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamAddCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, callback, userData, flags); -} - -CUresult CUDAAPI cuStreamBeginCapture(CUstream hStream, - CUstreamCaptureMode mode) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUstreamCaptureMode); - static auto func_ptr = LoadSymbol("cuStreamBeginCapture_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, mode); -} - -CUresult CUDAAPI cuThreadExchangeStreamCaptureMode(CUstreamCaptureMode *mode) { - using FuncPtr = CUresult(CUDAAPI *)(CUstreamCaptureMode *); - static auto func_ptr = - LoadSymbol("cuThreadExchangeStreamCaptureMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mode); -} - -CUresult CUDAAPI cuStreamEndCapture(CUstream hStream, CUgraph *phGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUgraph *); - static auto func_ptr = LoadSymbol("cuStreamEndCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(hStream, phGraph); -} - -CUresult CUDAAPI cuStreamIsCapturing(CUstream hStream, - CUstreamCaptureStatus *captureStatus) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUstreamCaptureStatus *); - static auto func_ptr = LoadSymbol("cuStreamIsCapturing"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, captureStatus); -} - -CUresult CUDAAPI cuStreamGetCaptureInfo(CUstream hStream, - CUstreamCaptureStatus *captureStatus, - cuuint64_t *id) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUstreamCaptureStatus *, cuuint64_t *); - static auto func_ptr = LoadSymbol("cuStreamGetCaptureInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, captureStatus, id); -} - -CUresult CUDAAPI cuStreamAttachMemAsync(CUstream hStream, CUdeviceptr dptr, - size_t length, unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUdeviceptr, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamAttachMemAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, dptr, length, flags); -} - -CUresult CUDAAPI cuStreamQuery(CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream); - static auto func_ptr = LoadSymbol("cuStreamQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream); -} - -CUresult CUDAAPI cuStreamSynchronize(CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream); - static auto func_ptr = LoadSymbol("cuStreamSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream); -} - -CUresult CUDAAPI cuStreamDestroy(CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream); - static auto func_ptr = LoadSymbol("cuStreamDestroy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream); -} - -CUresult CUDAAPI cuEventCreate(CUevent *phEvent, unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent *, unsigned int); - static auto func_ptr = 
LoadSymbol("cuEventCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phEvent, Flags); -} - -CUresult CUDAAPI cuEventRecord(CUevent hEvent, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent, CUstream); - static auto func_ptr = LoadSymbol("cuEventRecord"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hEvent, hStream); -} - -CUresult CUDAAPI cuEventQuery(CUevent hEvent) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent); - static auto func_ptr = LoadSymbol("cuEventQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hEvent); -} - -CUresult CUDAAPI cuEventSynchronize(CUevent hEvent) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent); - static auto func_ptr = LoadSymbol("cuEventSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hEvent); -} - -CUresult CUDAAPI cuEventDestroy(CUevent hEvent) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent); - static auto func_ptr = LoadSymbol("cuEventDestroy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hEvent); -} - -CUresult CUDAAPI cuEventElapsedTime(float *pMilliseconds, CUevent hStart, - CUevent hEnd) { - using FuncPtr = CUresult(CUDAAPI *)(float *, CUevent, CUevent); - static auto func_ptr = LoadSymbol("cuEventElapsedTime"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pMilliseconds, hStart, hEnd); -} - -CUresult CUDAAPI -cuImportExternalMemory(CUexternalMemory *extMem_out, - const CUDA_EXTERNAL_MEMORY_HANDLE_DESC *memHandleDesc) { - using FuncPtr = CUresult(CUDAAPI *)(CUexternalMemory *, - const CUDA_EXTERNAL_MEMORY_HANDLE_DESC *); - static auto func_ptr = LoadSymbol("cuImportExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem_out, memHandleDesc); -} - -CUresult CUDAAPI cuExternalMemoryGetMappedBuffer( - CUdeviceptr *devPtr, CUexternalMemory extMem, - const CUDA_EXTERNAL_MEMORY_BUFFER_DESC *bufferDesc) { - using FuncPtr = 
CUresult(CUDAAPI *)(CUdeviceptr *, CUexternalMemory, - const CUDA_EXTERNAL_MEMORY_BUFFER_DESC *); - static auto func_ptr = LoadSymbol("cuExternalMemoryGetMappedBuffer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, extMem, bufferDesc); -} - -CUresult CUDAAPI cuExternalMemoryGetMappedMipmappedArray( - CUmipmappedArray *mipmap, CUexternalMemory extMem, - const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC *mipmapDesc) { - using FuncPtr = - CUresult(CUDAAPI *)(CUmipmappedArray *, CUexternalMemory, - const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC *); - static auto func_ptr = - LoadSymbol("cuExternalMemoryGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmap, extMem, mipmapDesc); -} - -CUresult CUDAAPI cuDestroyExternalMemory(CUexternalMemory extMem) { - using FuncPtr = CUresult(CUDAAPI *)(CUexternalMemory); - static auto func_ptr = LoadSymbol("cuDestroyExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem); -} - -CUresult CUDAAPI cuImportExternalSemaphore( - CUexternalSemaphore *extSem_out, - const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC *semHandleDesc) { - using FuncPtr = CUresult(CUDAAPI *)( - CUexternalSemaphore *, const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC *); - static auto func_ptr = LoadSymbol("cuImportExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem_out, semHandleDesc); -} - -CUresult CUDAAPI cuSignalExternalSemaphoresAsync( - const CUexternalSemaphore *extSemArray, - const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS *paramsArray, - unsigned int numExtSems, CUstream stream) { - using FuncPtr = CUresult(CUDAAPI *)( - const CUexternalSemaphore *, - const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS *, unsigned int, CUstream); - static auto func_ptr = LoadSymbol("cuSignalExternalSemaphoresAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - 
-CUresult CUDAAPI cuWaitExternalSemaphoresAsync( - const CUexternalSemaphore *extSemArray, - const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS *paramsArray, - unsigned int numExtSems, CUstream stream) { - using FuncPtr = CUresult(CUDAAPI *)( - const CUexternalSemaphore *, const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS *, - unsigned int, CUstream); - static auto func_ptr = LoadSymbol("cuWaitExternalSemaphoresAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -CUresult CUDAAPI cuDestroyExternalSemaphore(CUexternalSemaphore extSem) { - using FuncPtr = CUresult(CUDAAPI *)(CUexternalSemaphore); - static auto func_ptr = LoadSymbol("cuDestroyExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem); -} - -CUresult CUDAAPI cuStreamWaitValue32(CUstream stream, CUdeviceptr addr, - cuuint32_t value, unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUdeviceptr, cuuint32_t, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamWaitValue32"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, addr, value, flags); -} - -CUresult CUDAAPI cuStreamWaitValue64(CUstream stream, CUdeviceptr addr, - cuuint64_t value, unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUdeviceptr, cuuint64_t, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamWaitValue64"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, addr, value, flags); -} - -CUresult CUDAAPI cuStreamWriteValue32(CUstream stream, CUdeviceptr addr, - cuuint32_t value, unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUdeviceptr, cuuint32_t, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamWriteValue32"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, addr, value, flags); -} - -CUresult CUDAAPI cuStreamWriteValue64(CUstream stream, CUdeviceptr addr, - 
cuuint64_t value, unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUdeviceptr, cuuint64_t, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamWriteValue64"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, addr, value, flags); -} - -CUresult CUDAAPI cuStreamBatchMemOp(CUstream stream, unsigned int count, - CUstreamBatchMemOpParams *paramArray, - unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, unsigned int, - CUstreamBatchMemOpParams *, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamBatchMemOp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, count, paramArray, flags); -} - -CUresult CUDAAPI cuFuncGetAttribute(int *pi, CUfunction_attribute attrib, - CUfunction hfunc) { - using FuncPtr = CUresult(CUDAAPI *)(int *, CUfunction_attribute, CUfunction); - static auto func_ptr = LoadSymbol("cuFuncGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pi, attrib, hfunc); -} - -CUresult CUDAAPI cuFuncSetAttribute(CUfunction hfunc, - CUfunction_attribute attrib, int value) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, CUfunction_attribute, int); - static auto func_ptr = LoadSymbol("cuFuncSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, attrib, value); -} - -CUresult CUDAAPI cuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, CUfunc_cache); - static auto func_ptr = LoadSymbol("cuFuncSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, config); -} - -CUresult CUDAAPI cuFuncSetSharedMemConfig(CUfunction hfunc, - CUsharedconfig config) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, CUsharedconfig); - static auto func_ptr = LoadSymbol("cuFuncSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, config); -} - -CUresult CUDAAPI 
cuLaunchKernel(CUfunction f, unsigned int gridDimX, - unsigned int gridDimY, unsigned int gridDimZ, - unsigned int blockDimX, unsigned int blockDimY, - unsigned int blockDimZ, - unsigned int sharedMemBytes, CUstream hStream, - void **kernelParams, void **extra) { - using FuncPtr = CUresult(CUDAAPI *)( - CUfunction, unsigned int, unsigned int, unsigned int, unsigned int, - unsigned int, unsigned int, unsigned int, CUstream, void **, void **); - static auto func_ptr = LoadSymbol("cuLaunchKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, - blockDimZ, sharedMemBytes, hStream, kernelParams, extra); -} - -CUresult CUDAAPI cuLaunchCooperativeKernel( - CUfunction f, unsigned int gridDimX, unsigned int gridDimY, - unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, - unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, - void **kernelParams) { - using FuncPtr = CUresult(CUDAAPI *)( - CUfunction, unsigned int, unsigned int, unsigned int, unsigned int, - unsigned int, unsigned int, unsigned int, CUstream, void **); - static auto func_ptr = LoadSymbol("cuLaunchCooperativeKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, - blockDimZ, sharedMemBytes, hStream, kernelParams); -} - -CUresult CUDAAPI cuLaunchCooperativeKernelMultiDevice( - CUDA_LAUNCH_PARAMS *launchParamsList, unsigned int numDevices, - unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUDA_LAUNCH_PARAMS *, unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cuLaunchCooperativeKernelMultiDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(launchParamsList, numDevices, flags); -} - -CUresult CUDAAPI cuLaunchHostFunc(CUstream hStream, CUhostFn fn, - void *userData) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUhostFn, void *); - static auto func_ptr = 
LoadSymbol("cuLaunchHostFunc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, fn, userData); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuFuncSetBlockShape(CUfunction hfunc, int x, - int y, int z) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, int, int); - static auto func_ptr = LoadSymbol("cuFuncSetBlockShape"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, x, y, z); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuFuncSetSharedSize(CUfunction hfunc, - unsigned int bytes) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, unsigned int); - static auto func_ptr = LoadSymbol("cuFuncSetSharedSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, bytes); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuParamSetSize(CUfunction hfunc, - unsigned int numbytes) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, unsigned int); - static auto func_ptr = LoadSymbol("cuParamSetSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, numbytes); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuParamSeti(CUfunction hfunc, int offset, - unsigned int value) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, unsigned int); - static auto func_ptr = LoadSymbol("cuParamSeti"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, offset, value); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuParamSetf(CUfunction hfunc, int offset, - float value) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, float); - static auto func_ptr = LoadSymbol("cuParamSetf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, offset, value); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuParamSetv(CUfunction hfunc, int offset, - void *ptr, - unsigned int numbytes) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, void *, unsigned int); - static auto func_ptr = LoadSymbol("cuParamSetv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(hfunc, offset, ptr, numbytes); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuLaunch(CUfunction f) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction); - static auto func_ptr = LoadSymbol("cuLaunch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(f); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuLaunchGrid(CUfunction f, int grid_width, - int grid_height) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, int); - static auto func_ptr = LoadSymbol("cuLaunchGrid"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(f, grid_width, grid_height); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuLaunchGridAsync(CUfunction f, - int grid_width, - int grid_height, - CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, int, CUstream); - static auto func_ptr = LoadSymbol("cuLaunchGridAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(f, grid_width, grid_height, hStream); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuParamSetTexRef(CUfunction hfunc, - int texunit, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, CUtexref); - static auto func_ptr = LoadSymbol("cuParamSetTexRef"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, texunit, hTexRef); -} - -CUresult CUDAAPI cuGraphCreate(CUgraph *phGraph, unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph *, unsigned int); - static auto func_ptr = LoadSymbol("cuGraphCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraph, flags); -} - -CUresult CUDAAPI cuGraphAddKernelNode( - CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, - size_t numDependencies, const CUDA_KERNEL_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, - const CUDA_KERNEL_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphAddKernelNode"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - nodeParams); -} - -CUresult CUDAAPI cuGraphKernelNodeGetParams( - CUgraphNode hNode, CUDA_KERNEL_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_KERNEL_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphKernelNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphKernelNodeSetParams( - CUgraphNode hNode, const CUDA_KERNEL_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode, const CUDA_KERNEL_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphAddMemcpyNode(CUgraphNode *phGraphNode, CUgraph hGraph, - const CUgraphNode *dependencies, - size_t numDependencies, - const CUDA_MEMCPY3D *copyParams, - CUcontext ctx) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, - const CUDA_MEMCPY3D *, CUcontext); - static auto func_ptr = LoadSymbol("cuGraphAddMemcpyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - copyParams, ctx); -} - -CUresult CUDAAPI cuGraphMemcpyNodeGetParams(CUgraphNode hNode, - CUDA_MEMCPY3D *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_MEMCPY3D *); - static auto func_ptr = LoadSymbol("cuGraphMemcpyNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphMemcpyNodeSetParams(CUgraphNode hNode, - const CUDA_MEMCPY3D *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, const CUDA_MEMCPY3D *); - static auto func_ptr = LoadSymbol("cuGraphMemcpyNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, 
nodeParams); -} - -CUresult CUDAAPI cuGraphAddMemsetNode( - CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, - size_t numDependencies, const CUDA_MEMSET_NODE_PARAMS *memsetParams, - CUcontext ctx) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, - const CUDA_MEMSET_NODE_PARAMS *, CUcontext); - static auto func_ptr = LoadSymbol("cuGraphAddMemsetNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - memsetParams, ctx); -} - -CUresult CUDAAPI cuGraphMemsetNodeGetParams( - CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_MEMSET_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphMemsetNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphMemsetNodeSetParams( - CUgraphNode hNode, const CUDA_MEMSET_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode, const CUDA_MEMSET_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphAddHostNode(CUgraphNode *phGraphNode, CUgraph hGraph, - const CUgraphNode *dependencies, - size_t numDependencies, - const CUDA_HOST_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, - const CUDA_HOST_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphAddHostNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - nodeParams); -} - -CUresult CUDAAPI cuGraphHostNodeGetParams(CUgraphNode hNode, - CUDA_HOST_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_HOST_NODE_PARAMS *); - static auto func_ptr = 
LoadSymbol("cuGraphHostNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphHostNodeSetParams( - CUgraphNode hNode, const CUDA_HOST_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode, const CUDA_HOST_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphHostNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphAddChildGraphNode(CUgraphNode *phGraphNode, - CUgraph hGraph, - const CUgraphNode *dependencies, - size_t numDependencies, - CUgraph childGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, - const CUgraphNode *, size_t, CUgraph); - static auto func_ptr = LoadSymbol("cuGraphAddChildGraphNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - childGraph); -} - -CUresult CUDAAPI cuGraphChildGraphNodeGetGraph(CUgraphNode hNode, - CUgraph *phGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraph *); - static auto func_ptr = LoadSymbol("cuGraphChildGraphNodeGetGraph"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, phGraph); -} - -CUresult CUDAAPI cuGraphAddEmptyNode(CUgraphNode *phGraphNode, CUgraph hGraph, - const CUgraphNode *dependencies, - size_t numDependencies) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t); - static auto func_ptr = LoadSymbol("cuGraphAddEmptyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies); -} - -CUresult CUDAAPI cuGraphClone(CUgraph *phGraphClone, CUgraph originalGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph *, CUgraph); - static auto func_ptr = LoadSymbol("cuGraphClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphClone, originalGraph); -} - 
-CUresult CUDAAPI cuGraphNodeFindInClone(CUgraphNode *phNode, - CUgraphNode hOriginalNode, - CUgraph hClonedGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraphNode, CUgraph); - static auto func_ptr = LoadSymbol("cuGraphNodeFindInClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phNode, hOriginalNode, hClonedGraph); -} - -CUresult CUDAAPI cuGraphNodeGetType(CUgraphNode hNode, CUgraphNodeType *type) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraphNodeType *); - static auto func_ptr = LoadSymbol("cuGraphNodeGetType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, type); -} - -CUresult CUDAAPI cuGraphGetNodes(CUgraph hGraph, CUgraphNode *nodes, - size_t *numNodes) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph, CUgraphNode *, size_t *); - static auto func_ptr = LoadSymbol("cuGraphGetNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph, nodes, numNodes); -} - -CUresult CUDAAPI cuGraphGetRootNodes(CUgraph hGraph, CUgraphNode *rootNodes, - size_t *numRootNodes) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph, CUgraphNode *, size_t *); - static auto func_ptr = LoadSymbol("cuGraphGetRootNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph, rootNodes, numRootNodes); -} - -CUresult CUDAAPI cuGraphGetEdges(CUgraph hGraph, CUgraphNode *from, - CUgraphNode *to, size_t *numEdges) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraph, CUgraphNode *, CUgraphNode *, size_t *); - static auto func_ptr = LoadSymbol("cuGraphGetEdges"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph, from, to, numEdges); -} - -CUresult CUDAAPI cuGraphNodeGetDependencies(CUgraphNode hNode, - CUgraphNode *dependencies, - size_t *numDependencies) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraphNode *, size_t *); - static auto func_ptr = LoadSymbol("cuGraphNodeGetDependencies"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(hNode, dependencies, numDependencies); -} - -CUresult CUDAAPI cuGraphNodeGetDependentNodes(CUgraphNode hNode, - CUgraphNode *dependentNodes, - size_t *numDependentNodes) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraphNode *, size_t *); - static auto func_ptr = LoadSymbol("cuGraphNodeGetDependentNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, dependentNodes, numDependentNodes); -} - -CUresult CUDAAPI cuGraphAddDependencies(CUgraph hGraph, const CUgraphNode *from, - const CUgraphNode *to, - size_t numDependencies) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph, const CUgraphNode *, - const CUgraphNode *, size_t); - static auto func_ptr = LoadSymbol("cuGraphAddDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph, from, to, numDependencies); -} - -CUresult CUDAAPI cuGraphRemoveDependencies(CUgraph hGraph, - const CUgraphNode *from, - const CUgraphNode *to, - size_t numDependencies) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph, const CUgraphNode *, - const CUgraphNode *, size_t); - static auto func_ptr = LoadSymbol("cuGraphRemoveDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph, from, to, numDependencies); -} - -CUresult CUDAAPI cuGraphDestroyNode(CUgraphNode hNode) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode); - static auto func_ptr = LoadSymbol("cuGraphDestroyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode); -} - -CUresult CUDAAPI cuGraphInstantiate(CUgraphExec *phGraphExec, CUgraph hGraph, - CUgraphNode *phErrorNode, char *logBuffer, - size_t bufferSize) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec *, CUgraph, CUgraphNode *, - char *, size_t); - static auto func_ptr = LoadSymbol("cuGraphInstantiate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphExec, hGraph, phErrorNode, logBuffer, bufferSize); -} - -CUresult 
CUDAAPI -cuGraphExecKernelNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, - const CUDA_KERNEL_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, - const CUDA_KERNEL_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphExecKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphLaunch(CUgraphExec hGraphExec, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUstream); - static auto func_ptr = LoadSymbol("cuGraphLaunch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hStream); -} - -CUresult CUDAAPI cuGraphExecDestroy(CUgraphExec hGraphExec) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec); - static auto func_ptr = LoadSymbol("cuGraphExecDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec); -} - -CUresult CUDAAPI cuGraphDestroy(CUgraph hGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph); - static auto func_ptr = LoadSymbol("cuGraphDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph); -} - -CUresult CUDAAPI cuOccupancyMaxActiveBlocksPerMultiprocessor( - int *numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize) { - using FuncPtr = CUresult(CUDAAPI *)(int *, CUfunction, int, size_t); - static auto func_ptr = - LoadSymbol("cuOccupancyMaxActiveBlocksPerMultiprocessor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize); -} - -CUresult CUDAAPI cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( - int *numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize, - unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(int *, CUfunction, int, size_t, unsigned int); - static auto func_ptr = LoadSymbol( - "cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize, flags); -} - -CUresult CUDAAPI cuOccupancyMaxPotentialBlockSize( - int *minGridSize, int *blockSize, CUfunction func, - CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, - int blockSizeLimit) { - using FuncPtr = CUresult(CUDAAPI *)(int *, int *, CUfunction, - CUoccupancyB2DSize, size_t, int); - static auto func_ptr = - LoadSymbol("cuOccupancyMaxPotentialBlockSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(minGridSize, blockSize, func, blockSizeToDynamicSMemSize, - dynamicSMemSize, blockSizeLimit); -} - -CUresult CUDAAPI cuOccupancyMaxPotentialBlockSizeWithFlags( - int *minGridSize, int *blockSize, CUfunction func, - CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, - int blockSizeLimit, unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)( - int *, int *, CUfunction, CUoccupancyB2DSize, size_t, int, unsigned int); - static auto func_ptr = - LoadSymbol("cuOccupancyMaxPotentialBlockSizeWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(minGridSize, blockSize, func, blockSizeToDynamicSMemSize, - dynamicSMemSize, blockSizeLimit, flags); -} - -CUresult CUDAAPI cuTexRefSetArray(CUtexref hTexRef, CUarray hArray, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUarray, unsigned int); - static auto func_ptr = LoadSymbol("cuTexRefSetArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, hArray, Flags); -} - -CUresult CUDAAPI cuTexRefSetMipmappedArray(CUtexref hTexRef, - CUmipmappedArray hMipmappedArray, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUmipmappedArray, unsigned int); - static auto func_ptr = LoadSymbol("cuTexRefSetMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, hMipmappedArray, Flags); -} - -CUresult CUDAAPI cuTexRefSetAddress(size_t 
*ByteOffset, CUtexref hTexRef, - CUdeviceptr dptr, size_t bytes) { - using FuncPtr = CUresult(CUDAAPI *)(size_t *, CUtexref, CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuTexRefSetAddress_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ByteOffset, hTexRef, dptr, bytes); -} - -CUresult CUDAAPI cuTexRefSetAddress2D(CUtexref hTexRef, - const CUDA_ARRAY_DESCRIPTOR *desc, - CUdeviceptr dptr, size_t Pitch) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, const CUDA_ARRAY_DESCRIPTOR *, - CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuTexRefSetAddress2D_v3"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, desc, dptr, Pitch); -} - -CUresult CUDAAPI cuTexRefSetFormat(CUtexref hTexRef, CUarray_format fmt, - int NumPackedComponents) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUarray_format, int); - static auto func_ptr = LoadSymbol("cuTexRefSetFormat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, fmt, NumPackedComponents); -} - -CUresult CUDAAPI cuTexRefSetAddressMode(CUtexref hTexRef, int dim, - CUaddress_mode am) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, int, CUaddress_mode); - static auto func_ptr = LoadSymbol("cuTexRefSetAddressMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, dim, am); -} - -CUresult CUDAAPI cuTexRefSetFilterMode(CUtexref hTexRef, CUfilter_mode fm) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUfilter_mode); - static auto func_ptr = LoadSymbol("cuTexRefSetFilterMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, fm); -} - -CUresult CUDAAPI cuTexRefSetMipmapFilterMode(CUtexref hTexRef, - CUfilter_mode fm) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUfilter_mode); - static auto func_ptr = LoadSymbol("cuTexRefSetMipmapFilterMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, fm); -} - -CUresult CUDAAPI 
cuTexRefSetMipmapLevelBias(CUtexref hTexRef, float bias) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, float); - static auto func_ptr = LoadSymbol("cuTexRefSetMipmapLevelBias"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, bias); -} - -CUresult CUDAAPI cuTexRefSetMipmapLevelClamp(CUtexref hTexRef, - float minMipmapLevelClamp, - float maxMipmapLevelClamp) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, float, float); - static auto func_ptr = LoadSymbol("cuTexRefSetMipmapLevelClamp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, minMipmapLevelClamp, maxMipmapLevelClamp); -} - -CUresult CUDAAPI cuTexRefSetMaxAnisotropy(CUtexref hTexRef, - unsigned int maxAniso) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, unsigned int); - static auto func_ptr = LoadSymbol("cuTexRefSetMaxAnisotropy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, maxAniso); -} - -CUresult CUDAAPI cuTexRefSetBorderColor(CUtexref hTexRef, float *pBorderColor) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, float *); - static auto func_ptr = LoadSymbol("cuTexRefSetBorderColor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, pBorderColor); -} - -CUresult CUDAAPI cuTexRefSetFlags(CUtexref hTexRef, unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, unsigned int); - static auto func_ptr = LoadSymbol("cuTexRefSetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, Flags); -} - -CUresult CUDAAPI cuTexRefGetAddress(CUdeviceptr *pdptr, CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetAddress_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pdptr, hTexRef); -} - -CUresult CUDAAPI cuTexRefGetArray(CUarray *phArray, CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray *, CUtexref); - static auto func_ptr = 
LoadSymbol("cuTexRefGetArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phArray, hTexRef); -} - -CUresult CUDAAPI cuTexRefGetMipmappedArray(CUmipmappedArray *phMipmappedArray, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUmipmappedArray *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phMipmappedArray, hTexRef); -} - -CUresult CUDAAPI cuTexRefGetAddressMode(CUaddress_mode *pam, CUtexref hTexRef, - int dim) { - using FuncPtr = CUresult(CUDAAPI *)(CUaddress_mode *, CUtexref, int); - static auto func_ptr = LoadSymbol("cuTexRefGetAddressMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pam, hTexRef, dim); -} - -CUresult CUDAAPI cuTexRefGetFilterMode(CUfilter_mode *pfm, CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUfilter_mode *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetFilterMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pfm, hTexRef); -} - -CUresult CUDAAPI cuTexRefGetFormat(CUarray_format *pFormat, int *pNumChannels, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray_format *, int *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetFormat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFormat, pNumChannels, hTexRef); -} - -CUresult CUDAAPI cuTexRefGetMipmapFilterMode(CUfilter_mode *pfm, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUfilter_mode *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetMipmapFilterMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pfm, hTexRef); -} - -CUresult CUDAAPI cuTexRefGetMipmapLevelBias(float *pbias, CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(float *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetMipmapLevelBias"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(pbias, hTexRef); -} - -CUresult CUDAAPI cuTexRefGetMipmapLevelClamp(float *pminMipmapLevelClamp, - float *pmaxMipmapLevelClamp, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(float *, float *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetMipmapLevelClamp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pminMipmapLevelClamp, pmaxMipmapLevelClamp, hTexRef); -} - -CUresult CUDAAPI cuTexRefGetMaxAnisotropy(int *pmaxAniso, CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(int *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetMaxAnisotropy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pmaxAniso, hTexRef); -} - -CUresult CUDAAPI cuTexRefGetBorderColor(float *pBorderColor, CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(float *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetBorderColor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pBorderColor, hTexRef); -} - -CUresult CUDAAPI cuTexRefGetFlags(unsigned int *pFlags, CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(unsigned int *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFlags, hTexRef); -} - -CUresult CUDAAPI cuTexRefCreate(CUtexref *pTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref *); - static auto func_ptr = LoadSymbol("cuTexRefCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexRef); -} - -CUresult CUDAAPI cuTexRefDestroy(CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef); -} - -CUresult CUDAAPI cuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUsurfref, CUarray, unsigned int); - static auto func_ptr = 
LoadSymbol("cuSurfRefSetArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hSurfRef, hArray, Flags); -} - -CUresult CUDAAPI cuSurfRefGetArray(CUarray *phArray, CUsurfref hSurfRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray *, CUsurfref); - static auto func_ptr = LoadSymbol("cuSurfRefGetArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phArray, hSurfRef); -} - -CUresult CUDAAPI -cuTexObjectCreate(CUtexObject *pTexObject, const CUDA_RESOURCE_DESC *pResDesc, - const CUDA_TEXTURE_DESC *pTexDesc, - const CUDA_RESOURCE_VIEW_DESC *pResViewDesc) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexObject *, const CUDA_RESOURCE_DESC *, - const CUDA_TEXTURE_DESC *, - const CUDA_RESOURCE_VIEW_DESC *); - static auto func_ptr = LoadSymbol("cuTexObjectCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexObject, pResDesc, pTexDesc, pResViewDesc); -} - -CUresult CUDAAPI cuTexObjectDestroy(CUtexObject texObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexObject); - static auto func_ptr = LoadSymbol("cuTexObjectDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texObject); -} - -CUresult CUDAAPI cuTexObjectGetResourceDesc(CUDA_RESOURCE_DESC *pResDesc, - CUtexObject texObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_RESOURCE_DESC *, CUtexObject); - static auto func_ptr = LoadSymbol("cuTexObjectGetResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, texObject); -} - -CUresult CUDAAPI cuTexObjectGetTextureDesc(CUDA_TEXTURE_DESC *pTexDesc, - CUtexObject texObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_TEXTURE_DESC *, CUtexObject); - static auto func_ptr = LoadSymbol("cuTexObjectGetTextureDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexDesc, texObject); -} - -CUresult CUDAAPI cuTexObjectGetResourceViewDesc( - CUDA_RESOURCE_VIEW_DESC *pResViewDesc, CUtexObject texObject) { - using FuncPtr = 
CUresult(CUDAAPI *)(CUDA_RESOURCE_VIEW_DESC *, CUtexObject); - static auto func_ptr = LoadSymbol("cuTexObjectGetResourceViewDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResViewDesc, texObject); -} - -CUresult CUDAAPI cuSurfObjectCreate(CUsurfObject *pSurfObject, - const CUDA_RESOURCE_DESC *pResDesc) { - using FuncPtr = - CUresult(CUDAAPI *)(CUsurfObject *, const CUDA_RESOURCE_DESC *); - static auto func_ptr = LoadSymbol("cuSurfObjectCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pSurfObject, pResDesc); -} - -CUresult CUDAAPI cuSurfObjectDestroy(CUsurfObject surfObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUsurfObject); - static auto func_ptr = LoadSymbol("cuSurfObjectDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfObject); -} - -CUresult CUDAAPI cuSurfObjectGetResourceDesc(CUDA_RESOURCE_DESC *pResDesc, - CUsurfObject surfObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_RESOURCE_DESC *, CUsurfObject); - static auto func_ptr = LoadSymbol("cuSurfObjectGetResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, surfObject); -} - -CUresult CUDAAPI cuDeviceCanAccessPeer(int *canAccessPeer, CUdevice dev, - CUdevice peerDev) { - using FuncPtr = CUresult(CUDAAPI *)(int *, CUdevice, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceCanAccessPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(canAccessPeer, dev, peerDev); -} - -CUresult CUDAAPI cuCtxEnablePeerAccess(CUcontext peerContext, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext, unsigned int); - static auto func_ptr = LoadSymbol("cuCtxEnablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerContext, Flags); -} - -CUresult CUDAAPI cuCtxDisablePeerAccess(CUcontext peerContext) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuCtxDisablePeerAccess"); - 
if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerContext); -} - -CUresult CUDAAPI cuDeviceGetP2PAttribute(int *value, - CUdevice_P2PAttribute attrib, - CUdevice srcDevice, - CUdevice dstDevice) { - using FuncPtr = - CUresult(CUDAAPI *)(int *, CUdevice_P2PAttribute, CUdevice, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetP2PAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attrib, srcDevice, dstDevice); -} - -CUresult CUDAAPI cuGraphicsUnregisterResource(CUgraphicsResource resource) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphicsResource); - static auto func_ptr = LoadSymbol("cuGraphicsUnregisterResource"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource); -} - -CUresult CUDAAPI cuGraphicsSubResourceGetMappedArray( - CUarray *pArray, CUgraphicsResource resource, unsigned int arrayIndex, - unsigned int mipLevel) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray *, CUgraphicsResource, - unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cuGraphicsSubResourceGetMappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pArray, resource, arrayIndex, mipLevel); -} - -CUresult CUDAAPI cuGraphicsResourceGetMappedMipmappedArray( - CUmipmappedArray *pMipmappedArray, CUgraphicsResource resource) { - using FuncPtr = CUresult(CUDAAPI *)(CUmipmappedArray *, CUgraphicsResource); - static auto func_ptr = - LoadSymbol("cuGraphicsResourceGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pMipmappedArray, resource); -} - -CUresult CUDAAPI cuGraphicsResourceGetMappedPointer( - CUdeviceptr *pDevPtr, size_t *pSize, CUgraphicsResource resource) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr *, size_t *, CUgraphicsResource); - static auto func_ptr = - LoadSymbol("cuGraphicsResourceGetMappedPointer_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pDevPtr, pSize, resource); 
-} - -CUresult CUDAAPI cuGraphicsResourceSetMapFlags(CUgraphicsResource resource, - unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphicsResource, unsigned int); - static auto func_ptr = - LoadSymbol("cuGraphicsResourceSetMapFlags_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource, flags); -} - -CUresult CUDAAPI cuGraphicsMapResources(unsigned int count, - CUgraphicsResource *resources, - CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(unsigned int, CUgraphicsResource *, CUstream); - static auto func_ptr = LoadSymbol("cuGraphicsMapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, hStream); -} - -CUresult CUDAAPI cuGraphicsUnmapResources(unsigned int count, - CUgraphicsResource *resources, - CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(unsigned int, CUgraphicsResource *, CUstream); - static auto func_ptr = LoadSymbol("cuGraphicsUnmapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, hStream); -} - -CUresult CUDAAPI cuGetExportTable(const void **ppExportTable, - const CUuuid *pExportTableId) { - using FuncPtr = CUresult(CUDAAPI *)(const void **, const CUuuid *); - static auto func_ptr = LoadSymbol("cuGetExportTable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ppExportTable, pExportTableId); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cuda_10_2.inc b/third_party/xla/third_party/tsl/tsl/cuda/cuda_10_2.inc deleted file mode 100644 index f37fc9d888de44..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cuda_10_2.inc +++ /dev/null @@ -1,2328 +0,0 @@ -// Auto-generated, do not edit. 
- -extern "C" { - -CUresult CUDAAPI cuGetErrorString(CUresult error, const char **pStr) { - using FuncPtr = CUresult(CUDAAPI *)(CUresult, const char **); - static auto func_ptr = LoadSymbol("cuGetErrorString"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(error, pStr); -} - -CUresult CUDAAPI cuGetErrorName(CUresult error, const char **pStr) { - using FuncPtr = CUresult(CUDAAPI *)(CUresult, const char **); - static auto func_ptr = LoadSymbol("cuGetErrorName"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(error, pStr); -} - -CUresult CUDAAPI cuInit(unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(unsigned int); - static auto func_ptr = LoadSymbol("cuInit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(Flags); -} - -CUresult CUDAAPI cuDriverGetVersion(int *driverVersion) { - using FuncPtr = CUresult(CUDAAPI *)(int *); - static auto func_ptr = LoadSymbol("cuDriverGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(driverVersion); -} - -CUresult CUDAAPI cuDeviceGet(CUdevice *device, int ordinal) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice *, int); - static auto func_ptr = LoadSymbol("cuDeviceGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, ordinal); -} - -CUresult CUDAAPI cuDeviceGetCount(int *count) { - using FuncPtr = CUresult(CUDAAPI *)(int *); - static auto func_ptr = LoadSymbol("cuDeviceGetCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count); -} - -CUresult CUDAAPI cuDeviceGetName(char *name, int len, CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(char *, int, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetName"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(name, len, dev); -} - -CUresult CUDAAPI cuDeviceGetUuid(CUuuid *uuid, CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUuuid *, CUdevice); - static auto func_ptr = 
LoadSymbol("cuDeviceGetUuid"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(uuid, dev); -} - -CUresult CUDAAPI cuDeviceTotalMem(size_t *bytes, CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(size_t *, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceTotalMem_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(bytes, dev); -} - -CUresult CUDAAPI cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, - CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(int *, CUdevice_attribute, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pi, attrib, dev); -} - -CUresult CUDAAPI cuDeviceGetNvSciSyncAttributes(void *nvSciSyncAttrList, - CUdevice dev, int flags) { - using FuncPtr = CUresult(CUDAAPI *)(void *, CUdevice, int); - static auto func_ptr = LoadSymbol("cuDeviceGetNvSciSyncAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(nvSciSyncAttrList, dev, flags); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuDeviceGetProperties(CUdevprop *prop, - CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevprop *, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(prop, dev); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuDeviceComputeCapability(int *major, - int *minor, - CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(int *, int *, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceComputeCapability"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(major, minor, dev); -} - -CUresult CUDAAPI cuDevicePrimaryCtxRetain(CUcontext *pctx, CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext *, CUdevice); - static auto func_ptr = LoadSymbol("cuDevicePrimaryCtxRetain"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pctx, dev); -} - -CUresult CUDAAPI 
cuDevicePrimaryCtxRelease(CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice); - static auto func_ptr = LoadSymbol("cuDevicePrimaryCtxRelease"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev); -} - -CUresult CUDAAPI cuDevicePrimaryCtxSetFlags(CUdevice dev, unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice, unsigned int); - static auto func_ptr = LoadSymbol("cuDevicePrimaryCtxSetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev, flags); -} - -CUresult CUDAAPI cuDevicePrimaryCtxGetState(CUdevice dev, unsigned int *flags, - int *active) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice, unsigned int *, int *); - static auto func_ptr = LoadSymbol("cuDevicePrimaryCtxGetState"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev, flags, active); -} - -CUresult CUDAAPI cuDevicePrimaryCtxReset(CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice); - static auto func_ptr = LoadSymbol("cuDevicePrimaryCtxReset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev); -} - -CUresult CUDAAPI cuCtxCreate(CUcontext *pctx, unsigned int flags, - CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext *, unsigned int, CUdevice); - static auto func_ptr = LoadSymbol("cuCtxCreate_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pctx, flags, dev); -} - -CUresult CUDAAPI cuCtxDestroy(CUcontext ctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuCtxDestroy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx); -} - -CUresult CUDAAPI cuCtxPushCurrent(CUcontext ctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuCtxPushCurrent_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx); -} - -CUresult CUDAAPI cuCtxPopCurrent(CUcontext *pctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext *); 
- static auto func_ptr = LoadSymbol("cuCtxPopCurrent_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pctx); -} - -CUresult CUDAAPI cuCtxSetCurrent(CUcontext ctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuCtxSetCurrent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx); -} - -CUresult CUDAAPI cuCtxGetCurrent(CUcontext *pctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext *); - static auto func_ptr = LoadSymbol("cuCtxGetCurrent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pctx); -} - -CUresult CUDAAPI cuCtxGetDevice(CUdevice *device) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice *); - static auto func_ptr = LoadSymbol("cuCtxGetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -CUresult CUDAAPI cuCtxGetFlags(unsigned int *flags) { - using FuncPtr = CUresult(CUDAAPI *)(unsigned int *); - static auto func_ptr = LoadSymbol("cuCtxGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -CUresult CUDAAPI cuCtxSynchronize(void) { - using FuncPtr = CUresult(CUDAAPI *)(); - static auto func_ptr = LoadSymbol("cuCtxSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -CUresult CUDAAPI cuCtxSetLimit(CUlimit limit, size_t value) { - using FuncPtr = CUresult(CUDAAPI *)(CUlimit, size_t); - static auto func_ptr = LoadSymbol("cuCtxSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -CUresult CUDAAPI cuCtxGetLimit(size_t *pvalue, CUlimit limit) { - using FuncPtr = CUresult(CUDAAPI *)(size_t *, CUlimit); - static auto func_ptr = LoadSymbol("cuCtxGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pvalue, limit); -} - -CUresult CUDAAPI cuCtxGetCacheConfig(CUfunc_cache *pconfig) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunc_cache *); - static auto func_ptr = 
LoadSymbol("cuCtxGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pconfig); -} - -CUresult CUDAAPI cuCtxSetCacheConfig(CUfunc_cache config) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunc_cache); - static auto func_ptr = LoadSymbol("cuCtxSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config); -} - -CUresult CUDAAPI cuCtxGetSharedMemConfig(CUsharedconfig *pConfig) { - using FuncPtr = CUresult(CUDAAPI *)(CUsharedconfig *); - static auto func_ptr = LoadSymbol("cuCtxGetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pConfig); -} - -CUresult CUDAAPI cuCtxSetSharedMemConfig(CUsharedconfig config) { - using FuncPtr = CUresult(CUDAAPI *)(CUsharedconfig); - static auto func_ptr = LoadSymbol("cuCtxSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config); -} - -CUresult CUDAAPI cuCtxGetApiVersion(CUcontext ctx, unsigned int *version) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext, unsigned int *); - static auto func_ptr = LoadSymbol("cuCtxGetApiVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx, version); -} - -CUresult CUDAAPI cuCtxGetStreamPriorityRange(int *leastPriority, - int *greatestPriority) { - using FuncPtr = CUresult(CUDAAPI *)(int *, int *); - static auto func_ptr = LoadSymbol("cuCtxGetStreamPriorityRange"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(leastPriority, greatestPriority); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuCtxAttach(CUcontext *pctx, - unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext *, unsigned int); - static auto func_ptr = LoadSymbol("cuCtxAttach"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pctx, flags); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuCtxDetach(CUcontext ctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuCtxDetach"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx); -} - -CUresult CUDAAPI cuModuleLoad(CUmodule *module, const char *fname) { - using FuncPtr = CUresult(CUDAAPI *)(CUmodule *, const char *); - static auto func_ptr = LoadSymbol("cuModuleLoad"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(module, fname); -} - -CUresult CUDAAPI cuModuleLoadData(CUmodule *module, const void *image) { - using FuncPtr = CUresult(CUDAAPI *)(CUmodule *, const void *); - static auto func_ptr = LoadSymbol("cuModuleLoadData"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(module, image); -} - -CUresult CUDAAPI cuModuleLoadDataEx(CUmodule *module, const void *image, - unsigned int numOptions, - CUjit_option *options, - void **optionValues) { - using FuncPtr = CUresult(CUDAAPI *)(CUmodule *, const void *, unsigned int, - CUjit_option *, void **); - static auto func_ptr = LoadSymbol("cuModuleLoadDataEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(module, image, numOptions, options, optionValues); -} - -CUresult CUDAAPI cuModuleLoadFatBinary(CUmodule *module, const void *fatCubin) { - using FuncPtr = CUresult(CUDAAPI *)(CUmodule *, const void *); - static auto func_ptr = LoadSymbol("cuModuleLoadFatBinary"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(module, fatCubin); -} - -CUresult CUDAAPI cuModuleUnload(CUmodule hmod) { - using FuncPtr = CUresult(CUDAAPI *)(CUmodule); - static auto func_ptr = LoadSymbol("cuModuleUnload"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hmod); -} - -CUresult CUDAAPI cuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, - const char *name) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction *, CUmodule, const char *); - static auto func_ptr = LoadSymbol("cuModuleGetFunction"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, hmod, name); -} - -CUresult CUDAAPI cuModuleGetGlobal(CUdeviceptr *dptr, size_t 
*bytes, - CUmodule hmod, const char *name) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr *, size_t *, CUmodule, const char *); - static auto func_ptr = LoadSymbol("cuModuleGetGlobal_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr, bytes, hmod, name); -} - -CUresult CUDAAPI cuModuleGetTexRef(CUtexref *pTexRef, CUmodule hmod, - const char *name) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref *, CUmodule, const char *); - static auto func_ptr = LoadSymbol("cuModuleGetTexRef"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexRef, hmod, name); -} - -CUresult CUDAAPI cuModuleGetSurfRef(CUsurfref *pSurfRef, CUmodule hmod, - const char *name) { - using FuncPtr = CUresult(CUDAAPI *)(CUsurfref *, CUmodule, const char *); - static auto func_ptr = LoadSymbol("cuModuleGetSurfRef"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pSurfRef, hmod, name); -} - -CUresult CUDAAPI cuLinkCreate(unsigned int numOptions, CUjit_option *options, - void **optionValues, CUlinkState *stateOut) { - using FuncPtr = - CUresult(CUDAAPI *)(unsigned int, CUjit_option *, void **, CUlinkState *); - static auto func_ptr = LoadSymbol("cuLinkCreate_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numOptions, options, optionValues, stateOut); -} - -CUresult CUDAAPI cuLinkAddData(CUlinkState state, CUjitInputType type, - void *data, size_t size, const char *name, - unsigned int numOptions, CUjit_option *options, - void **optionValues) { - using FuncPtr = - CUresult(CUDAAPI *)(CUlinkState, CUjitInputType, void *, size_t, - const char *, unsigned int, CUjit_option *, void **); - static auto func_ptr = LoadSymbol("cuLinkAddData_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(state, type, data, size, name, numOptions, options, - optionValues); -} - -CUresult CUDAAPI cuLinkAddFile(CUlinkState state, CUjitInputType type, - const char *path, unsigned int numOptions, - 
CUjit_option *options, void **optionValues) { - using FuncPtr = CUresult(CUDAAPI *)(CUlinkState, CUjitInputType, const char *, - unsigned int, CUjit_option *, void **); - static auto func_ptr = LoadSymbol("cuLinkAddFile_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(state, type, path, numOptions, options, optionValues); -} - -CUresult CUDAAPI cuLinkComplete(CUlinkState state, void **cubinOut, - size_t *sizeOut) { - using FuncPtr = CUresult(CUDAAPI *)(CUlinkState, void **, size_t *); - static auto func_ptr = LoadSymbol("cuLinkComplete"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(state, cubinOut, sizeOut); -} - -CUresult CUDAAPI cuLinkDestroy(CUlinkState state) { - using FuncPtr = CUresult(CUDAAPI *)(CUlinkState); - static auto func_ptr = LoadSymbol("cuLinkDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(state); -} - -CUresult CUDAAPI cuMemGetInfo(size_t *free, size_t *total) { - using FuncPtr = CUresult(CUDAAPI *)(size_t *, size_t *); - static auto func_ptr = LoadSymbol("cuMemGetInfo_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(free, total); -} - -CUresult CUDAAPI cuMemAlloc(CUdeviceptr *dptr, size_t bytesize) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t); - static auto func_ptr = LoadSymbol("cuMemAlloc_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr, bytesize); -} - -CUresult CUDAAPI cuMemAllocPitch(CUdeviceptr *dptr, size_t *pPitch, - size_t WidthInBytes, size_t Height, - unsigned int ElementSizeBytes) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t *, size_t, size_t, - unsigned int); - static auto func_ptr = LoadSymbol("cuMemAllocPitch_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr, pPitch, WidthInBytes, Height, ElementSizeBytes); -} - -CUresult CUDAAPI cuMemFree(CUdeviceptr dptr) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr); - static auto func_ptr = 
LoadSymbol("cuMemFree_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr); -} - -CUresult CUDAAPI cuMemGetAddressRange(CUdeviceptr *pbase, size_t *psize, - CUdeviceptr dptr) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t *, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuMemGetAddressRange_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pbase, psize, dptr); -} - -CUresult CUDAAPI cuMemAllocHost(void **pp, size_t bytesize) { - using FuncPtr = CUresult(CUDAAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cuMemAllocHost_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pp, bytesize); -} - -CUresult CUDAAPI cuMemFreeHost(void *p) { - using FuncPtr = CUresult(CUDAAPI *)(void *); - static auto func_ptr = LoadSymbol("cuMemFreeHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -CUresult CUDAAPI cuMemHostAlloc(void **pp, size_t bytesize, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cuMemHostAlloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pp, bytesize, Flags); -} - -CUresult CUDAAPI cuMemHostGetDevicePointer(CUdeviceptr *pdptr, void *p, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, void *, unsigned int); - static auto func_ptr = LoadSymbol("cuMemHostGetDevicePointer_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pdptr, p, Flags); -} - -CUresult CUDAAPI cuMemHostGetFlags(unsigned int *pFlags, void *p) { - using FuncPtr = CUresult(CUDAAPI *)(unsigned int *, void *); - static auto func_ptr = LoadSymbol("cuMemHostGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFlags, p); -} - -CUresult CUDAAPI cuMemAllocManaged(CUdeviceptr *dptr, size_t bytesize, - unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t, 
unsigned int); - static auto func_ptr = LoadSymbol("cuMemAllocManaged"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr, bytesize, flags); -} - -CUresult CUDAAPI cuDeviceGetByPCIBusId(CUdevice *dev, const char *pciBusId) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice *, const char *); - static auto func_ptr = LoadSymbol("cuDeviceGetByPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev, pciBusId); -} - -CUresult CUDAAPI cuDeviceGetPCIBusId(char *pciBusId, int len, CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(char *, int, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pciBusId, len, dev); -} - -CUresult CUDAAPI cuIpcGetEventHandle(CUipcEventHandle *pHandle, CUevent event) { - using FuncPtr = CUresult(CUDAAPI *)(CUipcEventHandle *, CUevent); - static auto func_ptr = LoadSymbol("cuIpcGetEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHandle, event); -} - -CUresult CUDAAPI cuIpcOpenEventHandle(CUevent *phEvent, - CUipcEventHandle handle) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent *, CUipcEventHandle); - static auto func_ptr = LoadSymbol("cuIpcOpenEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phEvent, handle); -} - -CUresult CUDAAPI cuIpcGetMemHandle(CUipcMemHandle *pHandle, CUdeviceptr dptr) { - using FuncPtr = CUresult(CUDAAPI *)(CUipcMemHandle *, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuIpcGetMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHandle, dptr); -} - -CUresult CUDAAPI cuIpcOpenMemHandle(CUdeviceptr *pdptr, CUipcMemHandle handle, - unsigned int Flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr *, CUipcMemHandle, unsigned int); - static auto func_ptr = LoadSymbol("cuIpcOpenMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pdptr, handle, 
Flags); -} - -CUresult CUDAAPI cuIpcCloseMemHandle(CUdeviceptr dptr) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr); - static auto func_ptr = LoadSymbol("cuIpcCloseMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr); -} - -CUresult CUDAAPI cuMemHostRegister(void *p, size_t bytesize, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cuMemHostRegister_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, bytesize, Flags); -} - -CUresult CUDAAPI cuMemHostUnregister(void *p) { - using FuncPtr = CUresult(CUDAAPI *)(void *); - static auto func_ptr = LoadSymbol("cuMemHostUnregister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -CUresult CUDAAPI cuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemcpy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, ByteCount); -} - -CUresult CUDAAPI cuMemcpyPeer(CUdeviceptr dstDevice, CUcontext dstContext, - CUdeviceptr srcDevice, CUcontext srcContext, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUcontext, CUdeviceptr, - CUcontext, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstContext, srcDevice, srcContext, ByteCount); -} - -CUresult CUDAAPI cuMemcpyHtoD(CUdeviceptr dstDevice, const void *srcHost, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, const void *, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyHtoD_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, srcHost, ByteCount); -} - -CUresult CUDAAPI cuMemcpyDtoH(void *dstHost, CUdeviceptr srcDevice, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(void *, 
CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyDtoH_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstHost, srcDevice, ByteCount); -} - -CUresult CUDAAPI cuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyDtoD_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, srcDevice, ByteCount); -} - -CUresult CUDAAPI cuMemcpyDtoA(CUarray dstArray, size_t dstOffset, - CUdeviceptr srcDevice, size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray, size_t, CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyDtoA_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstArray, dstOffset, srcDevice, ByteCount); -} - -CUresult CUDAAPI cuMemcpyAtoD(CUdeviceptr dstDevice, CUarray srcArray, - size_t srcOffset, size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUarray, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyAtoD_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, srcArray, srcOffset, ByteCount); -} - -CUresult CUDAAPI cuMemcpyHtoA(CUarray dstArray, size_t dstOffset, - const void *srcHost, size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray, size_t, const void *, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyHtoA_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstArray, dstOffset, srcHost, ByteCount); -} - -CUresult CUDAAPI cuMemcpyAtoH(void *dstHost, CUarray srcArray, size_t srcOffset, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(void *, CUarray, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyAtoH_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstHost, srcArray, srcOffset, ByteCount); -} - -CUresult CUDAAPI cuMemcpyAtoA(CUarray dstArray, 
size_t dstOffset, - CUarray srcArray, size_t srcOffset, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray, size_t, CUarray, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyAtoA_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstArray, dstOffset, srcArray, srcOffset, ByteCount); -} - -CUresult CUDAAPI cuMemcpy2D(const CUDA_MEMCPY2D *pCopy) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY2D *); - static auto func_ptr = LoadSymbol("cuMemcpy2D_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy); -} - -CUresult CUDAAPI cuMemcpy2DUnaligned(const CUDA_MEMCPY2D *pCopy) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY2D *); - static auto func_ptr = LoadSymbol("cuMemcpy2DUnaligned_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy); -} - -CUresult CUDAAPI cuMemcpy3D(const CUDA_MEMCPY3D *pCopy) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY3D *); - static auto func_ptr = LoadSymbol("cuMemcpy3D_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy); -} - -CUresult CUDAAPI cuMemcpy3DPeer(const CUDA_MEMCPY3D_PEER *pCopy) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY3D_PEER *); - static auto func_ptr = LoadSymbol("cuMemcpy3DPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy); -} - -CUresult CUDAAPI cuMemcpyAsync(CUdeviceptr dst, CUdeviceptr src, - size_t ByteCount, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, CUdeviceptr, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpyPeerAsync(CUdeviceptr dstDevice, CUcontext dstContext, - CUdeviceptr srcDevice, CUcontext srcContext, - size_t ByteCount, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUcontext, CUdeviceptr, - 
CUcontext, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstContext, srcDevice, srcContext, ByteCount, - hStream); -} - -CUresult CUDAAPI cuMemcpyHtoDAsync(CUdeviceptr dstDevice, const void *srcHost, - size_t ByteCount, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, const void *, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyHtoDAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, srcHost, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpyDtoHAsync(void *dstHost, CUdeviceptr srcDevice, - size_t ByteCount, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(void *, CUdeviceptr, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyDtoHAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstHost, srcDevice, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpyDtoDAsync(CUdeviceptr dstDevice, CUdeviceptr srcDevice, - size_t ByteCount, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, CUdeviceptr, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyDtoDAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, srcDevice, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpyHtoAAsync(CUarray dstArray, size_t dstOffset, - const void *srcHost, size_t ByteCount, - CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUarray, size_t, const void *, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyHtoAAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstArray, dstOffset, srcHost, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpyAtoHAsync(void *dstHost, CUarray srcArray, - size_t srcOffset, size_t ByteCount, - CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(void *, CUarray, size_t, size_t, CUstream); - static auto 
func_ptr = LoadSymbol("cuMemcpyAtoHAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstHost, srcArray, srcOffset, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpy2DAsync(const CUDA_MEMCPY2D *pCopy, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY2D *, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpy2DAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy, hStream); -} - -CUresult CUDAAPI cuMemcpy3DAsync(const CUDA_MEMCPY3D *pCopy, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY3D *, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpy3DAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy, hStream); -} - -CUresult CUDAAPI cuMemcpy3DPeerAsync(const CUDA_MEMCPY3D_PEER *pCopy, - CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY3D_PEER *, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpy3DPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy, hStream); -} - -CUresult CUDAAPI cuMemsetD8(CUdeviceptr dstDevice, unsigned char uc, size_t N) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, unsigned char, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD8_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, uc, N); -} - -CUresult CUDAAPI cuMemsetD16(CUdeviceptr dstDevice, unsigned short us, - size_t N) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, unsigned short, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD16_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, us, N); -} - -CUresult CUDAAPI cuMemsetD32(CUdeviceptr dstDevice, unsigned int ui, size_t N) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, unsigned int, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD32_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, 
ui, N); -} - -CUresult CUDAAPI cuMemsetD2D8(CUdeviceptr dstDevice, size_t dstPitch, - unsigned char uc, size_t Width, size_t Height) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned char, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD2D8_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, uc, Width, Height); -} - -CUresult CUDAAPI cuMemsetD2D16(CUdeviceptr dstDevice, size_t dstPitch, - unsigned short us, size_t Width, size_t Height) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned short, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD2D16_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, us, Width, Height); -} - -CUresult CUDAAPI cuMemsetD2D32(CUdeviceptr dstDevice, size_t dstPitch, - unsigned int ui, size_t Width, size_t Height) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned int, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD2D32_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, ui, Width, Height); -} - -CUresult CUDAAPI cuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, - size_t N, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, unsigned char, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD8Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, uc, N, hStream); -} - -CUresult CUDAAPI cuMemsetD16Async(CUdeviceptr dstDevice, unsigned short us, - size_t N, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, unsigned short, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD16Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, us, N, hStream); -} - -CUresult CUDAAPI cuMemsetD32Async(CUdeviceptr dstDevice, unsigned int ui, - size_t N, CUstream hStream) { - using 
FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, unsigned int, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD32Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, ui, N, hStream); -} - -CUresult CUDAAPI cuMemsetD2D8Async(CUdeviceptr dstDevice, size_t dstPitch, - unsigned char uc, size_t Width, - size_t Height, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned char, - size_t, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD2D8Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, uc, Width, Height, hStream); -} - -CUresult CUDAAPI cuMemsetD2D16Async(CUdeviceptr dstDevice, size_t dstPitch, - unsigned short us, size_t Width, - size_t Height, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned short, - size_t, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD2D16Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, us, Width, Height, hStream); -} - -CUresult CUDAAPI cuMemsetD2D32Async(CUdeviceptr dstDevice, size_t dstPitch, - unsigned int ui, size_t Width, - size_t Height, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned int, size_t, - size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD2D32Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, ui, Width, Height, hStream); -} - -CUresult CUDAAPI cuArrayCreate(CUarray *pHandle, - const CUDA_ARRAY_DESCRIPTOR *pAllocateArray) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray *, const CUDA_ARRAY_DESCRIPTOR *); - static auto func_ptr = LoadSymbol("cuArrayCreate_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHandle, pAllocateArray); -} - -CUresult CUDAAPI cuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR *pArrayDescriptor, - CUarray hArray) { - using FuncPtr = 
CUresult(CUDAAPI *)(CUDA_ARRAY_DESCRIPTOR *, CUarray); - static auto func_ptr = LoadSymbol("cuArrayGetDescriptor_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pArrayDescriptor, hArray); -} - -CUresult CUDAAPI cuArrayDestroy(CUarray hArray) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray); - static auto func_ptr = LoadSymbol("cuArrayDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hArray); -} - -CUresult CUDAAPI cuArray3DCreate( - CUarray *pHandle, const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray) { - using FuncPtr = - CUresult(CUDAAPI *)(CUarray *, const CUDA_ARRAY3D_DESCRIPTOR *); - static auto func_ptr = LoadSymbol("cuArray3DCreate_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHandle, pAllocateArray); -} - -CUresult CUDAAPI cuArray3DGetDescriptor( - CUDA_ARRAY3D_DESCRIPTOR *pArrayDescriptor, CUarray hArray) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_ARRAY3D_DESCRIPTOR *, CUarray); - static auto func_ptr = LoadSymbol("cuArray3DGetDescriptor_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pArrayDescriptor, hArray); -} - -CUresult CUDAAPI -cuMipmappedArrayCreate(CUmipmappedArray *pHandle, - const CUDA_ARRAY3D_DESCRIPTOR *pMipmappedArrayDesc, - unsigned int numMipmapLevels) { - using FuncPtr = CUresult(CUDAAPI *)( - CUmipmappedArray *, const CUDA_ARRAY3D_DESCRIPTOR *, unsigned int); - static auto func_ptr = LoadSymbol("cuMipmappedArrayCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHandle, pMipmappedArrayDesc, numMipmapLevels); -} - -CUresult CUDAAPI cuMipmappedArrayGetLevel(CUarray *pLevelArray, - CUmipmappedArray hMipmappedArray, - unsigned int level) { - using FuncPtr = - CUresult(CUDAAPI *)(CUarray *, CUmipmappedArray, unsigned int); - static auto func_ptr = LoadSymbol("cuMipmappedArrayGetLevel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pLevelArray, hMipmappedArray, level); -} - -CUresult 
CUDAAPI cuMipmappedArrayDestroy(CUmipmappedArray hMipmappedArray) { - using FuncPtr = CUresult(CUDAAPI *)(CUmipmappedArray); - static auto func_ptr = LoadSymbol("cuMipmappedArrayDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hMipmappedArray); -} - -CUresult CUDAAPI cuMemAddressReserve(CUdeviceptr *ptr, size_t size, - size_t alignment, CUdeviceptr addr, - unsigned long long flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t, size_t, - CUdeviceptr, unsigned long long); - static auto func_ptr = LoadSymbol("cuMemAddressReserve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size, alignment, addr, flags); -} - -CUresult CUDAAPI cuMemAddressFree(CUdeviceptr ptr, size_t size) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemAddressFree"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size); -} - -CUresult CUDAAPI cuMemCreate(CUmemGenericAllocationHandle *handle, size_t size, - const CUmemAllocationProp *prop, - unsigned long long flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUmemGenericAllocationHandle *, size_t, - const CUmemAllocationProp *, unsigned long long); - static auto func_ptr = LoadSymbol("cuMemCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, size, prop, flags); -} - -CUresult CUDAAPI cuMemRelease(CUmemGenericAllocationHandle handle) { - using FuncPtr = CUresult(CUDAAPI *)(CUmemGenericAllocationHandle); - static auto func_ptr = LoadSymbol("cuMemRelease"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -CUresult CUDAAPI cuMemMap(CUdeviceptr ptr, size_t size, size_t offset, - CUmemGenericAllocationHandle handle, - unsigned long long flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, size_t, size_t, - CUmemGenericAllocationHandle, unsigned long long); - static auto func_ptr = LoadSymbol("cuMemMap"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(ptr, size, offset, handle, flags); -} - -CUresult CUDAAPI cuMemUnmap(CUdeviceptr ptr, size_t size) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemUnmap"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size); -} - -CUresult CUDAAPI cuMemSetAccess(CUdeviceptr ptr, size_t size, - const CUmemAccessDesc *desc, size_t count) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, size_t, const CUmemAccessDesc *, size_t); - static auto func_ptr = LoadSymbol("cuMemSetAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size, desc, count); -} - -CUresult CUDAAPI cuMemGetAccess(unsigned long long *flags, - const CUmemLocation *location, - CUdeviceptr ptr) { - using FuncPtr = CUresult(CUDAAPI *)(unsigned long long *, - const CUmemLocation *, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuMemGetAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags, location, ptr); -} - -CUresult CUDAAPI cuMemExportToShareableHandle( - void *shareableHandle, CUmemGenericAllocationHandle handle, - CUmemAllocationHandleType handleType, unsigned long long flags) { - using FuncPtr = - CUresult(CUDAAPI *)(void *, CUmemGenericAllocationHandle, - CUmemAllocationHandleType, unsigned long long); - static auto func_ptr = LoadSymbol("cuMemExportToShareableHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(shareableHandle, handle, handleType, flags); -} - -CUresult CUDAAPI cuMemImportFromShareableHandle( - CUmemGenericAllocationHandle *handle, void *osHandle, - CUmemAllocationHandleType shHandleType) { - using FuncPtr = CUresult(CUDAAPI *)(CUmemGenericAllocationHandle *, void *, - CUmemAllocationHandleType); - static auto func_ptr = LoadSymbol("cuMemImportFromShareableHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, osHandle, shHandleType); -} - 
-CUresult CUDAAPI cuMemGetAllocationGranularity( - size_t *granularity, const CUmemAllocationProp *prop, - CUmemAllocationGranularity_flags option) { - using FuncPtr = CUresult(CUDAAPI *)(size_t *, const CUmemAllocationProp *, - CUmemAllocationGranularity_flags); - static auto func_ptr = LoadSymbol("cuMemGetAllocationGranularity"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(granularity, prop, option); -} - -CUresult CUDAAPI cuMemGetAllocationPropertiesFromHandle( - CUmemAllocationProp *prop, CUmemGenericAllocationHandle handle) { - using FuncPtr = - CUresult(CUDAAPI *)(CUmemAllocationProp *, CUmemGenericAllocationHandle); - static auto func_ptr = - LoadSymbol("cuMemGetAllocationPropertiesFromHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(prop, handle); -} - -CUresult CUDAAPI cuPointerGetAttribute(void *data, - CUpointer_attribute attribute, - CUdeviceptr ptr) { - using FuncPtr = CUresult(CUDAAPI *)(void *, CUpointer_attribute, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuPointerGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, attribute, ptr); -} - -CUresult CUDAAPI cuMemPrefetchAsync(CUdeviceptr devPtr, size_t count, - CUdevice dstDevice, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t, CUdevice, CUstream); - static auto func_ptr = LoadSymbol("cuMemPrefetchAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, dstDevice, hStream); -} - -CUresult CUDAAPI cuMemAdvise(CUdeviceptr devPtr, size_t count, - CUmem_advise advice, CUdevice device) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, size_t, CUmem_advise, CUdevice); - static auto func_ptr = LoadSymbol("cuMemAdvise"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, advice, device); -} - -CUresult CUDAAPI cuMemRangeGetAttribute(void *data, size_t dataSize, - CUmem_range_attribute attribute, - CUdeviceptr 
devPtr, size_t count) { - using FuncPtr = CUresult(CUDAAPI *)(void *, size_t, CUmem_range_attribute, - CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemRangeGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSize, attribute, devPtr, count); -} - -CUresult CUDAAPI cuMemRangeGetAttributes(void **data, size_t *dataSizes, - CUmem_range_attribute *attributes, - size_t numAttributes, - CUdeviceptr devPtr, size_t count) { - using FuncPtr = CUresult(CUDAAPI *)( - void **, size_t *, CUmem_range_attribute *, size_t, CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemRangeGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSizes, attributes, numAttributes, devPtr, count); -} - -CUresult CUDAAPI cuPointerSetAttribute(const void *value, - CUpointer_attribute attribute, - CUdeviceptr ptr) { - using FuncPtr = - CUresult(CUDAAPI *)(const void *, CUpointer_attribute, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuPointerSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attribute, ptr); -} - -CUresult CUDAAPI cuPointerGetAttributes(unsigned int numAttributes, - CUpointer_attribute *attributes, - void **data, CUdeviceptr ptr) { - using FuncPtr = CUresult(CUDAAPI *)(unsigned int, CUpointer_attribute *, - void **, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuPointerGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numAttributes, attributes, data, ptr); -} - -CUresult CUDAAPI cuStreamCreate(CUstream *phStream, unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream *, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phStream, Flags); -} - -CUresult CUDAAPI cuStreamCreateWithPriority(CUstream *phStream, - unsigned int flags, int priority) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream 
*, unsigned int, int); - static auto func_ptr = LoadSymbol("cuStreamCreateWithPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phStream, flags, priority); -} - -CUresult CUDAAPI cuStreamGetPriority(CUstream hStream, int *priority) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, int *); - static auto func_ptr = LoadSymbol("cuStreamGetPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, priority); -} - -CUresult CUDAAPI cuStreamGetFlags(CUstream hStream, unsigned int *flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, unsigned int *); - static auto func_ptr = LoadSymbol("cuStreamGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, flags); -} - -CUresult CUDAAPI cuStreamGetCtx(CUstream hStream, CUcontext *pctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUcontext *); - static auto func_ptr = LoadSymbol("cuStreamGetCtx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, pctx); -} - -CUresult CUDAAPI cuStreamWaitEvent(CUstream hStream, CUevent hEvent, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUevent, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamWaitEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, hEvent, Flags); -} - -CUresult CUDAAPI cuStreamAddCallback(CUstream hStream, - CUstreamCallback callback, void *userData, - unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUstreamCallback, void *, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamAddCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, callback, userData, flags); -} - -CUresult CUDAAPI cuStreamBeginCapture(CUstream hStream, - CUstreamCaptureMode mode) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUstreamCaptureMode); - static auto func_ptr = LoadSymbol("cuStreamBeginCapture_v2"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(hStream, mode); -} - -CUresult CUDAAPI cuThreadExchangeStreamCaptureMode(CUstreamCaptureMode *mode) { - using FuncPtr = CUresult(CUDAAPI *)(CUstreamCaptureMode *); - static auto func_ptr = - LoadSymbol("cuThreadExchangeStreamCaptureMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mode); -} - -CUresult CUDAAPI cuStreamEndCapture(CUstream hStream, CUgraph *phGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUgraph *); - static auto func_ptr = LoadSymbol("cuStreamEndCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, phGraph); -} - -CUresult CUDAAPI cuStreamIsCapturing(CUstream hStream, - CUstreamCaptureStatus *captureStatus) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUstreamCaptureStatus *); - static auto func_ptr = LoadSymbol("cuStreamIsCapturing"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, captureStatus); -} - -CUresult CUDAAPI cuStreamGetCaptureInfo(CUstream hStream, - CUstreamCaptureStatus *captureStatus, - cuuint64_t *id) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUstreamCaptureStatus *, cuuint64_t *); - static auto func_ptr = LoadSymbol("cuStreamGetCaptureInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, captureStatus, id); -} - -CUresult CUDAAPI cuStreamAttachMemAsync(CUstream hStream, CUdeviceptr dptr, - size_t length, unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUdeviceptr, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamAttachMemAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, dptr, length, flags); -} - -CUresult CUDAAPI cuStreamQuery(CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream); - static auto func_ptr = LoadSymbol("cuStreamQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream); -} - -CUresult CUDAAPI 
cuStreamSynchronize(CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream); - static auto func_ptr = LoadSymbol("cuStreamSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream); -} - -CUresult CUDAAPI cuStreamDestroy(CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream); - static auto func_ptr = LoadSymbol("cuStreamDestroy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream); -} - -CUresult CUDAAPI cuEventCreate(CUevent *phEvent, unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent *, unsigned int); - static auto func_ptr = LoadSymbol("cuEventCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phEvent, Flags); -} - -CUresult CUDAAPI cuEventRecord(CUevent hEvent, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent, CUstream); - static auto func_ptr = LoadSymbol("cuEventRecord"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hEvent, hStream); -} - -CUresult CUDAAPI cuEventQuery(CUevent hEvent) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent); - static auto func_ptr = LoadSymbol("cuEventQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hEvent); -} - -CUresult CUDAAPI cuEventSynchronize(CUevent hEvent) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent); - static auto func_ptr = LoadSymbol("cuEventSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hEvent); -} - -CUresult CUDAAPI cuEventDestroy(CUevent hEvent) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent); - static auto func_ptr = LoadSymbol("cuEventDestroy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hEvent); -} - -CUresult CUDAAPI cuEventElapsedTime(float *pMilliseconds, CUevent hStart, - CUevent hEnd) { - using FuncPtr = CUresult(CUDAAPI *)(float *, CUevent, CUevent); - static auto func_ptr = LoadSymbol("cuEventElapsedTime"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(pMilliseconds, hStart, hEnd); -} - -CUresult CUDAAPI -cuImportExternalMemory(CUexternalMemory *extMem_out, - const CUDA_EXTERNAL_MEMORY_HANDLE_DESC *memHandleDesc) { - using FuncPtr = CUresult(CUDAAPI *)(CUexternalMemory *, - const CUDA_EXTERNAL_MEMORY_HANDLE_DESC *); - static auto func_ptr = LoadSymbol("cuImportExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem_out, memHandleDesc); -} - -CUresult CUDAAPI cuExternalMemoryGetMappedBuffer( - CUdeviceptr *devPtr, CUexternalMemory extMem, - const CUDA_EXTERNAL_MEMORY_BUFFER_DESC *bufferDesc) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, CUexternalMemory, - const CUDA_EXTERNAL_MEMORY_BUFFER_DESC *); - static auto func_ptr = LoadSymbol("cuExternalMemoryGetMappedBuffer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, extMem, bufferDesc); -} - -CUresult CUDAAPI cuExternalMemoryGetMappedMipmappedArray( - CUmipmappedArray *mipmap, CUexternalMemory extMem, - const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC *mipmapDesc) { - using FuncPtr = - CUresult(CUDAAPI *)(CUmipmappedArray *, CUexternalMemory, - const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC *); - static auto func_ptr = - LoadSymbol("cuExternalMemoryGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmap, extMem, mipmapDesc); -} - -CUresult CUDAAPI cuDestroyExternalMemory(CUexternalMemory extMem) { - using FuncPtr = CUresult(CUDAAPI *)(CUexternalMemory); - static auto func_ptr = LoadSymbol("cuDestroyExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem); -} - -CUresult CUDAAPI cuImportExternalSemaphore( - CUexternalSemaphore *extSem_out, - const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC *semHandleDesc) { - using FuncPtr = CUresult(CUDAAPI *)( - CUexternalSemaphore *, const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC *); - static auto func_ptr = 
LoadSymbol("cuImportExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem_out, semHandleDesc); -} - -CUresult CUDAAPI cuSignalExternalSemaphoresAsync( - const CUexternalSemaphore *extSemArray, - const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS *paramsArray, - unsigned int numExtSems, CUstream stream) { - using FuncPtr = CUresult(CUDAAPI *)( - const CUexternalSemaphore *, - const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS *, unsigned int, CUstream); - static auto func_ptr = LoadSymbol("cuSignalExternalSemaphoresAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -CUresult CUDAAPI cuWaitExternalSemaphoresAsync( - const CUexternalSemaphore *extSemArray, - const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS *paramsArray, - unsigned int numExtSems, CUstream stream) { - using FuncPtr = CUresult(CUDAAPI *)( - const CUexternalSemaphore *, const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS *, - unsigned int, CUstream); - static auto func_ptr = LoadSymbol("cuWaitExternalSemaphoresAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -CUresult CUDAAPI cuDestroyExternalSemaphore(CUexternalSemaphore extSem) { - using FuncPtr = CUresult(CUDAAPI *)(CUexternalSemaphore); - static auto func_ptr = LoadSymbol("cuDestroyExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem); -} - -CUresult CUDAAPI cuStreamWaitValue32(CUstream stream, CUdeviceptr addr, - cuuint32_t value, unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUdeviceptr, cuuint32_t, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamWaitValue32"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, addr, value, flags); -} - -CUresult CUDAAPI cuStreamWaitValue64(CUstream stream, CUdeviceptr addr, - cuuint64_t value, unsigned int flags) { - using FuncPtr = - 
CUresult(CUDAAPI *)(CUstream, CUdeviceptr, cuuint64_t, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamWaitValue64"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, addr, value, flags); -} - -CUresult CUDAAPI cuStreamWriteValue32(CUstream stream, CUdeviceptr addr, - cuuint32_t value, unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUdeviceptr, cuuint32_t, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamWriteValue32"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, addr, value, flags); -} - -CUresult CUDAAPI cuStreamWriteValue64(CUstream stream, CUdeviceptr addr, - cuuint64_t value, unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUdeviceptr, cuuint64_t, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamWriteValue64"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, addr, value, flags); -} - -CUresult CUDAAPI cuStreamBatchMemOp(CUstream stream, unsigned int count, - CUstreamBatchMemOpParams *paramArray, - unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, unsigned int, - CUstreamBatchMemOpParams *, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamBatchMemOp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, count, paramArray, flags); -} - -CUresult CUDAAPI cuFuncGetAttribute(int *pi, CUfunction_attribute attrib, - CUfunction hfunc) { - using FuncPtr = CUresult(CUDAAPI *)(int *, CUfunction_attribute, CUfunction); - static auto func_ptr = LoadSymbol("cuFuncGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pi, attrib, hfunc); -} - -CUresult CUDAAPI cuFuncSetAttribute(CUfunction hfunc, - CUfunction_attribute attrib, int value) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, CUfunction_attribute, int); - static auto func_ptr = LoadSymbol("cuFuncSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(hfunc, attrib, value); -} - -CUresult CUDAAPI cuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, CUfunc_cache); - static auto func_ptr = LoadSymbol("cuFuncSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, config); -} - -CUresult CUDAAPI cuFuncSetSharedMemConfig(CUfunction hfunc, - CUsharedconfig config) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, CUsharedconfig); - static auto func_ptr = LoadSymbol("cuFuncSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, config); -} - -CUresult CUDAAPI cuLaunchKernel(CUfunction f, unsigned int gridDimX, - unsigned int gridDimY, unsigned int gridDimZ, - unsigned int blockDimX, unsigned int blockDimY, - unsigned int blockDimZ, - unsigned int sharedMemBytes, CUstream hStream, - void **kernelParams, void **extra) { - using FuncPtr = CUresult(CUDAAPI *)( - CUfunction, unsigned int, unsigned int, unsigned int, unsigned int, - unsigned int, unsigned int, unsigned int, CUstream, void **, void **); - static auto func_ptr = LoadSymbol("cuLaunchKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, - blockDimZ, sharedMemBytes, hStream, kernelParams, extra); -} - -CUresult CUDAAPI cuLaunchCooperativeKernel( - CUfunction f, unsigned int gridDimX, unsigned int gridDimY, - unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, - unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, - void **kernelParams) { - using FuncPtr = CUresult(CUDAAPI *)( - CUfunction, unsigned int, unsigned int, unsigned int, unsigned int, - unsigned int, unsigned int, unsigned int, CUstream, void **); - static auto func_ptr = LoadSymbol("cuLaunchCooperativeKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, - 
blockDimZ, sharedMemBytes, hStream, kernelParams); -} - -CUresult CUDAAPI cuLaunchCooperativeKernelMultiDevice( - CUDA_LAUNCH_PARAMS *launchParamsList, unsigned int numDevices, - unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUDA_LAUNCH_PARAMS *, unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cuLaunchCooperativeKernelMultiDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(launchParamsList, numDevices, flags); -} - -CUresult CUDAAPI cuLaunchHostFunc(CUstream hStream, CUhostFn fn, - void *userData) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUhostFn, void *); - static auto func_ptr = LoadSymbol("cuLaunchHostFunc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, fn, userData); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuFuncSetBlockShape(CUfunction hfunc, int x, - int y, int z) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, int, int); - static auto func_ptr = LoadSymbol("cuFuncSetBlockShape"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, x, y, z); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuFuncSetSharedSize(CUfunction hfunc, - unsigned int bytes) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, unsigned int); - static auto func_ptr = LoadSymbol("cuFuncSetSharedSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, bytes); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuParamSetSize(CUfunction hfunc, - unsigned int numbytes) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, unsigned int); - static auto func_ptr = LoadSymbol("cuParamSetSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, numbytes); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuParamSeti(CUfunction hfunc, int offset, - unsigned int value) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, unsigned int); - static auto func_ptr = LoadSymbol("cuParamSeti"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(hfunc, offset, value); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuParamSetf(CUfunction hfunc, int offset, - float value) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, float); - static auto func_ptr = LoadSymbol("cuParamSetf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, offset, value); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuParamSetv(CUfunction hfunc, int offset, - void *ptr, - unsigned int numbytes) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, void *, unsigned int); - static auto func_ptr = LoadSymbol("cuParamSetv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, offset, ptr, numbytes); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuLaunch(CUfunction f) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction); - static auto func_ptr = LoadSymbol("cuLaunch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(f); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuLaunchGrid(CUfunction f, int grid_width, - int grid_height) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, int); - static auto func_ptr = LoadSymbol("cuLaunchGrid"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(f, grid_width, grid_height); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuLaunchGridAsync(CUfunction f, - int grid_width, - int grid_height, - CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, int, CUstream); - static auto func_ptr = LoadSymbol("cuLaunchGridAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(f, grid_width, grid_height, hStream); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuParamSetTexRef(CUfunction hfunc, - int texunit, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, CUtexref); - static auto func_ptr = LoadSymbol("cuParamSetTexRef"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, texunit, hTexRef); -} - -CUresult CUDAAPI cuGraphCreate(CUgraph 
*phGraph, unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph *, unsigned int); - static auto func_ptr = LoadSymbol("cuGraphCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraph, flags); -} - -CUresult CUDAAPI cuGraphAddKernelNode( - CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, - size_t numDependencies, const CUDA_KERNEL_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, - const CUDA_KERNEL_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphAddKernelNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - nodeParams); -} - -CUresult CUDAAPI cuGraphKernelNodeGetParams( - CUgraphNode hNode, CUDA_KERNEL_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_KERNEL_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphKernelNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphKernelNodeSetParams( - CUgraphNode hNode, const CUDA_KERNEL_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode, const CUDA_KERNEL_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphAddMemcpyNode(CUgraphNode *phGraphNode, CUgraph hGraph, - const CUgraphNode *dependencies, - size_t numDependencies, - const CUDA_MEMCPY3D *copyParams, - CUcontext ctx) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, - const CUDA_MEMCPY3D *, CUcontext); - static auto func_ptr = LoadSymbol("cuGraphAddMemcpyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - copyParams, ctx); -} - 
-CUresult CUDAAPI cuGraphMemcpyNodeGetParams(CUgraphNode hNode, - CUDA_MEMCPY3D *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_MEMCPY3D *); - static auto func_ptr = LoadSymbol("cuGraphMemcpyNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphMemcpyNodeSetParams(CUgraphNode hNode, - const CUDA_MEMCPY3D *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, const CUDA_MEMCPY3D *); - static auto func_ptr = LoadSymbol("cuGraphMemcpyNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphAddMemsetNode( - CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, - size_t numDependencies, const CUDA_MEMSET_NODE_PARAMS *memsetParams, - CUcontext ctx) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, - const CUDA_MEMSET_NODE_PARAMS *, CUcontext); - static auto func_ptr = LoadSymbol("cuGraphAddMemsetNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - memsetParams, ctx); -} - -CUresult CUDAAPI cuGraphMemsetNodeGetParams( - CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_MEMSET_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphMemsetNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphMemsetNodeSetParams( - CUgraphNode hNode, const CUDA_MEMSET_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode, const CUDA_MEMSET_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphAddHostNode(CUgraphNode *phGraphNode, CUgraph hGraph, - 
const CUgraphNode *dependencies, - size_t numDependencies, - const CUDA_HOST_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, - const CUDA_HOST_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphAddHostNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - nodeParams); -} - -CUresult CUDAAPI cuGraphHostNodeGetParams(CUgraphNode hNode, - CUDA_HOST_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_HOST_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphHostNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphHostNodeSetParams( - CUgraphNode hNode, const CUDA_HOST_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode, const CUDA_HOST_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphHostNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphAddChildGraphNode(CUgraphNode *phGraphNode, - CUgraph hGraph, - const CUgraphNode *dependencies, - size_t numDependencies, - CUgraph childGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, - const CUgraphNode *, size_t, CUgraph); - static auto func_ptr = LoadSymbol("cuGraphAddChildGraphNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - childGraph); -} - -CUresult CUDAAPI cuGraphChildGraphNodeGetGraph(CUgraphNode hNode, - CUgraph *phGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraph *); - static auto func_ptr = LoadSymbol("cuGraphChildGraphNodeGetGraph"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, phGraph); -} - -CUresult CUDAAPI cuGraphAddEmptyNode(CUgraphNode *phGraphNode, CUgraph hGraph, - const 
CUgraphNode *dependencies, - size_t numDependencies) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t); - static auto func_ptr = LoadSymbol("cuGraphAddEmptyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies); -} - -CUresult CUDAAPI cuGraphClone(CUgraph *phGraphClone, CUgraph originalGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph *, CUgraph); - static auto func_ptr = LoadSymbol("cuGraphClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphClone, originalGraph); -} - -CUresult CUDAAPI cuGraphNodeFindInClone(CUgraphNode *phNode, - CUgraphNode hOriginalNode, - CUgraph hClonedGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraphNode, CUgraph); - static auto func_ptr = LoadSymbol("cuGraphNodeFindInClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phNode, hOriginalNode, hClonedGraph); -} - -CUresult CUDAAPI cuGraphNodeGetType(CUgraphNode hNode, CUgraphNodeType *type) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraphNodeType *); - static auto func_ptr = LoadSymbol("cuGraphNodeGetType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, type); -} - -CUresult CUDAAPI cuGraphGetNodes(CUgraph hGraph, CUgraphNode *nodes, - size_t *numNodes) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph, CUgraphNode *, size_t *); - static auto func_ptr = LoadSymbol("cuGraphGetNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph, nodes, numNodes); -} - -CUresult CUDAAPI cuGraphGetRootNodes(CUgraph hGraph, CUgraphNode *rootNodes, - size_t *numRootNodes) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph, CUgraphNode *, size_t *); - static auto func_ptr = LoadSymbol("cuGraphGetRootNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph, rootNodes, numRootNodes); -} - -CUresult CUDAAPI cuGraphGetEdges(CUgraph 
hGraph, CUgraphNode *from, - CUgraphNode *to, size_t *numEdges) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraph, CUgraphNode *, CUgraphNode *, size_t *); - static auto func_ptr = LoadSymbol("cuGraphGetEdges"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph, from, to, numEdges); -} - -CUresult CUDAAPI cuGraphNodeGetDependencies(CUgraphNode hNode, - CUgraphNode *dependencies, - size_t *numDependencies) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraphNode *, size_t *); - static auto func_ptr = LoadSymbol("cuGraphNodeGetDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, dependencies, numDependencies); -} - -CUresult CUDAAPI cuGraphNodeGetDependentNodes(CUgraphNode hNode, - CUgraphNode *dependentNodes, - size_t *numDependentNodes) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraphNode *, size_t *); - static auto func_ptr = LoadSymbol("cuGraphNodeGetDependentNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, dependentNodes, numDependentNodes); -} - -CUresult CUDAAPI cuGraphAddDependencies(CUgraph hGraph, const CUgraphNode *from, - const CUgraphNode *to, - size_t numDependencies) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph, const CUgraphNode *, - const CUgraphNode *, size_t); - static auto func_ptr = LoadSymbol("cuGraphAddDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph, from, to, numDependencies); -} - -CUresult CUDAAPI cuGraphRemoveDependencies(CUgraph hGraph, - const CUgraphNode *from, - const CUgraphNode *to, - size_t numDependencies) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph, const CUgraphNode *, - const CUgraphNode *, size_t); - static auto func_ptr = LoadSymbol("cuGraphRemoveDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph, from, to, numDependencies); -} - -CUresult CUDAAPI cuGraphDestroyNode(CUgraphNode hNode) { - using FuncPtr = CUresult(CUDAAPI 
*)(CUgraphNode); - static auto func_ptr = LoadSymbol("cuGraphDestroyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode); -} - -CUresult CUDAAPI cuGraphInstantiate(CUgraphExec *phGraphExec, CUgraph hGraph, - CUgraphNode *phErrorNode, char *logBuffer, - size_t bufferSize) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec *, CUgraph, CUgraphNode *, - char *, size_t); - static auto func_ptr = LoadSymbol("cuGraphInstantiate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphExec, hGraph, phErrorNode, logBuffer, bufferSize); -} - -CUresult CUDAAPI -cuGraphExecKernelNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, - const CUDA_KERNEL_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, - const CUDA_KERNEL_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphExecKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphExecMemcpyNodeSetParams(CUgraphExec hGraphExec, - CUgraphNode hNode, - const CUDA_MEMCPY3D *copyParams, - CUcontext ctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, - const CUDA_MEMCPY3D *, CUcontext); - static auto func_ptr = LoadSymbol("cuGraphExecMemcpyNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, copyParams, ctx); -} - -CUresult CUDAAPI cuGraphExecMemsetNodeSetParams( - CUgraphExec hGraphExec, CUgraphNode hNode, - const CUDA_MEMSET_NODE_PARAMS *memsetParams, CUcontext ctx) { - using FuncPtr = CUresult(CUDAAPI *)( - CUgraphExec, CUgraphNode, const CUDA_MEMSET_NODE_PARAMS *, CUcontext); - static auto func_ptr = LoadSymbol("cuGraphExecMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, memsetParams, ctx); -} - -CUresult CUDAAPI -cuGraphExecHostNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, - const 
CUDA_HOST_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, - const CUDA_HOST_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphExecHostNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphLaunch(CUgraphExec hGraphExec, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUstream); - static auto func_ptr = LoadSymbol("cuGraphLaunch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hStream); -} - -CUresult CUDAAPI cuGraphExecDestroy(CUgraphExec hGraphExec) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec); - static auto func_ptr = LoadSymbol("cuGraphExecDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec); -} - -CUresult CUDAAPI cuGraphDestroy(CUgraph hGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph); - static auto func_ptr = LoadSymbol("cuGraphDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph); -} - -CUresult CUDAAPI cuGraphExecUpdate(CUgraphExec hGraphExec, CUgraph hGraph, - CUgraphNode *hErrorNode_out, - CUgraphExecUpdateResult *updateResult_out) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraph, CUgraphNode *, - CUgraphExecUpdateResult *); - static auto func_ptr = LoadSymbol("cuGraphExecUpdate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hGraph, hErrorNode_out, updateResult_out); -} - -CUresult CUDAAPI cuOccupancyMaxActiveBlocksPerMultiprocessor( - int *numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize) { - using FuncPtr = CUresult(CUDAAPI *)(int *, CUfunction, int, size_t); - static auto func_ptr = - LoadSymbol("cuOccupancyMaxActiveBlocksPerMultiprocessor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize); -} - -CUresult CUDAAPI 
cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( - int *numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize, - unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(int *, CUfunction, int, size_t, unsigned int); - static auto func_ptr = LoadSymbol( - "cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize, flags); -} - -CUresult CUDAAPI cuOccupancyMaxPotentialBlockSize( - int *minGridSize, int *blockSize, CUfunction func, - CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, - int blockSizeLimit) { - using FuncPtr = CUresult(CUDAAPI *)(int *, int *, CUfunction, - CUoccupancyB2DSize, size_t, int); - static auto func_ptr = - LoadSymbol("cuOccupancyMaxPotentialBlockSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(minGridSize, blockSize, func, blockSizeToDynamicSMemSize, - dynamicSMemSize, blockSizeLimit); -} - -CUresult CUDAAPI cuOccupancyMaxPotentialBlockSizeWithFlags( - int *minGridSize, int *blockSize, CUfunction func, - CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, - int blockSizeLimit, unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)( - int *, int *, CUfunction, CUoccupancyB2DSize, size_t, int, unsigned int); - static auto func_ptr = - LoadSymbol("cuOccupancyMaxPotentialBlockSizeWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(minGridSize, blockSize, func, blockSizeToDynamicSMemSize, - dynamicSMemSize, blockSizeLimit, flags); -} - -CUresult CUDAAPI cuTexRefSetArray(CUtexref hTexRef, CUarray hArray, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUarray, unsigned int); - static auto func_ptr = LoadSymbol("cuTexRefSetArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, hArray, Flags); -} - -CUresult CUDAAPI cuTexRefSetMipmappedArray(CUtexref hTexRef, - 
CUmipmappedArray hMipmappedArray, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUmipmappedArray, unsigned int); - static auto func_ptr = LoadSymbol("cuTexRefSetMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, hMipmappedArray, Flags); -} - -CUresult CUDAAPI cuTexRefSetAddress(size_t *ByteOffset, CUtexref hTexRef, - CUdeviceptr dptr, size_t bytes) { - using FuncPtr = CUresult(CUDAAPI *)(size_t *, CUtexref, CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuTexRefSetAddress_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ByteOffset, hTexRef, dptr, bytes); -} - -CUresult CUDAAPI cuTexRefSetAddress2D(CUtexref hTexRef, - const CUDA_ARRAY_DESCRIPTOR *desc, - CUdeviceptr dptr, size_t Pitch) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, const CUDA_ARRAY_DESCRIPTOR *, - CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuTexRefSetAddress2D_v3"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, desc, dptr, Pitch); -} - -CUresult CUDAAPI cuTexRefSetFormat(CUtexref hTexRef, CUarray_format fmt, - int NumPackedComponents) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUarray_format, int); - static auto func_ptr = LoadSymbol("cuTexRefSetFormat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, fmt, NumPackedComponents); -} - -CUresult CUDAAPI cuTexRefSetAddressMode(CUtexref hTexRef, int dim, - CUaddress_mode am) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, int, CUaddress_mode); - static auto func_ptr = LoadSymbol("cuTexRefSetAddressMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, dim, am); -} - -CUresult CUDAAPI cuTexRefSetFilterMode(CUtexref hTexRef, CUfilter_mode fm) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUfilter_mode); - static auto func_ptr = LoadSymbol("cuTexRefSetFilterMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(hTexRef, fm); -} - -CUresult CUDAAPI cuTexRefSetMipmapFilterMode(CUtexref hTexRef, - CUfilter_mode fm) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUfilter_mode); - static auto func_ptr = LoadSymbol("cuTexRefSetMipmapFilterMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, fm); -} - -CUresult CUDAAPI cuTexRefSetMipmapLevelBias(CUtexref hTexRef, float bias) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, float); - static auto func_ptr = LoadSymbol("cuTexRefSetMipmapLevelBias"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, bias); -} - -CUresult CUDAAPI cuTexRefSetMipmapLevelClamp(CUtexref hTexRef, - float minMipmapLevelClamp, - float maxMipmapLevelClamp) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, float, float); - static auto func_ptr = LoadSymbol("cuTexRefSetMipmapLevelClamp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, minMipmapLevelClamp, maxMipmapLevelClamp); -} - -CUresult CUDAAPI cuTexRefSetMaxAnisotropy(CUtexref hTexRef, - unsigned int maxAniso) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, unsigned int); - static auto func_ptr = LoadSymbol("cuTexRefSetMaxAnisotropy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, maxAniso); -} - -CUresult CUDAAPI cuTexRefSetBorderColor(CUtexref hTexRef, float *pBorderColor) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, float *); - static auto func_ptr = LoadSymbol("cuTexRefSetBorderColor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, pBorderColor); -} - -CUresult CUDAAPI cuTexRefSetFlags(CUtexref hTexRef, unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, unsigned int); - static auto func_ptr = LoadSymbol("cuTexRefSetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, Flags); -} - -CUresult CUDAAPI cuTexRefGetAddress(CUdeviceptr *pdptr, CUtexref hTexRef) { - using FuncPtr = 
CUresult(CUDAAPI *)(CUdeviceptr *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetAddress_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pdptr, hTexRef); -} - -CUresult CUDAAPI cuTexRefGetArray(CUarray *phArray, CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phArray, hTexRef); -} - -CUresult CUDAAPI cuTexRefGetMipmappedArray(CUmipmappedArray *phMipmappedArray, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUmipmappedArray *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phMipmappedArray, hTexRef); -} - -CUresult CUDAAPI cuTexRefGetAddressMode(CUaddress_mode *pam, CUtexref hTexRef, - int dim) { - using FuncPtr = CUresult(CUDAAPI *)(CUaddress_mode *, CUtexref, int); - static auto func_ptr = LoadSymbol("cuTexRefGetAddressMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pam, hTexRef, dim); -} - -CUresult CUDAAPI cuTexRefGetFilterMode(CUfilter_mode *pfm, CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUfilter_mode *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetFilterMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pfm, hTexRef); -} - -CUresult CUDAAPI cuTexRefGetFormat(CUarray_format *pFormat, int *pNumChannels, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray_format *, int *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetFormat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFormat, pNumChannels, hTexRef); -} - -CUresult CUDAAPI cuTexRefGetMipmapFilterMode(CUfilter_mode *pfm, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUfilter_mode *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetMipmapFilterMode"); - 
if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pfm, hTexRef); -} - -CUresult CUDAAPI cuTexRefGetMipmapLevelBias(float *pbias, CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(float *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetMipmapLevelBias"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pbias, hTexRef); -} - -CUresult CUDAAPI cuTexRefGetMipmapLevelClamp(float *pminMipmapLevelClamp, - float *pmaxMipmapLevelClamp, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(float *, float *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetMipmapLevelClamp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pminMipmapLevelClamp, pmaxMipmapLevelClamp, hTexRef); -} - -CUresult CUDAAPI cuTexRefGetMaxAnisotropy(int *pmaxAniso, CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(int *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetMaxAnisotropy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pmaxAniso, hTexRef); -} - -CUresult CUDAAPI cuTexRefGetBorderColor(float *pBorderColor, CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(float *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetBorderColor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pBorderColor, hTexRef); -} - -CUresult CUDAAPI cuTexRefGetFlags(unsigned int *pFlags, CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(unsigned int *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFlags, hTexRef); -} - -CUresult CUDAAPI cuTexRefCreate(CUtexref *pTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref *); - static auto func_ptr = LoadSymbol("cuTexRefCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexRef); -} - -CUresult CUDAAPI cuTexRefDestroy(CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref); 
- static auto func_ptr = LoadSymbol("cuTexRefDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef); -} - -CUresult CUDAAPI cuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUsurfref, CUarray, unsigned int); - static auto func_ptr = LoadSymbol("cuSurfRefSetArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hSurfRef, hArray, Flags); -} - -CUresult CUDAAPI cuSurfRefGetArray(CUarray *phArray, CUsurfref hSurfRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray *, CUsurfref); - static auto func_ptr = LoadSymbol("cuSurfRefGetArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phArray, hSurfRef); -} - -CUresult CUDAAPI -cuTexObjectCreate(CUtexObject *pTexObject, const CUDA_RESOURCE_DESC *pResDesc, - const CUDA_TEXTURE_DESC *pTexDesc, - const CUDA_RESOURCE_VIEW_DESC *pResViewDesc) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexObject *, const CUDA_RESOURCE_DESC *, - const CUDA_TEXTURE_DESC *, - const CUDA_RESOURCE_VIEW_DESC *); - static auto func_ptr = LoadSymbol("cuTexObjectCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexObject, pResDesc, pTexDesc, pResViewDesc); -} - -CUresult CUDAAPI cuTexObjectDestroy(CUtexObject texObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexObject); - static auto func_ptr = LoadSymbol("cuTexObjectDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texObject); -} - -CUresult CUDAAPI cuTexObjectGetResourceDesc(CUDA_RESOURCE_DESC *pResDesc, - CUtexObject texObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_RESOURCE_DESC *, CUtexObject); - static auto func_ptr = LoadSymbol("cuTexObjectGetResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, texObject); -} - -CUresult CUDAAPI cuTexObjectGetTextureDesc(CUDA_TEXTURE_DESC *pTexDesc, - CUtexObject texObject) { - using FuncPtr = CUresult(CUDAAPI 
*)(CUDA_TEXTURE_DESC *, CUtexObject); - static auto func_ptr = LoadSymbol("cuTexObjectGetTextureDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexDesc, texObject); -} - -CUresult CUDAAPI cuTexObjectGetResourceViewDesc( - CUDA_RESOURCE_VIEW_DESC *pResViewDesc, CUtexObject texObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_RESOURCE_VIEW_DESC *, CUtexObject); - static auto func_ptr = LoadSymbol("cuTexObjectGetResourceViewDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResViewDesc, texObject); -} - -CUresult CUDAAPI cuSurfObjectCreate(CUsurfObject *pSurfObject, - const CUDA_RESOURCE_DESC *pResDesc) { - using FuncPtr = - CUresult(CUDAAPI *)(CUsurfObject *, const CUDA_RESOURCE_DESC *); - static auto func_ptr = LoadSymbol("cuSurfObjectCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pSurfObject, pResDesc); -} - -CUresult CUDAAPI cuSurfObjectDestroy(CUsurfObject surfObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUsurfObject); - static auto func_ptr = LoadSymbol("cuSurfObjectDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfObject); -} - -CUresult CUDAAPI cuSurfObjectGetResourceDesc(CUDA_RESOURCE_DESC *pResDesc, - CUsurfObject surfObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_RESOURCE_DESC *, CUsurfObject); - static auto func_ptr = LoadSymbol("cuSurfObjectGetResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, surfObject); -} - -CUresult CUDAAPI cuDeviceCanAccessPeer(int *canAccessPeer, CUdevice dev, - CUdevice peerDev) { - using FuncPtr = CUresult(CUDAAPI *)(int *, CUdevice, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceCanAccessPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(canAccessPeer, dev, peerDev); -} - -CUresult CUDAAPI cuCtxEnablePeerAccess(CUcontext peerContext, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext, unsigned int); - 
static auto func_ptr = LoadSymbol("cuCtxEnablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerContext, Flags); -} - -CUresult CUDAAPI cuCtxDisablePeerAccess(CUcontext peerContext) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuCtxDisablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerContext); -} - -CUresult CUDAAPI cuDeviceGetP2PAttribute(int *value, - CUdevice_P2PAttribute attrib, - CUdevice srcDevice, - CUdevice dstDevice) { - using FuncPtr = - CUresult(CUDAAPI *)(int *, CUdevice_P2PAttribute, CUdevice, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetP2PAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attrib, srcDevice, dstDevice); -} - -CUresult CUDAAPI cuGraphicsUnregisterResource(CUgraphicsResource resource) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphicsResource); - static auto func_ptr = LoadSymbol("cuGraphicsUnregisterResource"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource); -} - -CUresult CUDAAPI cuGraphicsSubResourceGetMappedArray( - CUarray *pArray, CUgraphicsResource resource, unsigned int arrayIndex, - unsigned int mipLevel) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray *, CUgraphicsResource, - unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cuGraphicsSubResourceGetMappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pArray, resource, arrayIndex, mipLevel); -} - -CUresult CUDAAPI cuGraphicsResourceGetMappedMipmappedArray( - CUmipmappedArray *pMipmappedArray, CUgraphicsResource resource) { - using FuncPtr = CUresult(CUDAAPI *)(CUmipmappedArray *, CUgraphicsResource); - static auto func_ptr = - LoadSymbol("cuGraphicsResourceGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pMipmappedArray, resource); -} - -CUresult CUDAAPI 
cuGraphicsResourceGetMappedPointer( - CUdeviceptr *pDevPtr, size_t *pSize, CUgraphicsResource resource) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr *, size_t *, CUgraphicsResource); - static auto func_ptr = - LoadSymbol("cuGraphicsResourceGetMappedPointer_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pDevPtr, pSize, resource); -} - -CUresult CUDAAPI cuGraphicsResourceSetMapFlags(CUgraphicsResource resource, - unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphicsResource, unsigned int); - static auto func_ptr = - LoadSymbol("cuGraphicsResourceSetMapFlags_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource, flags); -} - -CUresult CUDAAPI cuGraphicsMapResources(unsigned int count, - CUgraphicsResource *resources, - CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(unsigned int, CUgraphicsResource *, CUstream); - static auto func_ptr = LoadSymbol("cuGraphicsMapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, hStream); -} - -CUresult CUDAAPI cuGraphicsUnmapResources(unsigned int count, - CUgraphicsResource *resources, - CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(unsigned int, CUgraphicsResource *, CUstream); - static auto func_ptr = LoadSymbol("cuGraphicsUnmapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, hStream); -} - -CUresult CUDAAPI cuGetExportTable(const void **ppExportTable, - const CUuuid *pExportTableId) { - using FuncPtr = CUresult(CUDAAPI *)(const void **, const CUuuid *); - static auto func_ptr = LoadSymbol("cuGetExportTable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ppExportTable, pExportTableId); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cuda_11_0.inc b/third_party/xla/third_party/tsl/tsl/cuda/cuda_11_0.inc deleted file mode 100644 index 8fb62f8bf5d8b6..00000000000000 --- 
a/third_party/xla/third_party/tsl/tsl/cuda/cuda_11_0.inc +++ /dev/null @@ -1,2943 +0,0 @@ -// Auto-generated, do not edit. - -extern "C" { - -CUresult CUDAAPI cuGetErrorString(CUresult error, const char **pStr) { - using FuncPtr = CUresult(CUDAAPI *)(CUresult, const char **); - static auto func_ptr = LoadSymbol("cuGetErrorString"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(error, pStr); -} - -CUresult CUDAAPI cuGetErrorName(CUresult error, const char **pStr) { - using FuncPtr = CUresult(CUDAAPI *)(CUresult, const char **); - static auto func_ptr = LoadSymbol("cuGetErrorName"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(error, pStr); -} - -CUresult CUDAAPI cuInit(unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(unsigned int); - static auto func_ptr = LoadSymbol("cuInit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(Flags); -} - -CUresult CUDAAPI cuDriverGetVersion(int *driverVersion) { - using FuncPtr = CUresult(CUDAAPI *)(int *); - static auto func_ptr = LoadSymbol("cuDriverGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(driverVersion); -} - -CUresult CUDAAPI cuDeviceGet(CUdevice *device, int ordinal) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice *, int); - static auto func_ptr = LoadSymbol("cuDeviceGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, ordinal); -} - -CUresult CUDAAPI cuDeviceGetCount(int *count) { - using FuncPtr = CUresult(CUDAAPI *)(int *); - static auto func_ptr = LoadSymbol("cuDeviceGetCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count); -} - -CUresult CUDAAPI cuDeviceGetName(char *name, int len, CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(char *, int, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetName"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(name, len, dev); -} - -CUresult CUDAAPI cuDeviceGetUuid(CUuuid *uuid, 
CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUuuid *, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetUuid"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(uuid, dev); -} - -CUresult CUDAAPI cuDeviceGetLuid(char *luid, unsigned int *deviceNodeMask, - CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(char *, unsigned int *, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetLuid"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(luid, deviceNodeMask, dev); -} - -CUresult CUDAAPI cuDeviceTotalMem(size_t *bytes, CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(size_t *, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceTotalMem_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(bytes, dev); -} - -CUresult CUDAAPI cuDeviceGetTexture1DLinearMaxWidth(size_t *maxWidthInElements, - CUarray_format format, - unsigned numChannels, - CUdevice dev) { - using FuncPtr = - CUresult(CUDAAPI *)(size_t *, CUarray_format, unsigned int, CUdevice); - static auto func_ptr = - LoadSymbol("cuDeviceGetTexture1DLinearMaxWidth"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(maxWidthInElements, format, numChannels, dev); -} - -CUresult CUDAAPI cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, - CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(int *, CUdevice_attribute, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pi, attrib, dev); -} - -CUresult CUDAAPI cuDeviceGetNvSciSyncAttributes(void *nvSciSyncAttrList, - CUdevice dev, int flags) { - using FuncPtr = CUresult(CUDAAPI *)(void *, CUdevice, int); - static auto func_ptr = LoadSymbol("cuDeviceGetNvSciSyncAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(nvSciSyncAttrList, dev, flags); -} - -#if CUDA_VERSION >= 11020 - -CUresult CUDAAPI cuDeviceSetMemPool(CUdevice dev, 
CUmemoryPool pool) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice, CUmemoryPool); - static auto func_ptr = LoadSymbol("cuDeviceSetMemPool"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev, pool); -} - -CUresult CUDAAPI cuDeviceGetMemPool(CUmemoryPool *pool, CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUmemoryPool *, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetMemPool"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pool, dev); -} - -CUresult CUDAAPI cuDeviceGetDefaultMemPool(CUmemoryPool *pool_out, - CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUmemoryPool *, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetDefaultMemPool"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pool_out, dev); -} - -#endif // CUDA_VERSION >= 11020 - -__CUDA_DEPRECATED CUresult CUDAAPI cuDeviceGetProperties(CUdevprop *prop, - CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevprop *, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(prop, dev); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuDeviceComputeCapability(int *major, - int *minor, - CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(int *, int *, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceComputeCapability"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(major, minor, dev); -} - -CUresult CUDAAPI cuDevicePrimaryCtxRetain(CUcontext *pctx, CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext *, CUdevice); - static auto func_ptr = LoadSymbol("cuDevicePrimaryCtxRetain"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pctx, dev); -} - -CUresult CUDAAPI cuDevicePrimaryCtxRelease(CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice); - static auto func_ptr = LoadSymbol("cuDevicePrimaryCtxRelease_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(dev); -} - -CUresult CUDAAPI cuDevicePrimaryCtxSetFlags(CUdevice dev, unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice, unsigned int); - static auto func_ptr = LoadSymbol("cuDevicePrimaryCtxSetFlags_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev, flags); -} - -CUresult CUDAAPI cuDevicePrimaryCtxGetState(CUdevice dev, unsigned int *flags, - int *active) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice, unsigned int *, int *); - static auto func_ptr = LoadSymbol("cuDevicePrimaryCtxGetState"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev, flags, active); -} - -CUresult CUDAAPI cuDevicePrimaryCtxReset(CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice); - static auto func_ptr = LoadSymbol("cuDevicePrimaryCtxReset_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev); -} - -CUresult CUDAAPI cuCtxCreate(CUcontext *pctx, unsigned int flags, - CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext *, unsigned int, CUdevice); - static auto func_ptr = LoadSymbol("cuCtxCreate_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pctx, flags, dev); -} - -CUresult CUDAAPI cuCtxDestroy(CUcontext ctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuCtxDestroy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx); -} - -CUresult CUDAAPI cuCtxPushCurrent(CUcontext ctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuCtxPushCurrent_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx); -} - -CUresult CUDAAPI cuCtxPopCurrent(CUcontext *pctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext *); - static auto func_ptr = LoadSymbol("cuCtxPopCurrent_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pctx); -} - -CUresult CUDAAPI cuCtxSetCurrent(CUcontext ctx) { - using 
FuncPtr = CUresult(CUDAAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuCtxSetCurrent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx); -} - -CUresult CUDAAPI cuCtxGetCurrent(CUcontext *pctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext *); - static auto func_ptr = LoadSymbol("cuCtxGetCurrent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pctx); -} - -CUresult CUDAAPI cuCtxGetDevice(CUdevice *device) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice *); - static auto func_ptr = LoadSymbol("cuCtxGetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -CUresult CUDAAPI cuCtxGetFlags(unsigned int *flags) { - using FuncPtr = CUresult(CUDAAPI *)(unsigned int *); - static auto func_ptr = LoadSymbol("cuCtxGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -CUresult CUDAAPI cuCtxSynchronize(void) { - using FuncPtr = CUresult(CUDAAPI *)(); - static auto func_ptr = LoadSymbol("cuCtxSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -CUresult CUDAAPI cuCtxSetLimit(CUlimit limit, size_t value) { - using FuncPtr = CUresult(CUDAAPI *)(CUlimit, size_t); - static auto func_ptr = LoadSymbol("cuCtxSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -CUresult CUDAAPI cuCtxGetLimit(size_t *pvalue, CUlimit limit) { - using FuncPtr = CUresult(CUDAAPI *)(size_t *, CUlimit); - static auto func_ptr = LoadSymbol("cuCtxGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pvalue, limit); -} - -CUresult CUDAAPI cuCtxGetCacheConfig(CUfunc_cache *pconfig) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunc_cache *); - static auto func_ptr = LoadSymbol("cuCtxGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pconfig); -} - -CUresult CUDAAPI cuCtxSetCacheConfig(CUfunc_cache config) { - using FuncPtr = 
CUresult(CUDAAPI *)(CUfunc_cache); - static auto func_ptr = LoadSymbol("cuCtxSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config); -} - -CUresult CUDAAPI cuCtxGetSharedMemConfig(CUsharedconfig *pConfig) { - using FuncPtr = CUresult(CUDAAPI *)(CUsharedconfig *); - static auto func_ptr = LoadSymbol("cuCtxGetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pConfig); -} - -CUresult CUDAAPI cuCtxSetSharedMemConfig(CUsharedconfig config) { - using FuncPtr = CUresult(CUDAAPI *)(CUsharedconfig); - static auto func_ptr = LoadSymbol("cuCtxSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config); -} - -CUresult CUDAAPI cuCtxGetApiVersion(CUcontext ctx, unsigned int *version) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext, unsigned int *); - static auto func_ptr = LoadSymbol("cuCtxGetApiVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx, version); -} - -CUresult CUDAAPI cuCtxGetStreamPriorityRange(int *leastPriority, - int *greatestPriority) { - using FuncPtr = CUresult(CUDAAPI *)(int *, int *); - static auto func_ptr = LoadSymbol("cuCtxGetStreamPriorityRange"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(leastPriority, greatestPriority); -} - -CUresult CUDAAPI cuCtxResetPersistingL2Cache(void) { - using FuncPtr = CUresult(CUDAAPI *)(); - static auto func_ptr = LoadSymbol("cuCtxResetPersistingL2Cache"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuCtxAttach(CUcontext *pctx, - unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext *, unsigned int); - static auto func_ptr = LoadSymbol("cuCtxAttach"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pctx, flags); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuCtxDetach(CUcontext ctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext); - static auto func_ptr 
= LoadSymbol("cuCtxDetach"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx); -} - -CUresult CUDAAPI cuModuleLoad(CUmodule *module, const char *fname) { - using FuncPtr = CUresult(CUDAAPI *)(CUmodule *, const char *); - static auto func_ptr = LoadSymbol("cuModuleLoad"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(module, fname); -} - -CUresult CUDAAPI cuModuleLoadData(CUmodule *module, const void *image) { - using FuncPtr = CUresult(CUDAAPI *)(CUmodule *, const void *); - static auto func_ptr = LoadSymbol("cuModuleLoadData"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(module, image); -} - -CUresult CUDAAPI cuModuleLoadDataEx(CUmodule *module, const void *image, - unsigned int numOptions, - CUjit_option *options, - void **optionValues) { - using FuncPtr = CUresult(CUDAAPI *)(CUmodule *, const void *, unsigned int, - CUjit_option *, void **); - static auto func_ptr = LoadSymbol("cuModuleLoadDataEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(module, image, numOptions, options, optionValues); -} - -CUresult CUDAAPI cuModuleLoadFatBinary(CUmodule *module, const void *fatCubin) { - using FuncPtr = CUresult(CUDAAPI *)(CUmodule *, const void *); - static auto func_ptr = LoadSymbol("cuModuleLoadFatBinary"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(module, fatCubin); -} - -CUresult CUDAAPI cuModuleUnload(CUmodule hmod) { - using FuncPtr = CUresult(CUDAAPI *)(CUmodule); - static auto func_ptr = LoadSymbol("cuModuleUnload"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hmod); -} - -CUresult CUDAAPI cuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, - const char *name) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction *, CUmodule, const char *); - static auto func_ptr = LoadSymbol("cuModuleGetFunction"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, hmod, name); -} - -CUresult CUDAAPI 
cuModuleGetGlobal(CUdeviceptr *dptr, size_t *bytes, - CUmodule hmod, const char *name) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr *, size_t *, CUmodule, const char *); - static auto func_ptr = LoadSymbol("cuModuleGetGlobal_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr, bytes, hmod, name); -} - -CUresult CUDAAPI cuModuleGetTexRef(CUtexref *pTexRef, CUmodule hmod, - const char *name) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref *, CUmodule, const char *); - static auto func_ptr = LoadSymbol("cuModuleGetTexRef"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexRef, hmod, name); -} - -CUresult CUDAAPI cuModuleGetSurfRef(CUsurfref *pSurfRef, CUmodule hmod, - const char *name) { - using FuncPtr = CUresult(CUDAAPI *)(CUsurfref *, CUmodule, const char *); - static auto func_ptr = LoadSymbol("cuModuleGetSurfRef"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pSurfRef, hmod, name); -} - -CUresult CUDAAPI cuLinkCreate(unsigned int numOptions, CUjit_option *options, - void **optionValues, CUlinkState *stateOut) { - using FuncPtr = - CUresult(CUDAAPI *)(unsigned int, CUjit_option *, void **, CUlinkState *); - static auto func_ptr = LoadSymbol("cuLinkCreate_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numOptions, options, optionValues, stateOut); -} - -CUresult CUDAAPI cuLinkAddData(CUlinkState state, CUjitInputType type, - void *data, size_t size, const char *name, - unsigned int numOptions, CUjit_option *options, - void **optionValues) { - using FuncPtr = - CUresult(CUDAAPI *)(CUlinkState, CUjitInputType, void *, size_t, - const char *, unsigned int, CUjit_option *, void **); - static auto func_ptr = LoadSymbol("cuLinkAddData_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(state, type, data, size, name, numOptions, options, - optionValues); -} - -CUresult CUDAAPI cuLinkAddFile(CUlinkState state, CUjitInputType type, - const char 
*path, unsigned int numOptions, - CUjit_option *options, void **optionValues) { - using FuncPtr = CUresult(CUDAAPI *)(CUlinkState, CUjitInputType, const char *, - unsigned int, CUjit_option *, void **); - static auto func_ptr = LoadSymbol("cuLinkAddFile_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(state, type, path, numOptions, options, optionValues); -} - -CUresult CUDAAPI cuLinkComplete(CUlinkState state, void **cubinOut, - size_t *sizeOut) { - using FuncPtr = CUresult(CUDAAPI *)(CUlinkState, void **, size_t *); - static auto func_ptr = LoadSymbol("cuLinkComplete"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(state, cubinOut, sizeOut); -} - -CUresult CUDAAPI cuLinkDestroy(CUlinkState state) { - using FuncPtr = CUresult(CUDAAPI *)(CUlinkState); - static auto func_ptr = LoadSymbol("cuLinkDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(state); -} - -CUresult CUDAAPI cuMemGetInfo(size_t *free, size_t *total) { - using FuncPtr = CUresult(CUDAAPI *)(size_t *, size_t *); - static auto func_ptr = LoadSymbol("cuMemGetInfo_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(free, total); -} - -CUresult CUDAAPI cuMemAlloc(CUdeviceptr *dptr, size_t bytesize) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t); - static auto func_ptr = LoadSymbol("cuMemAlloc_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr, bytesize); -} - -CUresult CUDAAPI cuMemAllocPitch(CUdeviceptr *dptr, size_t *pPitch, - size_t WidthInBytes, size_t Height, - unsigned int ElementSizeBytes) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t *, size_t, size_t, - unsigned int); - static auto func_ptr = LoadSymbol("cuMemAllocPitch_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr, pPitch, WidthInBytes, Height, ElementSizeBytes); -} - -CUresult CUDAAPI cuMemFree(CUdeviceptr dptr) { - using FuncPtr = CUresult(CUDAAPI 
*)(CUdeviceptr); - static auto func_ptr = LoadSymbol("cuMemFree_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr); -} - -CUresult CUDAAPI cuMemGetAddressRange(CUdeviceptr *pbase, size_t *psize, - CUdeviceptr dptr) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t *, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuMemGetAddressRange_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pbase, psize, dptr); -} - -CUresult CUDAAPI cuMemAllocHost(void **pp, size_t bytesize) { - using FuncPtr = CUresult(CUDAAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cuMemAllocHost_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pp, bytesize); -} - -CUresult CUDAAPI cuMemFreeHost(void *p) { - using FuncPtr = CUresult(CUDAAPI *)(void *); - static auto func_ptr = LoadSymbol("cuMemFreeHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -CUresult CUDAAPI cuMemHostAlloc(void **pp, size_t bytesize, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cuMemHostAlloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pp, bytesize, Flags); -} - -CUresult CUDAAPI cuMemHostGetDevicePointer(CUdeviceptr *pdptr, void *p, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, void *, unsigned int); - static auto func_ptr = LoadSymbol("cuMemHostGetDevicePointer_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pdptr, p, Flags); -} - -CUresult CUDAAPI cuMemHostGetFlags(unsigned int *pFlags, void *p) { - using FuncPtr = CUresult(CUDAAPI *)(unsigned int *, void *); - static auto func_ptr = LoadSymbol("cuMemHostGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFlags, p); -} - -CUresult CUDAAPI cuMemAllocManaged(CUdeviceptr *dptr, size_t bytesize, - unsigned int flags) { - using FuncPtr = 
CUresult(CUDAAPI *)(CUdeviceptr *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cuMemAllocManaged"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr, bytesize, flags); -} - -CUresult CUDAAPI cuDeviceGetByPCIBusId(CUdevice *dev, const char *pciBusId) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice *, const char *); - static auto func_ptr = LoadSymbol("cuDeviceGetByPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev, pciBusId); -} - -CUresult CUDAAPI cuDeviceGetPCIBusId(char *pciBusId, int len, CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(char *, int, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pciBusId, len, dev); -} - -CUresult CUDAAPI cuIpcGetEventHandle(CUipcEventHandle *pHandle, CUevent event) { - using FuncPtr = CUresult(CUDAAPI *)(CUipcEventHandle *, CUevent); - static auto func_ptr = LoadSymbol("cuIpcGetEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHandle, event); -} - -CUresult CUDAAPI cuIpcOpenEventHandle(CUevent *phEvent, - CUipcEventHandle handle) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent *, CUipcEventHandle); - static auto func_ptr = LoadSymbol("cuIpcOpenEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phEvent, handle); -} - -CUresult CUDAAPI cuIpcGetMemHandle(CUipcMemHandle *pHandle, CUdeviceptr dptr) { - using FuncPtr = CUresult(CUDAAPI *)(CUipcMemHandle *, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuIpcGetMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHandle, dptr); -} - -CUresult CUDAAPI cuIpcOpenMemHandle(CUdeviceptr *pdptr, CUipcMemHandle handle, - unsigned int Flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr *, CUipcMemHandle, unsigned int); - static auto func_ptr = LoadSymbol("cuIpcOpenMemHandle_v2"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(pdptr, handle, Flags); -} - -CUresult CUDAAPI cuIpcCloseMemHandle(CUdeviceptr dptr) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr); - static auto func_ptr = LoadSymbol("cuIpcCloseMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr); -} - -CUresult CUDAAPI cuMemHostRegister(void *p, size_t bytesize, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cuMemHostRegister_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, bytesize, Flags); -} - -CUresult CUDAAPI cuMemHostUnregister(void *p) { - using FuncPtr = CUresult(CUDAAPI *)(void *); - static auto func_ptr = LoadSymbol("cuMemHostUnregister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -CUresult CUDAAPI cuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemcpy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, ByteCount); -} - -CUresult CUDAAPI cuMemcpyPeer(CUdeviceptr dstDevice, CUcontext dstContext, - CUdeviceptr srcDevice, CUcontext srcContext, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUcontext, CUdeviceptr, - CUcontext, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstContext, srcDevice, srcContext, ByteCount); -} - -CUresult CUDAAPI cuMemcpyHtoD(CUdeviceptr dstDevice, const void *srcHost, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, const void *, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyHtoD_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, srcHost, ByteCount); -} - -CUresult CUDAAPI cuMemcpyDtoH(void *dstHost, CUdeviceptr srcDevice, - size_t 
ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(void *, CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyDtoH_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstHost, srcDevice, ByteCount); -} - -CUresult CUDAAPI cuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyDtoD_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, srcDevice, ByteCount); -} - -CUresult CUDAAPI cuMemcpyDtoA(CUarray dstArray, size_t dstOffset, - CUdeviceptr srcDevice, size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray, size_t, CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyDtoA_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstArray, dstOffset, srcDevice, ByteCount); -} - -CUresult CUDAAPI cuMemcpyAtoD(CUdeviceptr dstDevice, CUarray srcArray, - size_t srcOffset, size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUarray, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyAtoD_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, srcArray, srcOffset, ByteCount); -} - -CUresult CUDAAPI cuMemcpyHtoA(CUarray dstArray, size_t dstOffset, - const void *srcHost, size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray, size_t, const void *, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyHtoA_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstArray, dstOffset, srcHost, ByteCount); -} - -CUresult CUDAAPI cuMemcpyAtoH(void *dstHost, CUarray srcArray, size_t srcOffset, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(void *, CUarray, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyAtoH_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstHost, srcArray, srcOffset, 
ByteCount); -} - -CUresult CUDAAPI cuMemcpyAtoA(CUarray dstArray, size_t dstOffset, - CUarray srcArray, size_t srcOffset, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray, size_t, CUarray, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyAtoA_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstArray, dstOffset, srcArray, srcOffset, ByteCount); -} - -CUresult CUDAAPI cuMemcpy2D(const CUDA_MEMCPY2D *pCopy) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY2D *); - static auto func_ptr = LoadSymbol("cuMemcpy2D_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy); -} - -CUresult CUDAAPI cuMemcpy2DUnaligned(const CUDA_MEMCPY2D *pCopy) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY2D *); - static auto func_ptr = LoadSymbol("cuMemcpy2DUnaligned_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy); -} - -CUresult CUDAAPI cuMemcpy3D(const CUDA_MEMCPY3D *pCopy) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY3D *); - static auto func_ptr = LoadSymbol("cuMemcpy3D_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy); -} - -CUresult CUDAAPI cuMemcpy3DPeer(const CUDA_MEMCPY3D_PEER *pCopy) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY3D_PEER *); - static auto func_ptr = LoadSymbol("cuMemcpy3DPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy); -} - -CUresult CUDAAPI cuMemcpyAsync(CUdeviceptr dst, CUdeviceptr src, - size_t ByteCount, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, CUdeviceptr, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpyPeerAsync(CUdeviceptr dstDevice, CUcontext dstContext, - CUdeviceptr srcDevice, CUcontext srcContext, - size_t ByteCount, CUstream hStream) { - using FuncPtr = 
CUresult(CUDAAPI *)(CUdeviceptr, CUcontext, CUdeviceptr, - CUcontext, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstContext, srcDevice, srcContext, ByteCount, - hStream); -} - -CUresult CUDAAPI cuMemcpyHtoDAsync(CUdeviceptr dstDevice, const void *srcHost, - size_t ByteCount, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, const void *, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyHtoDAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, srcHost, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpyDtoHAsync(void *dstHost, CUdeviceptr srcDevice, - size_t ByteCount, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(void *, CUdeviceptr, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyDtoHAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstHost, srcDevice, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpyDtoDAsync(CUdeviceptr dstDevice, CUdeviceptr srcDevice, - size_t ByteCount, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, CUdeviceptr, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyDtoDAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, srcDevice, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpyHtoAAsync(CUarray dstArray, size_t dstOffset, - const void *srcHost, size_t ByteCount, - CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUarray, size_t, const void *, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyHtoAAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstArray, dstOffset, srcHost, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpyAtoHAsync(void *dstHost, CUarray srcArray, - size_t srcOffset, size_t ByteCount, - CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI 
*)(void *, CUarray, size_t, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyAtoHAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstHost, srcArray, srcOffset, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpy2DAsync(const CUDA_MEMCPY2D *pCopy, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY2D *, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpy2DAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy, hStream); -} - -CUresult CUDAAPI cuMemcpy3DAsync(const CUDA_MEMCPY3D *pCopy, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY3D *, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpy3DAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy, hStream); -} - -CUresult CUDAAPI cuMemcpy3DPeerAsync(const CUDA_MEMCPY3D_PEER *pCopy, - CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY3D_PEER *, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpy3DPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy, hStream); -} - -CUresult CUDAAPI cuMemsetD8(CUdeviceptr dstDevice, unsigned char uc, size_t N) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, unsigned char, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD8_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, uc, N); -} - -CUresult CUDAAPI cuMemsetD16(CUdeviceptr dstDevice, unsigned short us, - size_t N) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, unsigned short, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD16_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, us, N); -} - -CUresult CUDAAPI cuMemsetD32(CUdeviceptr dstDevice, unsigned int ui, size_t N) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, unsigned int, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD32_v2"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(dstDevice, ui, N); -} - -CUresult CUDAAPI cuMemsetD2D8(CUdeviceptr dstDevice, size_t dstPitch, - unsigned char uc, size_t Width, size_t Height) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned char, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD2D8_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, uc, Width, Height); -} - -CUresult CUDAAPI cuMemsetD2D16(CUdeviceptr dstDevice, size_t dstPitch, - unsigned short us, size_t Width, size_t Height) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned short, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD2D16_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, us, Width, Height); -} - -CUresult CUDAAPI cuMemsetD2D32(CUdeviceptr dstDevice, size_t dstPitch, - unsigned int ui, size_t Width, size_t Height) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned int, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD2D32_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, ui, Width, Height); -} - -CUresult CUDAAPI cuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, - size_t N, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, unsigned char, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD8Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, uc, N, hStream); -} - -CUresult CUDAAPI cuMemsetD16Async(CUdeviceptr dstDevice, unsigned short us, - size_t N, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, unsigned short, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD16Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, us, N, hStream); -} - -CUresult CUDAAPI cuMemsetD32Async(CUdeviceptr dstDevice, 
unsigned int ui, - size_t N, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, unsigned int, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD32Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, ui, N, hStream); -} - -CUresult CUDAAPI cuMemsetD2D8Async(CUdeviceptr dstDevice, size_t dstPitch, - unsigned char uc, size_t Width, - size_t Height, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned char, - size_t, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD2D8Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, uc, Width, Height, hStream); -} - -CUresult CUDAAPI cuMemsetD2D16Async(CUdeviceptr dstDevice, size_t dstPitch, - unsigned short us, size_t Width, - size_t Height, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned short, - size_t, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD2D16Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, us, Width, Height, hStream); -} - -CUresult CUDAAPI cuMemsetD2D32Async(CUdeviceptr dstDevice, size_t dstPitch, - unsigned int ui, size_t Width, - size_t Height, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned int, size_t, - size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD2D32Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, ui, Width, Height, hStream); -} - -CUresult CUDAAPI cuArrayCreate(CUarray *pHandle, - const CUDA_ARRAY_DESCRIPTOR *pAllocateArray) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray *, const CUDA_ARRAY_DESCRIPTOR *); - static auto func_ptr = LoadSymbol("cuArrayCreate_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHandle, pAllocateArray); -} - -CUresult CUDAAPI cuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR 
*pArrayDescriptor, - CUarray hArray) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_ARRAY_DESCRIPTOR *, CUarray); - static auto func_ptr = LoadSymbol("cuArrayGetDescriptor_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pArrayDescriptor, hArray); -} - -#if CUDA_VERSION >= 11100 - -CUresult CUDAAPI cuArrayGetSparseProperties( - CUDA_ARRAY_SPARSE_PROPERTIES *sparseProperties, CUarray array) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_ARRAY_SPARSE_PROPERTIES *, CUarray); - static auto func_ptr = LoadSymbol("cuArrayGetSparseProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(sparseProperties, array); -} - -CUresult CUDAAPI cuMipmappedArrayGetSparseProperties( - CUDA_ARRAY_SPARSE_PROPERTIES *sparseProperties, CUmipmappedArray mipmap) { - using FuncPtr = - CUresult(CUDAAPI *)(CUDA_ARRAY_SPARSE_PROPERTIES *, CUmipmappedArray); - static auto func_ptr = - LoadSymbol("cuMipmappedArrayGetSparseProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(sparseProperties, mipmap); -} - -#endif // CUDA_VERSION >= 11100 - -CUresult CUDAAPI cuArrayGetPlane(CUarray *pPlaneArray, CUarray hArray, - unsigned int planeIdx) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray *, CUarray, unsigned int); - static auto func_ptr = LoadSymbol("cuArrayGetPlane"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pPlaneArray, hArray, planeIdx); -} - -CUresult CUDAAPI cuArrayDestroy(CUarray hArray) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray); - static auto func_ptr = LoadSymbol("cuArrayDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hArray); -} - -CUresult CUDAAPI cuArray3DCreate( - CUarray *pHandle, const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray) { - using FuncPtr = - CUresult(CUDAAPI *)(CUarray *, const CUDA_ARRAY3D_DESCRIPTOR *); - static auto func_ptr = LoadSymbol("cuArray3DCreate_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHandle, 
pAllocateArray); -} - -CUresult CUDAAPI cuArray3DGetDescriptor( - CUDA_ARRAY3D_DESCRIPTOR *pArrayDescriptor, CUarray hArray) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_ARRAY3D_DESCRIPTOR *, CUarray); - static auto func_ptr = LoadSymbol("cuArray3DGetDescriptor_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pArrayDescriptor, hArray); -} - -CUresult CUDAAPI -cuMipmappedArrayCreate(CUmipmappedArray *pHandle, - const CUDA_ARRAY3D_DESCRIPTOR *pMipmappedArrayDesc, - unsigned int numMipmapLevels) { - using FuncPtr = CUresult(CUDAAPI *)( - CUmipmappedArray *, const CUDA_ARRAY3D_DESCRIPTOR *, unsigned int); - static auto func_ptr = LoadSymbol("cuMipmappedArrayCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHandle, pMipmappedArrayDesc, numMipmapLevels); -} - -CUresult CUDAAPI cuMipmappedArrayGetLevel(CUarray *pLevelArray, - CUmipmappedArray hMipmappedArray, - unsigned int level) { - using FuncPtr = - CUresult(CUDAAPI *)(CUarray *, CUmipmappedArray, unsigned int); - static auto func_ptr = LoadSymbol("cuMipmappedArrayGetLevel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pLevelArray, hMipmappedArray, level); -} - -CUresult CUDAAPI cuMipmappedArrayDestroy(CUmipmappedArray hMipmappedArray) { - using FuncPtr = CUresult(CUDAAPI *)(CUmipmappedArray); - static auto func_ptr = LoadSymbol("cuMipmappedArrayDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hMipmappedArray); -} - -CUresult CUDAAPI cuMemAddressReserve(CUdeviceptr *ptr, size_t size, - size_t alignment, CUdeviceptr addr, - unsigned long long flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t, size_t, - CUdeviceptr, unsigned long long); - static auto func_ptr = LoadSymbol("cuMemAddressReserve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size, alignment, addr, flags); -} - -CUresult CUDAAPI cuMemAddressFree(CUdeviceptr ptr, size_t size) { - using FuncPtr = 
CUresult(CUDAAPI *)(CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemAddressFree"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size); -} - -CUresult CUDAAPI cuMemCreate(CUmemGenericAllocationHandle *handle, size_t size, - const CUmemAllocationProp *prop, - unsigned long long flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUmemGenericAllocationHandle *, size_t, - const CUmemAllocationProp *, unsigned long long); - static auto func_ptr = LoadSymbol("cuMemCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, size, prop, flags); -} - -CUresult CUDAAPI cuMemRelease(CUmemGenericAllocationHandle handle) { - using FuncPtr = CUresult(CUDAAPI *)(CUmemGenericAllocationHandle); - static auto func_ptr = LoadSymbol("cuMemRelease"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -CUresult CUDAAPI cuMemMap(CUdeviceptr ptr, size_t size, size_t offset, - CUmemGenericAllocationHandle handle, - unsigned long long flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, size_t, size_t, - CUmemGenericAllocationHandle, unsigned long long); - static auto func_ptr = LoadSymbol("cuMemMap"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size, offset, handle, flags); -} - -#if CUDA_VERSION >= 11100 - -CUresult CUDAAPI cuMemMapArrayAsync(CUarrayMapInfo *mapInfoList, - unsigned int count, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUarrayMapInfo *, unsigned int, CUstream); - static auto func_ptr = LoadSymbol("cuMemMapArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mapInfoList, count, hStream); -} - -#endif // CUDA_VERSION >= 11100 - -CUresult CUDAAPI cuMemUnmap(CUdeviceptr ptr, size_t size) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemUnmap"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size); -} - -CUresult CUDAAPI 
cuMemSetAccess(CUdeviceptr ptr, size_t size, - const CUmemAccessDesc *desc, size_t count) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, size_t, const CUmemAccessDesc *, size_t); - static auto func_ptr = LoadSymbol("cuMemSetAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size, desc, count); -} - -CUresult CUDAAPI cuMemGetAccess(unsigned long long *flags, - const CUmemLocation *location, - CUdeviceptr ptr) { - using FuncPtr = CUresult(CUDAAPI *)(unsigned long long *, - const CUmemLocation *, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuMemGetAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags, location, ptr); -} - -CUresult CUDAAPI cuMemExportToShareableHandle( - void *shareableHandle, CUmemGenericAllocationHandle handle, - CUmemAllocationHandleType handleType, unsigned long long flags) { - using FuncPtr = - CUresult(CUDAAPI *)(void *, CUmemGenericAllocationHandle, - CUmemAllocationHandleType, unsigned long long); - static auto func_ptr = LoadSymbol("cuMemExportToShareableHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(shareableHandle, handle, handleType, flags); -} - -CUresult CUDAAPI cuMemImportFromShareableHandle( - CUmemGenericAllocationHandle *handle, void *osHandle, - CUmemAllocationHandleType shHandleType) { - using FuncPtr = CUresult(CUDAAPI *)(CUmemGenericAllocationHandle *, void *, - CUmemAllocationHandleType); - static auto func_ptr = LoadSymbol("cuMemImportFromShareableHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, osHandle, shHandleType); -} - -CUresult CUDAAPI cuMemGetAllocationGranularity( - size_t *granularity, const CUmemAllocationProp *prop, - CUmemAllocationGranularity_flags option) { - using FuncPtr = CUresult(CUDAAPI *)(size_t *, const CUmemAllocationProp *, - CUmemAllocationGranularity_flags); - static auto func_ptr = LoadSymbol("cuMemGetAllocationGranularity"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(granularity, prop, option); -} - -CUresult CUDAAPI cuMemGetAllocationPropertiesFromHandle( - CUmemAllocationProp *prop, CUmemGenericAllocationHandle handle) { - using FuncPtr = - CUresult(CUDAAPI *)(CUmemAllocationProp *, CUmemGenericAllocationHandle); - static auto func_ptr = - LoadSymbol("cuMemGetAllocationPropertiesFromHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(prop, handle); -} - -CUresult CUDAAPI -cuMemRetainAllocationHandle(CUmemGenericAllocationHandle *handle, void *addr) { - using FuncPtr = CUresult(CUDAAPI *)(CUmemGenericAllocationHandle *, void *); - static auto func_ptr = LoadSymbol("cuMemRetainAllocationHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, addr); -} - -CUresult CUDAAPI cuMemFreeAsync(CUdeviceptr dptr, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUstream); - static auto func_ptr = LoadSymbol("cuMemFreeAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr, hStream); -} - -CUresult CUDAAPI cuMemAllocAsync(CUdeviceptr *dptr, size_t bytesize, - CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemAllocAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr, bytesize, hStream); -} - -#if CUDA_VERSION >= 11020 - -CUresult CUDAAPI cuMemPoolTrimTo(CUmemoryPool pool, size_t minBytesToKeep) { - using FuncPtr = CUresult(CUDAAPI *)(CUmemoryPool, size_t); - static auto func_ptr = LoadSymbol("cuMemPoolTrimTo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pool, minBytesToKeep); -} - -CUresult CUDAAPI cuMemPoolSetAttribute(CUmemoryPool pool, - CUmemPool_attribute attr, void *value) { - using FuncPtr = - CUresult(CUDAAPI *)(CUmemoryPool, CUmemPool_attribute, void *); - static auto func_ptr = LoadSymbol("cuMemPoolSetAttribute"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(pool, attr, value); -} - -CUresult CUDAAPI cuMemPoolGetAttribute(CUmemoryPool pool, - CUmemPool_attribute attr, void *value) { - using FuncPtr = - CUresult(CUDAAPI *)(CUmemoryPool, CUmemPool_attribute, void *); - static auto func_ptr = LoadSymbol("cuMemPoolGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pool, attr, value); -} - -CUresult CUDAAPI cuMemPoolSetAccess(CUmemoryPool pool, - const CUmemAccessDesc *map, size_t count) { - using FuncPtr = - CUresult(CUDAAPI *)(CUmemoryPool, const CUmemAccessDesc *, size_t); - static auto func_ptr = LoadSymbol("cuMemPoolSetAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pool, map, count); -} - -CUresult CUDAAPI cuMemPoolGetAccess(CUmemAccess_flags *flags, - CUmemoryPool memPool, - CUmemLocation *location) { - using FuncPtr = - CUresult(CUDAAPI *)(CUmemAccess_flags *, CUmemoryPool, CUmemLocation *); - static auto func_ptr = LoadSymbol("cuMemPoolGetAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags, memPool, location); -} - -CUresult CUDAAPI cuMemPoolCreate(CUmemoryPool *pool, - const CUmemPoolProps *poolProps) { - using FuncPtr = CUresult(CUDAAPI *)(CUmemoryPool *, const CUmemPoolProps *); - static auto func_ptr = LoadSymbol("cuMemPoolCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pool, poolProps); -} - -CUresult CUDAAPI cuMemPoolDestroy(CUmemoryPool pool) { - using FuncPtr = CUresult(CUDAAPI *)(CUmemoryPool); - static auto func_ptr = LoadSymbol("cuMemPoolDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pool); -} - -CUresult CUDAAPI cuMemAllocFromPoolAsync(CUdeviceptr *dptr, size_t bytesize, - CUmemoryPool pool, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr *, size_t, CUmemoryPool, CUstream); - static auto func_ptr = LoadSymbol("cuMemAllocFromPoolAsync"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(dptr, bytesize, pool, hStream); -} - -CUresult CUDAAPI cuMemPoolExportToShareableHandle( - void *handle_out, CUmemoryPool pool, CUmemAllocationHandleType handleType, - unsigned long long flags) { - using FuncPtr = CUresult(CUDAAPI *)( - void *, CUmemoryPool, CUmemAllocationHandleType, unsigned long long); - static auto func_ptr = - LoadSymbol("cuMemPoolExportToShareableHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle_out, pool, handleType, flags); -} - -CUresult CUDAAPI cuMemPoolImportFromShareableHandle( - CUmemoryPool *pool_out, void *handle, CUmemAllocationHandleType handleType, - unsigned long long flags) { - using FuncPtr = CUresult(CUDAAPI *)( - CUmemoryPool *, void *, CUmemAllocationHandleType, unsigned long long); - static auto func_ptr = - LoadSymbol("cuMemPoolImportFromShareableHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pool_out, handle, handleType, flags); -} - -CUresult CUDAAPI cuMemPoolExportPointer(CUmemPoolPtrExportData *shareData_out, - CUdeviceptr ptr) { - using FuncPtr = CUresult(CUDAAPI *)(CUmemPoolPtrExportData *, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuMemPoolExportPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(shareData_out, ptr); -} - -CUresult CUDAAPI cuMemPoolImportPointer(CUdeviceptr *ptr_out, CUmemoryPool pool, - CUmemPoolPtrExportData *shareData) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, CUmemoryPool, - CUmemPoolPtrExportData *); - static auto func_ptr = LoadSymbol("cuMemPoolImportPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr_out, pool, shareData); -} - -#endif // CUDA_VERSION >= 11020 - -CUresult CUDAAPI cuPointerGetAttribute(void *data, - CUpointer_attribute attribute, - CUdeviceptr ptr) { - using FuncPtr = CUresult(CUDAAPI *)(void *, CUpointer_attribute, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuPointerGetAttribute"); 
- if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, attribute, ptr); -} - -CUresult CUDAAPI cuMemPrefetchAsync(CUdeviceptr devPtr, size_t count, - CUdevice dstDevice, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t, CUdevice, CUstream); - static auto func_ptr = LoadSymbol("cuMemPrefetchAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, dstDevice, hStream); -} - -CUresult CUDAAPI cuMemAdvise(CUdeviceptr devPtr, size_t count, - CUmem_advise advice, CUdevice device) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, size_t, CUmem_advise, CUdevice); - static auto func_ptr = LoadSymbol("cuMemAdvise"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, advice, device); -} - -CUresult CUDAAPI cuMemRangeGetAttribute(void *data, size_t dataSize, - CUmem_range_attribute attribute, - CUdeviceptr devPtr, size_t count) { - using FuncPtr = CUresult(CUDAAPI *)(void *, size_t, CUmem_range_attribute, - CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemRangeGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSize, attribute, devPtr, count); -} - -CUresult CUDAAPI cuMemRangeGetAttributes(void **data, size_t *dataSizes, - CUmem_range_attribute *attributes, - size_t numAttributes, - CUdeviceptr devPtr, size_t count) { - using FuncPtr = CUresult(CUDAAPI *)( - void **, size_t *, CUmem_range_attribute *, size_t, CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemRangeGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSizes, attributes, numAttributes, devPtr, count); -} - -CUresult CUDAAPI cuPointerSetAttribute(const void *value, - CUpointer_attribute attribute, - CUdeviceptr ptr) { - using FuncPtr = - CUresult(CUDAAPI *)(const void *, CUpointer_attribute, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuPointerSetAttribute"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(value, attribute, ptr); -} - -CUresult CUDAAPI cuPointerGetAttributes(unsigned int numAttributes, - CUpointer_attribute *attributes, - void **data, CUdeviceptr ptr) { - using FuncPtr = CUresult(CUDAAPI *)(unsigned int, CUpointer_attribute *, - void **, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuPointerGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numAttributes, attributes, data, ptr); -} - -CUresult CUDAAPI cuStreamCreate(CUstream *phStream, unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream *, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phStream, Flags); -} - -CUresult CUDAAPI cuStreamCreateWithPriority(CUstream *phStream, - unsigned int flags, int priority) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream *, unsigned int, int); - static auto func_ptr = LoadSymbol("cuStreamCreateWithPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phStream, flags, priority); -} - -CUresult CUDAAPI cuStreamGetPriority(CUstream hStream, int *priority) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, int *); - static auto func_ptr = LoadSymbol("cuStreamGetPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, priority); -} - -CUresult CUDAAPI cuStreamGetFlags(CUstream hStream, unsigned int *flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, unsigned int *); - static auto func_ptr = LoadSymbol("cuStreamGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, flags); -} - -CUresult CUDAAPI cuStreamGetCtx(CUstream hStream, CUcontext *pctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUcontext *); - static auto func_ptr = LoadSymbol("cuStreamGetCtx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, pctx); -} - -CUresult CUDAAPI 
cuStreamWaitEvent(CUstream hStream, CUevent hEvent, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUevent, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamWaitEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, hEvent, Flags); -} - -CUresult CUDAAPI cuStreamAddCallback(CUstream hStream, - CUstreamCallback callback, void *userData, - unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUstreamCallback, void *, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamAddCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, callback, userData, flags); -} - -CUresult CUDAAPI cuStreamBeginCapture(CUstream hStream, - CUstreamCaptureMode mode) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUstreamCaptureMode); - static auto func_ptr = LoadSymbol("cuStreamBeginCapture_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, mode); -} - -CUresult CUDAAPI cuThreadExchangeStreamCaptureMode(CUstreamCaptureMode *mode) { - using FuncPtr = CUresult(CUDAAPI *)(CUstreamCaptureMode *); - static auto func_ptr = - LoadSymbol("cuThreadExchangeStreamCaptureMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mode); -} - -CUresult CUDAAPI cuStreamEndCapture(CUstream hStream, CUgraph *phGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUgraph *); - static auto func_ptr = LoadSymbol("cuStreamEndCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, phGraph); -} - -CUresult CUDAAPI cuStreamIsCapturing(CUstream hStream, - CUstreamCaptureStatus *captureStatus) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUstreamCaptureStatus *); - static auto func_ptr = LoadSymbol("cuStreamIsCapturing"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, captureStatus); -} - -CUresult CUDAAPI cuStreamGetCaptureInfo( - CUstream hStream, CUstreamCaptureStatus 
*captureStatus_out, - cuuint64_t *id_out) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUstreamCaptureStatus *, cuuint64_t *); - static auto func_ptr = LoadSymbol("cuStreamGetCaptureInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, captureStatus_out, id_out); -} - -CUresult CUDAAPI cuStreamGetCaptureInfo_v2( - CUstream hStream, CUstreamCaptureStatus *captureStatus_out, - cuuint64_t *id_out, CUgraph *graph_out, - const CUgraphNode **dependencies_out, size_t *numDependencies_out) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUstreamCaptureStatus *, cuuint64_t *, - CUgraph *, const CUgraphNode **, size_t *); - static auto func_ptr = LoadSymbol("cuStreamGetCaptureInfo_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, captureStatus_out, id_out, graph_out, - dependencies_out, numDependencies_out); -} - -CUresult CUDAAPI cuStreamUpdateCaptureDependencies(CUstream hStream, - CUgraphNode *dependencies, - size_t numDependencies, - unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUgraphNode *, size_t, unsigned int); - static auto func_ptr = - LoadSymbol("cuStreamUpdateCaptureDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, dependencies, numDependencies, flags); -} - -CUresult CUDAAPI cuStreamAttachMemAsync(CUstream hStream, CUdeviceptr dptr, - size_t length, unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUdeviceptr, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamAttachMemAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, dptr, length, flags); -} - -CUresult CUDAAPI cuStreamQuery(CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream); - static auto func_ptr = LoadSymbol("cuStreamQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream); -} - -CUresult CUDAAPI cuStreamSynchronize(CUstream hStream) { - using 
FuncPtr = CUresult(CUDAAPI *)(CUstream); - static auto func_ptr = LoadSymbol("cuStreamSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream); -} - -CUresult CUDAAPI cuStreamDestroy(CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream); - static auto func_ptr = LoadSymbol("cuStreamDestroy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream); -} - -CUresult CUDAAPI cuStreamCopyAttributes(CUstream dst, CUstream src) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUstream); - static auto func_ptr = LoadSymbol("cuStreamCopyAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src); -} - -CUresult CUDAAPI cuStreamGetAttribute(CUstream hStream, CUstreamAttrID attr, - CUstreamAttrValue *value_out) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUstreamAttrID, CUstreamAttrValue *); - static auto func_ptr = LoadSymbol("cuStreamGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, attr, value_out); -} - -CUresult CUDAAPI cuStreamSetAttribute(CUstream hStream, CUstreamAttrID attr, - const CUstreamAttrValue *value) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUstreamAttrID, const CUstreamAttrValue *); - static auto func_ptr = LoadSymbol("cuStreamSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, attr, value); -} - -CUresult CUDAAPI cuEventCreate(CUevent *phEvent, unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent *, unsigned int); - static auto func_ptr = LoadSymbol("cuEventCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phEvent, Flags); -} - -CUresult CUDAAPI cuEventRecord(CUevent hEvent, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent, CUstream); - static auto func_ptr = LoadSymbol("cuEventRecord"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hEvent, hStream); -} - -CUresult 
CUDAAPI cuEventRecordWithFlags(CUevent hEvent, CUstream hStream, - unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent, CUstream, unsigned int); - static auto func_ptr = LoadSymbol("cuEventRecordWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hEvent, hStream, flags); -} - -CUresult CUDAAPI cuEventQuery(CUevent hEvent) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent); - static auto func_ptr = LoadSymbol("cuEventQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hEvent); -} - -CUresult CUDAAPI cuEventSynchronize(CUevent hEvent) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent); - static auto func_ptr = LoadSymbol("cuEventSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hEvent); -} - -CUresult CUDAAPI cuEventDestroy(CUevent hEvent) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent); - static auto func_ptr = LoadSymbol("cuEventDestroy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hEvent); -} - -CUresult CUDAAPI cuEventElapsedTime(float *pMilliseconds, CUevent hStart, - CUevent hEnd) { - using FuncPtr = CUresult(CUDAAPI *)(float *, CUevent, CUevent); - static auto func_ptr = LoadSymbol("cuEventElapsedTime"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pMilliseconds, hStart, hEnd); -} - -CUresult CUDAAPI -cuImportExternalMemory(CUexternalMemory *extMem_out, - const CUDA_EXTERNAL_MEMORY_HANDLE_DESC *memHandleDesc) { - using FuncPtr = CUresult(CUDAAPI *)(CUexternalMemory *, - const CUDA_EXTERNAL_MEMORY_HANDLE_DESC *); - static auto func_ptr = LoadSymbol("cuImportExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem_out, memHandleDesc); -} - -CUresult CUDAAPI cuExternalMemoryGetMappedBuffer( - CUdeviceptr *devPtr, CUexternalMemory extMem, - const CUDA_EXTERNAL_MEMORY_BUFFER_DESC *bufferDesc) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, CUexternalMemory, - const 
CUDA_EXTERNAL_MEMORY_BUFFER_DESC *); - static auto func_ptr = LoadSymbol("cuExternalMemoryGetMappedBuffer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, extMem, bufferDesc); -} - -CUresult CUDAAPI cuExternalMemoryGetMappedMipmappedArray( - CUmipmappedArray *mipmap, CUexternalMemory extMem, - const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC *mipmapDesc) { - using FuncPtr = - CUresult(CUDAAPI *)(CUmipmappedArray *, CUexternalMemory, - const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC *); - static auto func_ptr = - LoadSymbol("cuExternalMemoryGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmap, extMem, mipmapDesc); -} - -CUresult CUDAAPI cuDestroyExternalMemory(CUexternalMemory extMem) { - using FuncPtr = CUresult(CUDAAPI *)(CUexternalMemory); - static auto func_ptr = LoadSymbol("cuDestroyExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem); -} - -CUresult CUDAAPI cuImportExternalSemaphore( - CUexternalSemaphore *extSem_out, - const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC *semHandleDesc) { - using FuncPtr = CUresult(CUDAAPI *)( - CUexternalSemaphore *, const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC *); - static auto func_ptr = LoadSymbol("cuImportExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem_out, semHandleDesc); -} - -CUresult CUDAAPI cuSignalExternalSemaphoresAsync( - const CUexternalSemaphore *extSemArray, - const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS *paramsArray, - unsigned int numExtSems, CUstream stream) { - using FuncPtr = CUresult(CUDAAPI *)( - const CUexternalSemaphore *, - const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS *, unsigned int, CUstream); - static auto func_ptr = LoadSymbol("cuSignalExternalSemaphoresAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -CUresult CUDAAPI cuWaitExternalSemaphoresAsync( - const 
CUexternalSemaphore *extSemArray, - const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS *paramsArray, - unsigned int numExtSems, CUstream stream) { - using FuncPtr = CUresult(CUDAAPI *)( - const CUexternalSemaphore *, const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS *, - unsigned int, CUstream); - static auto func_ptr = LoadSymbol("cuWaitExternalSemaphoresAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -CUresult CUDAAPI cuDestroyExternalSemaphore(CUexternalSemaphore extSem) { - using FuncPtr = CUresult(CUDAAPI *)(CUexternalSemaphore); - static auto func_ptr = LoadSymbol("cuDestroyExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem); -} - -CUresult CUDAAPI cuStreamWaitValue32(CUstream stream, CUdeviceptr addr, - cuuint32_t value, unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUdeviceptr, cuuint32_t, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamWaitValue32"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, addr, value, flags); -} - -CUresult CUDAAPI cuStreamWaitValue64(CUstream stream, CUdeviceptr addr, - cuuint64_t value, unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUdeviceptr, cuuint64_t, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamWaitValue64"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, addr, value, flags); -} - -CUresult CUDAAPI cuStreamWriteValue32(CUstream stream, CUdeviceptr addr, - cuuint32_t value, unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUdeviceptr, cuuint32_t, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamWriteValue32"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, addr, value, flags); -} - -CUresult CUDAAPI cuStreamWriteValue64(CUstream stream, CUdeviceptr addr, - cuuint64_t value, unsigned int flags) { - using FuncPtr = - 
CUresult(CUDAAPI *)(CUstream, CUdeviceptr, cuuint64_t, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamWriteValue64"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, addr, value, flags); -} - -CUresult CUDAAPI cuStreamBatchMemOp(CUstream stream, unsigned int count, - CUstreamBatchMemOpParams *paramArray, - unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, unsigned int, - CUstreamBatchMemOpParams *, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamBatchMemOp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, count, paramArray, flags); -} - -CUresult CUDAAPI cuFuncGetAttribute(int *pi, CUfunction_attribute attrib, - CUfunction hfunc) { - using FuncPtr = CUresult(CUDAAPI *)(int *, CUfunction_attribute, CUfunction); - static auto func_ptr = LoadSymbol("cuFuncGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pi, attrib, hfunc); -} - -CUresult CUDAAPI cuFuncSetAttribute(CUfunction hfunc, - CUfunction_attribute attrib, int value) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, CUfunction_attribute, int); - static auto func_ptr = LoadSymbol("cuFuncSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, attrib, value); -} - -CUresult CUDAAPI cuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, CUfunc_cache); - static auto func_ptr = LoadSymbol("cuFuncSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, config); -} - -CUresult CUDAAPI cuFuncSetSharedMemConfig(CUfunction hfunc, - CUsharedconfig config) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, CUsharedconfig); - static auto func_ptr = LoadSymbol("cuFuncSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, config); -} - -CUresult CUDAAPI cuFuncGetModule(CUmodule *hmod, CUfunction hfunc) { - using FuncPtr = 
CUresult(CUDAAPI *)(CUmodule *, CUfunction); - static auto func_ptr = LoadSymbol("cuFuncGetModule"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hmod, hfunc); -} - -CUresult CUDAAPI cuLaunchKernel(CUfunction f, unsigned int gridDimX, - unsigned int gridDimY, unsigned int gridDimZ, - unsigned int blockDimX, unsigned int blockDimY, - unsigned int blockDimZ, - unsigned int sharedMemBytes, CUstream hStream, - void **kernelParams, void **extra) { - using FuncPtr = CUresult(CUDAAPI *)( - CUfunction, unsigned int, unsigned int, unsigned int, unsigned int, - unsigned int, unsigned int, unsigned int, CUstream, void **, void **); - static auto func_ptr = LoadSymbol("cuLaunchKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, - blockDimZ, sharedMemBytes, hStream, kernelParams, extra); -} - -CUresult CUDAAPI cuLaunchCooperativeKernel( - CUfunction f, unsigned int gridDimX, unsigned int gridDimY, - unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, - unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, - void **kernelParams) { - using FuncPtr = CUresult(CUDAAPI *)( - CUfunction, unsigned int, unsigned int, unsigned int, unsigned int, - unsigned int, unsigned int, unsigned int, CUstream, void **); - static auto func_ptr = LoadSymbol("cuLaunchCooperativeKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, - blockDimZ, sharedMemBytes, hStream, kernelParams); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuLaunchCooperativeKernelMultiDevice( - CUDA_LAUNCH_PARAMS *launchParamsList, unsigned int numDevices, - unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUDA_LAUNCH_PARAMS *, unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cuLaunchCooperativeKernelMultiDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(launchParamsList, numDevices, flags); -} - -CUresult CUDAAPI cuLaunchHostFunc(CUstream hStream, CUhostFn fn, - void *userData) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUhostFn, void *); - static auto func_ptr = LoadSymbol("cuLaunchHostFunc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, fn, userData); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuFuncSetBlockShape(CUfunction hfunc, int x, - int y, int z) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, int, int); - static auto func_ptr = LoadSymbol("cuFuncSetBlockShape"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, x, y, z); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuFuncSetSharedSize(CUfunction hfunc, - unsigned int bytes) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, unsigned int); - static auto func_ptr = LoadSymbol("cuFuncSetSharedSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, bytes); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuParamSetSize(CUfunction hfunc, - unsigned int numbytes) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, unsigned int); - static auto func_ptr = LoadSymbol("cuParamSetSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, numbytes); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuParamSeti(CUfunction hfunc, int offset, - unsigned int value) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, unsigned int); - static auto func_ptr = LoadSymbol("cuParamSeti"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, offset, value); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuParamSetf(CUfunction hfunc, int offset, - float value) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, float); - static auto func_ptr = LoadSymbol("cuParamSetf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, offset, value); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuParamSetv(CUfunction hfunc, int offset, - 
void *ptr, - unsigned int numbytes) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, void *, unsigned int); - static auto func_ptr = LoadSymbol("cuParamSetv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, offset, ptr, numbytes); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuLaunch(CUfunction f) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction); - static auto func_ptr = LoadSymbol("cuLaunch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(f); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuLaunchGrid(CUfunction f, int grid_width, - int grid_height) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, int); - static auto func_ptr = LoadSymbol("cuLaunchGrid"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(f, grid_width, grid_height); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuLaunchGridAsync(CUfunction f, - int grid_width, - int grid_height, - CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, int, CUstream); - static auto func_ptr = LoadSymbol("cuLaunchGridAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(f, grid_width, grid_height, hStream); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuParamSetTexRef(CUfunction hfunc, - int texunit, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, CUtexref); - static auto func_ptr = LoadSymbol("cuParamSetTexRef"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, texunit, hTexRef); -} - -CUresult CUDAAPI cuGraphCreate(CUgraph *phGraph, unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph *, unsigned int); - static auto func_ptr = LoadSymbol("cuGraphCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraph, flags); -} - -CUresult CUDAAPI cuGraphAddKernelNode( - CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, - size_t numDependencies, const CUDA_KERNEL_NODE_PARAMS *nodeParams) { - using 
FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, - const CUDA_KERNEL_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphAddKernelNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - nodeParams); -} - -CUresult CUDAAPI cuGraphKernelNodeGetParams( - CUgraphNode hNode, CUDA_KERNEL_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_KERNEL_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphKernelNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphKernelNodeSetParams( - CUgraphNode hNode, const CUDA_KERNEL_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode, const CUDA_KERNEL_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphAddMemcpyNode(CUgraphNode *phGraphNode, CUgraph hGraph, - const CUgraphNode *dependencies, - size_t numDependencies, - const CUDA_MEMCPY3D *copyParams, - CUcontext ctx) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, - const CUDA_MEMCPY3D *, CUcontext); - static auto func_ptr = LoadSymbol("cuGraphAddMemcpyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - copyParams, ctx); -} - -CUresult CUDAAPI cuGraphMemcpyNodeGetParams(CUgraphNode hNode, - CUDA_MEMCPY3D *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_MEMCPY3D *); - static auto func_ptr = LoadSymbol("cuGraphMemcpyNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphMemcpyNodeSetParams(CUgraphNode hNode, - const CUDA_MEMCPY3D *nodeParams) { - using FuncPtr 
= CUresult(CUDAAPI *)(CUgraphNode, const CUDA_MEMCPY3D *); - static auto func_ptr = LoadSymbol("cuGraphMemcpyNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphAddMemsetNode( - CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, - size_t numDependencies, const CUDA_MEMSET_NODE_PARAMS *memsetParams, - CUcontext ctx) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, - const CUDA_MEMSET_NODE_PARAMS *, CUcontext); - static auto func_ptr = LoadSymbol("cuGraphAddMemsetNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - memsetParams, ctx); -} - -CUresult CUDAAPI cuGraphMemsetNodeGetParams( - CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_MEMSET_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphMemsetNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphMemsetNodeSetParams( - CUgraphNode hNode, const CUDA_MEMSET_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode, const CUDA_MEMSET_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphAddHostNode(CUgraphNode *phGraphNode, CUgraph hGraph, - const CUgraphNode *dependencies, - size_t numDependencies, - const CUDA_HOST_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, - const CUDA_HOST_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphAddHostNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - nodeParams); -} - -CUresult 
CUDAAPI cuGraphHostNodeGetParams(CUgraphNode hNode, - CUDA_HOST_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_HOST_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphHostNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphHostNodeSetParams( - CUgraphNode hNode, const CUDA_HOST_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode, const CUDA_HOST_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphHostNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphAddChildGraphNode(CUgraphNode *phGraphNode, - CUgraph hGraph, - const CUgraphNode *dependencies, - size_t numDependencies, - CUgraph childGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, - const CUgraphNode *, size_t, CUgraph); - static auto func_ptr = LoadSymbol("cuGraphAddChildGraphNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - childGraph); -} - -CUresult CUDAAPI cuGraphChildGraphNodeGetGraph(CUgraphNode hNode, - CUgraph *phGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraph *); - static auto func_ptr = LoadSymbol("cuGraphChildGraphNodeGetGraph"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, phGraph); -} - -CUresult CUDAAPI cuGraphAddEmptyNode(CUgraphNode *phGraphNode, CUgraph hGraph, - const CUgraphNode *dependencies, - size_t numDependencies) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t); - static auto func_ptr = LoadSymbol("cuGraphAddEmptyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies); -} - -CUresult CUDAAPI cuGraphAddEventRecordNode(CUgraphNode *phGraphNode, - CUgraph hGraph, - const CUgraphNode 
*dependencies, - size_t numDependencies, - CUevent event) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, - const CUgraphNode *, size_t, CUevent); - static auto func_ptr = LoadSymbol("cuGraphAddEventRecordNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, event); -} - -CUresult CUDAAPI cuGraphEventRecordNodeGetEvent(CUgraphNode hNode, - CUevent *event_out) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUevent *); - static auto func_ptr = LoadSymbol("cuGraphEventRecordNodeGetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, event_out); -} - -CUresult CUDAAPI cuGraphEventRecordNodeSetEvent(CUgraphNode hNode, - CUevent event) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUevent); - static auto func_ptr = LoadSymbol("cuGraphEventRecordNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, event); -} - -CUresult CUDAAPI cuGraphAddEventWaitNode(CUgraphNode *phGraphNode, - CUgraph hGraph, - const CUgraphNode *dependencies, - size_t numDependencies, - CUevent event) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, - const CUgraphNode *, size_t, CUevent); - static auto func_ptr = LoadSymbol("cuGraphAddEventWaitNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, event); -} - -CUresult CUDAAPI cuGraphEventWaitNodeGetEvent(CUgraphNode hNode, - CUevent *event_out) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUevent *); - static auto func_ptr = LoadSymbol("cuGraphEventWaitNodeGetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, event_out); -} - -CUresult CUDAAPI cuGraphEventWaitNodeSetEvent(CUgraphNode hNode, - CUevent event) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUevent); - static auto func_ptr = LoadSymbol("cuGraphEventWaitNodeSetEvent"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(hNode, event); -} - -#if CUDA_VERSION >= 11020 - -CUresult CUDAAPI cuGraphAddExternalSemaphoresSignalNode( - CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, - size_t numDependencies, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, - const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *); - static auto func_ptr = - LoadSymbol("cuGraphAddExternalSemaphoresSignalNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - nodeParams); -} - -CUresult CUDAAPI cuGraphExternalSemaphoresSignalNodeGetParams( - CUgraphNode hNode, CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *params_out) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode, CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *); - static auto func_ptr = - LoadSymbol("cuGraphExternalSemaphoresSignalNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, params_out); -} - -CUresult CUDAAPI cuGraphExternalSemaphoresSignalNodeSetParams( - CUgraphNode hNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *); - static auto func_ptr = - LoadSymbol("cuGraphExternalSemaphoresSignalNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphAddExternalSemaphoresWaitNode( - CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, - size_t numDependencies, const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, - const CUDA_EXT_SEM_WAIT_NODE_PARAMS *); - static auto func_ptr = - LoadSymbol("cuGraphAddExternalSemaphoresWaitNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, 
dependencies, numDependencies, - nodeParams); -} - -CUresult CUDAAPI cuGraphExternalSemaphoresWaitNodeGetParams( - CUgraphNode hNode, CUDA_EXT_SEM_WAIT_NODE_PARAMS *params_out) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode, CUDA_EXT_SEM_WAIT_NODE_PARAMS *); - static auto func_ptr = - LoadSymbol("cuGraphExternalSemaphoresWaitNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, params_out); -} - -CUresult CUDAAPI cuGraphExternalSemaphoresWaitNodeSetParams( - CUgraphNode hNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS *); - static auto func_ptr = - LoadSymbol("cuGraphExternalSemaphoresWaitNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -#endif // CUDA_VERSION >= 11020 - -CUresult CUDAAPI cuGraphClone(CUgraph *phGraphClone, CUgraph originalGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph *, CUgraph); - static auto func_ptr = LoadSymbol("cuGraphClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphClone, originalGraph); -} - -CUresult CUDAAPI cuGraphNodeFindInClone(CUgraphNode *phNode, - CUgraphNode hOriginalNode, - CUgraph hClonedGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraphNode, CUgraph); - static auto func_ptr = LoadSymbol("cuGraphNodeFindInClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phNode, hOriginalNode, hClonedGraph); -} - -CUresult CUDAAPI cuGraphNodeGetType(CUgraphNode hNode, CUgraphNodeType *type) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraphNodeType *); - static auto func_ptr = LoadSymbol("cuGraphNodeGetType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, type); -} - -CUresult CUDAAPI cuGraphGetNodes(CUgraph hGraph, CUgraphNode *nodes, - size_t *numNodes) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph, CUgraphNode *, 
size_t *); - static auto func_ptr = LoadSymbol("cuGraphGetNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph, nodes, numNodes); -} - -CUresult CUDAAPI cuGraphGetRootNodes(CUgraph hGraph, CUgraphNode *rootNodes, - size_t *numRootNodes) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph, CUgraphNode *, size_t *); - static auto func_ptr = LoadSymbol("cuGraphGetRootNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph, rootNodes, numRootNodes); -} - -CUresult CUDAAPI cuGraphGetEdges(CUgraph hGraph, CUgraphNode *from, - CUgraphNode *to, size_t *numEdges) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraph, CUgraphNode *, CUgraphNode *, size_t *); - static auto func_ptr = LoadSymbol("cuGraphGetEdges"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph, from, to, numEdges); -} - -CUresult CUDAAPI cuGraphNodeGetDependencies(CUgraphNode hNode, - CUgraphNode *dependencies, - size_t *numDependencies) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraphNode *, size_t *); - static auto func_ptr = LoadSymbol("cuGraphNodeGetDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, dependencies, numDependencies); -} - -CUresult CUDAAPI cuGraphNodeGetDependentNodes(CUgraphNode hNode, - CUgraphNode *dependentNodes, - size_t *numDependentNodes) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraphNode *, size_t *); - static auto func_ptr = LoadSymbol("cuGraphNodeGetDependentNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, dependentNodes, numDependentNodes); -} - -CUresult CUDAAPI cuGraphAddDependencies(CUgraph hGraph, const CUgraphNode *from, - const CUgraphNode *to, - size_t numDependencies) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph, const CUgraphNode *, - const CUgraphNode *, size_t); - static auto func_ptr = LoadSymbol("cuGraphAddDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(hGraph, from, to, numDependencies); -} - -CUresult CUDAAPI cuGraphRemoveDependencies(CUgraph hGraph, - const CUgraphNode *from, - const CUgraphNode *to, - size_t numDependencies) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph, const CUgraphNode *, - const CUgraphNode *, size_t); - static auto func_ptr = LoadSymbol("cuGraphRemoveDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph, from, to, numDependencies); -} - -CUresult CUDAAPI cuGraphDestroyNode(CUgraphNode hNode) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode); - static auto func_ptr = LoadSymbol("cuGraphDestroyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode); -} - -CUresult CUDAAPI cuGraphInstantiate(CUgraphExec *phGraphExec, CUgraph hGraph, - CUgraphNode *phErrorNode, char *logBuffer, - size_t bufferSize) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec *, CUgraph, CUgraphNode *, - char *, size_t); - static auto func_ptr = LoadSymbol("cuGraphInstantiate_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphExec, hGraph, phErrorNode, logBuffer, bufferSize); -} - -CUresult CUDAAPI -cuGraphExecKernelNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, - const CUDA_KERNEL_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, - const CUDA_KERNEL_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphExecKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphExecMemcpyNodeSetParams(CUgraphExec hGraphExec, - CUgraphNode hNode, - const CUDA_MEMCPY3D *copyParams, - CUcontext ctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, - const CUDA_MEMCPY3D *, CUcontext); - static auto func_ptr = LoadSymbol("cuGraphExecMemcpyNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, copyParams, ctx); -} - 
-CUresult CUDAAPI cuGraphExecMemsetNodeSetParams( - CUgraphExec hGraphExec, CUgraphNode hNode, - const CUDA_MEMSET_NODE_PARAMS *memsetParams, CUcontext ctx) { - using FuncPtr = CUresult(CUDAAPI *)( - CUgraphExec, CUgraphNode, const CUDA_MEMSET_NODE_PARAMS *, CUcontext); - static auto func_ptr = LoadSymbol("cuGraphExecMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, memsetParams, ctx); -} - -CUresult CUDAAPI -cuGraphExecHostNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, - const CUDA_HOST_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, - const CUDA_HOST_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphExecHostNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphExecChildGraphNodeSetParams(CUgraphExec hGraphExec, - CUgraphNode hNode, - CUgraph childGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, CUgraph); - static auto func_ptr = - LoadSymbol("cuGraphExecChildGraphNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, childGraph); -} - -CUresult CUDAAPI cuGraphExecEventRecordNodeSetEvent(CUgraphExec hGraphExec, - CUgraphNode hNode, - CUevent event) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, CUevent); - static auto func_ptr = - LoadSymbol("cuGraphExecEventRecordNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, event); -} - -CUresult CUDAAPI cuGraphExecEventWaitNodeSetEvent(CUgraphExec hGraphExec, - CUgraphNode hNode, - CUevent event) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, CUevent); - static auto func_ptr = - LoadSymbol("cuGraphExecEventWaitNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, event); -} - -#if CUDA_VERSION >= 11020 
- -CUresult CUDAAPI cuGraphExecExternalSemaphoresSignalNodeSetParams( - CUgraphExec hGraphExec, CUgraphNode hNode, - const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, - const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *); - static auto func_ptr = - LoadSymbol("cuGraphExecExternalSemaphoresSignalNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphExecExternalSemaphoresWaitNodeSetParams( - CUgraphExec hGraphExec, CUgraphNode hNode, - const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, - const CUDA_EXT_SEM_WAIT_NODE_PARAMS *); - static auto func_ptr = - LoadSymbol("cuGraphExecExternalSemaphoresWaitNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, nodeParams); -} - -#endif // CUDA_VERSION >= 11020 - -CUresult CUDAAPI cuGraphUpload(CUgraphExec hGraphExec, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUstream); - static auto func_ptr = LoadSymbol("cuGraphUpload"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hStream); -} - -CUresult CUDAAPI cuGraphLaunch(CUgraphExec hGraphExec, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUstream); - static auto func_ptr = LoadSymbol("cuGraphLaunch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hStream); -} - -CUresult CUDAAPI cuGraphExecDestroy(CUgraphExec hGraphExec) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec); - static auto func_ptr = LoadSymbol("cuGraphExecDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec); -} - -CUresult CUDAAPI cuGraphDestroy(CUgraph hGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph); - static auto func_ptr = LoadSymbol("cuGraphDestroy"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(hGraph); -} - -CUresult CUDAAPI cuGraphExecUpdate(CUgraphExec hGraphExec, CUgraph hGraph, - CUgraphNode *hErrorNode_out, - CUgraphExecUpdateResult *updateResult_out) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraph, CUgraphNode *, - CUgraphExecUpdateResult *); - static auto func_ptr = LoadSymbol("cuGraphExecUpdate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hGraph, hErrorNode_out, updateResult_out); -} - -CUresult CUDAAPI cuGraphKernelNodeCopyAttributes(CUgraphNode dst, - CUgraphNode src) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraphNode); - static auto func_ptr = LoadSymbol("cuGraphKernelNodeCopyAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src); -} - -CUresult CUDAAPI -cuGraphKernelNodeGetAttribute(CUgraphNode hNode, CUkernelNodeAttrID attr, - CUkernelNodeAttrValue *value_out) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUkernelNodeAttrID, - CUkernelNodeAttrValue *); - static auto func_ptr = LoadSymbol("cuGraphKernelNodeGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, attr, value_out); -} - -CUresult CUDAAPI -cuGraphKernelNodeSetAttribute(CUgraphNode hNode, CUkernelNodeAttrID attr, - const CUkernelNodeAttrValue *value) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUkernelNodeAttrID, - const CUkernelNodeAttrValue *); - static auto func_ptr = LoadSymbol("cuGraphKernelNodeSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, attr, value); -} - -CUresult CUDAAPI cuGraphDebugDotPrint(CUgraph hGraph, const char *path, - unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph, const char *, unsigned int); - static auto func_ptr = LoadSymbol("cuGraphDebugDotPrint"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph, path, flags); -} - -#if CUDA_VERSION >= 11030 - -CUresult CUDAAPI 
cuUserObjectCreate(CUuserObject *object_out, void *ptr, - CUhostFn destroy, - unsigned int initialRefcount, - unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUuserObject *, void *, CUhostFn, - unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cuUserObjectCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(object_out, ptr, destroy, initialRefcount, flags); -} - -CUresult CUDAAPI cuUserObjectRetain(CUuserObject object, unsigned int count) { - using FuncPtr = CUresult(CUDAAPI *)(CUuserObject, unsigned int); - static auto func_ptr = LoadSymbol("cuUserObjectRetain"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(object, count); -} - -CUresult CUDAAPI cuUserObjectRelease(CUuserObject object, unsigned int count) { - using FuncPtr = CUresult(CUDAAPI *)(CUuserObject, unsigned int); - static auto func_ptr = LoadSymbol("cuUserObjectRelease"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(object, count); -} - -CUresult CUDAAPI cuGraphRetainUserObject(CUgraph graph, CUuserObject object, - unsigned int count, - unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraph, CUuserObject, unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cuGraphRetainUserObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, object, count, flags); -} - -CUresult CUDAAPI cuGraphReleaseUserObject(CUgraph graph, CUuserObject object, - unsigned int count) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph, CUuserObject, unsigned int); - static auto func_ptr = LoadSymbol("cuGraphReleaseUserObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, object, count); -} - -#endif // CUDA_VERSION >= 11030 - -CUresult CUDAAPI cuOccupancyMaxActiveBlocksPerMultiprocessor( - int *numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize) { - using FuncPtr = CUresult(CUDAAPI *)(int *, CUfunction, int, size_t); - static auto 
func_ptr = - LoadSymbol("cuOccupancyMaxActiveBlocksPerMultiprocessor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize); -} - -CUresult CUDAAPI cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( - int *numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize, - unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(int *, CUfunction, int, size_t, unsigned int); - static auto func_ptr = LoadSymbol( - "cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize, flags); -} - -CUresult CUDAAPI cuOccupancyMaxPotentialBlockSize( - int *minGridSize, int *blockSize, CUfunction func, - CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, - int blockSizeLimit) { - using FuncPtr = CUresult(CUDAAPI *)(int *, int *, CUfunction, - CUoccupancyB2DSize, size_t, int); - static auto func_ptr = - LoadSymbol("cuOccupancyMaxPotentialBlockSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(minGridSize, blockSize, func, blockSizeToDynamicSMemSize, - dynamicSMemSize, blockSizeLimit); -} - -CUresult CUDAAPI cuOccupancyMaxPotentialBlockSizeWithFlags( - int *minGridSize, int *blockSize, CUfunction func, - CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, - int blockSizeLimit, unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)( - int *, int *, CUfunction, CUoccupancyB2DSize, size_t, int, unsigned int); - static auto func_ptr = - LoadSymbol("cuOccupancyMaxPotentialBlockSizeWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(minGridSize, blockSize, func, blockSizeToDynamicSMemSize, - dynamicSMemSize, blockSizeLimit, flags); -} - -CUresult CUDAAPI cuOccupancyAvailableDynamicSMemPerBlock( - size_t *dynamicSmemSize, CUfunction func, int numBlocks, int blockSize) { - using FuncPtr = CUresult(CUDAAPI *)(size_t 
*, CUfunction, int, int); - static auto func_ptr = - LoadSymbol("cuOccupancyAvailableDynamicSMemPerBlock"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dynamicSmemSize, func, numBlocks, blockSize); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetArray(CUtexref hTexRef, - CUarray hArray, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUarray, unsigned int); - static auto func_ptr = LoadSymbol("cuTexRefSetArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, hArray, Flags); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetMipmappedArray( - CUtexref hTexRef, CUmipmappedArray hMipmappedArray, unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUmipmappedArray, unsigned int); - static auto func_ptr = LoadSymbol("cuTexRefSetMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, hMipmappedArray, Flags); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetAddress(size_t *ByteOffset, - CUtexref hTexRef, - CUdeviceptr dptr, - size_t bytes) { - using FuncPtr = CUresult(CUDAAPI *)(size_t *, CUtexref, CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuTexRefSetAddress_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ByteOffset, hTexRef, dptr, bytes); -} - -__CUDA_DEPRECATED CUresult CUDAAPI -cuTexRefSetAddress2D(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, - CUdeviceptr dptr, size_t Pitch) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, const CUDA_ARRAY_DESCRIPTOR *, - CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuTexRefSetAddress2D_v3"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, desc, dptr, Pitch); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetFormat(CUtexref hTexRef, - CUarray_format fmt, - int NumPackedComponents) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUarray_format, int); - static auto func_ptr = 
LoadSymbol("cuTexRefSetFormat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, fmt, NumPackedComponents); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetAddressMode(CUtexref hTexRef, - int dim, - CUaddress_mode am) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, int, CUaddress_mode); - static auto func_ptr = LoadSymbol("cuTexRefSetAddressMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, dim, am); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetFilterMode(CUtexref hTexRef, - CUfilter_mode fm) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUfilter_mode); - static auto func_ptr = LoadSymbol("cuTexRefSetFilterMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, fm); -} - -__CUDA_DEPRECATED CUresult CUDAAPI -cuTexRefSetMipmapFilterMode(CUtexref hTexRef, CUfilter_mode fm) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUfilter_mode); - static auto func_ptr = LoadSymbol("cuTexRefSetMipmapFilterMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, fm); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetMipmapLevelBias(CUtexref hTexRef, - float bias) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, float); - static auto func_ptr = LoadSymbol("cuTexRefSetMipmapLevelBias"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, bias); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetMipmapLevelClamp( - CUtexref hTexRef, float minMipmapLevelClamp, float maxMipmapLevelClamp) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, float, float); - static auto func_ptr = LoadSymbol("cuTexRefSetMipmapLevelClamp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, minMipmapLevelClamp, maxMipmapLevelClamp); -} - -__CUDA_DEPRECATED CUresult CUDAAPI -cuTexRefSetMaxAnisotropy(CUtexref hTexRef, unsigned int maxAniso) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, unsigned int); - static auto 
func_ptr = LoadSymbol("cuTexRefSetMaxAnisotropy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, maxAniso); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetBorderColor(CUtexref hTexRef, - float *pBorderColor) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, float *); - static auto func_ptr = LoadSymbol("cuTexRefSetBorderColor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, pBorderColor); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetFlags(CUtexref hTexRef, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, unsigned int); - static auto func_ptr = LoadSymbol("cuTexRefSetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, Flags); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetAddress(CUdeviceptr *pdptr, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetAddress_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pdptr, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetArray(CUarray *phArray, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phArray, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetMipmappedArray( - CUmipmappedArray *phMipmappedArray, CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUmipmappedArray *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phMipmappedArray, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetAddressMode(CUaddress_mode *pam, - CUtexref hTexRef, - int dim) { - using FuncPtr = CUresult(CUDAAPI *)(CUaddress_mode *, CUtexref, int); - static auto func_ptr = LoadSymbol("cuTexRefGetAddressMode"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pam, hTexRef, dim); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetFilterMode(CUfilter_mode *pfm, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUfilter_mode *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetFilterMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pfm, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetFormat(CUarray_format *pFormat, - int *pNumChannels, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray_format *, int *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetFormat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFormat, pNumChannels, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI -cuTexRefGetMipmapFilterMode(CUfilter_mode *pfm, CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUfilter_mode *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetMipmapFilterMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pfm, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI -cuTexRefGetMipmapLevelBias(float *pbias, CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(float *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetMipmapLevelBias"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pbias, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI -cuTexRefGetMipmapLevelClamp(float *pminMipmapLevelClamp, - float *pmaxMipmapLevelClamp, CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(float *, float *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetMipmapLevelClamp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pminMipmapLevelClamp, pmaxMipmapLevelClamp, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetMaxAnisotropy(int *pmaxAniso, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(int *, CUtexref); - static auto func_ptr = 
LoadSymbol("cuTexRefGetMaxAnisotropy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pmaxAniso, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetBorderColor(float *pBorderColor, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(float *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetBorderColor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pBorderColor, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetFlags(unsigned int *pFlags, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(unsigned int *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFlags, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefCreate(CUtexref *pTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref *); - static auto func_ptr = LoadSymbol("cuTexRefCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefDestroy(CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuSurfRefSetArray(CUsurfref hSurfRef, - CUarray hArray, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUsurfref, CUarray, unsigned int); - static auto func_ptr = LoadSymbol("cuSurfRefSetArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hSurfRef, hArray, Flags); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuSurfRefGetArray(CUarray *phArray, - CUsurfref hSurfRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray *, CUsurfref); - static auto func_ptr = LoadSymbol("cuSurfRefGetArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phArray, hSurfRef); -} - -CUresult CUDAAPI -cuTexObjectCreate(CUtexObject 
*pTexObject, const CUDA_RESOURCE_DESC *pResDesc, - const CUDA_TEXTURE_DESC *pTexDesc, - const CUDA_RESOURCE_VIEW_DESC *pResViewDesc) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexObject *, const CUDA_RESOURCE_DESC *, - const CUDA_TEXTURE_DESC *, - const CUDA_RESOURCE_VIEW_DESC *); - static auto func_ptr = LoadSymbol("cuTexObjectCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexObject, pResDesc, pTexDesc, pResViewDesc); -} - -CUresult CUDAAPI cuTexObjectDestroy(CUtexObject texObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexObject); - static auto func_ptr = LoadSymbol("cuTexObjectDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texObject); -} - -CUresult CUDAAPI cuTexObjectGetResourceDesc(CUDA_RESOURCE_DESC *pResDesc, - CUtexObject texObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_RESOURCE_DESC *, CUtexObject); - static auto func_ptr = LoadSymbol("cuTexObjectGetResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, texObject); -} - -CUresult CUDAAPI cuTexObjectGetTextureDesc(CUDA_TEXTURE_DESC *pTexDesc, - CUtexObject texObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_TEXTURE_DESC *, CUtexObject); - static auto func_ptr = LoadSymbol("cuTexObjectGetTextureDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexDesc, texObject); -} - -CUresult CUDAAPI cuTexObjectGetResourceViewDesc( - CUDA_RESOURCE_VIEW_DESC *pResViewDesc, CUtexObject texObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_RESOURCE_VIEW_DESC *, CUtexObject); - static auto func_ptr = LoadSymbol("cuTexObjectGetResourceViewDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResViewDesc, texObject); -} - -CUresult CUDAAPI cuSurfObjectCreate(CUsurfObject *pSurfObject, - const CUDA_RESOURCE_DESC *pResDesc) { - using FuncPtr = - CUresult(CUDAAPI *)(CUsurfObject *, const CUDA_RESOURCE_DESC *); - static auto func_ptr = 
LoadSymbol("cuSurfObjectCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pSurfObject, pResDesc); -} - -CUresult CUDAAPI cuSurfObjectDestroy(CUsurfObject surfObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUsurfObject); - static auto func_ptr = LoadSymbol("cuSurfObjectDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfObject); -} - -CUresult CUDAAPI cuSurfObjectGetResourceDesc(CUDA_RESOURCE_DESC *pResDesc, - CUsurfObject surfObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_RESOURCE_DESC *, CUsurfObject); - static auto func_ptr = LoadSymbol("cuSurfObjectGetResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, surfObject); -} - -CUresult CUDAAPI cuDeviceCanAccessPeer(int *canAccessPeer, CUdevice dev, - CUdevice peerDev) { - using FuncPtr = CUresult(CUDAAPI *)(int *, CUdevice, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceCanAccessPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(canAccessPeer, dev, peerDev); -} - -CUresult CUDAAPI cuCtxEnablePeerAccess(CUcontext peerContext, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext, unsigned int); - static auto func_ptr = LoadSymbol("cuCtxEnablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerContext, Flags); -} - -CUresult CUDAAPI cuCtxDisablePeerAccess(CUcontext peerContext) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuCtxDisablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerContext); -} - -CUresult CUDAAPI cuDeviceGetP2PAttribute(int *value, - CUdevice_P2PAttribute attrib, - CUdevice srcDevice, - CUdevice dstDevice) { - using FuncPtr = - CUresult(CUDAAPI *)(int *, CUdevice_P2PAttribute, CUdevice, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetP2PAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(value, attrib, srcDevice, dstDevice); -} - -CUresult CUDAAPI cuGraphicsUnregisterResource(CUgraphicsResource resource) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphicsResource); - static auto func_ptr = LoadSymbol("cuGraphicsUnregisterResource"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource); -} - -CUresult CUDAAPI cuGraphicsSubResourceGetMappedArray( - CUarray *pArray, CUgraphicsResource resource, unsigned int arrayIndex, - unsigned int mipLevel) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray *, CUgraphicsResource, - unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cuGraphicsSubResourceGetMappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pArray, resource, arrayIndex, mipLevel); -} - -CUresult CUDAAPI cuGraphicsResourceGetMappedMipmappedArray( - CUmipmappedArray *pMipmappedArray, CUgraphicsResource resource) { - using FuncPtr = CUresult(CUDAAPI *)(CUmipmappedArray *, CUgraphicsResource); - static auto func_ptr = - LoadSymbol("cuGraphicsResourceGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pMipmappedArray, resource); -} - -CUresult CUDAAPI cuGraphicsResourceGetMappedPointer( - CUdeviceptr *pDevPtr, size_t *pSize, CUgraphicsResource resource) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr *, size_t *, CUgraphicsResource); - static auto func_ptr = - LoadSymbol("cuGraphicsResourceGetMappedPointer_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pDevPtr, pSize, resource); -} - -CUresult CUDAAPI cuGraphicsResourceSetMapFlags(CUgraphicsResource resource, - unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphicsResource, unsigned int); - static auto func_ptr = - LoadSymbol("cuGraphicsResourceSetMapFlags_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource, flags); -} - -CUresult CUDAAPI cuGraphicsMapResources(unsigned int count, - CUgraphicsResource 
*resources, - CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(unsigned int, CUgraphicsResource *, CUstream); - static auto func_ptr = LoadSymbol("cuGraphicsMapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, hStream); -} - -CUresult CUDAAPI cuGraphicsUnmapResources(unsigned int count, - CUgraphicsResource *resources, - CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(unsigned int, CUgraphicsResource *, CUstream); - static auto func_ptr = LoadSymbol("cuGraphicsUnmapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, hStream); -} - -CUresult CUDAAPI cuGetProcAddress(const char *symbol, void **pfn, - int cudaVersion, cuuint64_t flags) { - using FuncPtr = CUresult(CUDAAPI *)(const char *, void **, int, cuuint64_t); - static auto func_ptr = LoadSymbol("cuGetProcAddress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, pfn, cudaVersion, flags); -} - -CUresult CUDAAPI cuGetExportTable(const void **ppExportTable, - const CUuuid *pExportTableId) { - using FuncPtr = CUresult(CUDAAPI *)(const void **, const CUuuid *); - static auto func_ptr = LoadSymbol("cuGetExportTable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ppExportTable, pExportTableId); -} - -#if CUDA_VERSION >= 11030 - -CUresult CUDAAPI -cuFlushGPUDirectRDMAWrites(CUflushGPUDirectRDMAWritesTarget target, - CUflushGPUDirectRDMAWritesScope scope) { - using FuncPtr = CUresult(CUDAAPI *)(CUflushGPUDirectRDMAWritesTarget, - CUflushGPUDirectRDMAWritesScope); - static auto func_ptr = LoadSymbol("cuFlushGPUDirectRDMAWrites"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(target, scope); -} - -#endif // CUDA_VERSION >= 11030 - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cuda_11_2.inc b/third_party/xla/third_party/tsl/tsl/cuda/cuda_11_2.inc deleted file mode 100644 index 7153901f8e065b..00000000000000 --- 
a/third_party/xla/third_party/tsl/tsl/cuda/cuda_11_2.inc +++ /dev/null @@ -1,2816 +0,0 @@ -// Auto-generated, do not edit. - -extern "C" { -CUresult CUDAAPI cuGetErrorString(CUresult error, const char **pStr) { - using FuncPtr = CUresult(CUDAAPI *)(CUresult, const char **); - static auto func_ptr = LoadSymbol("cuGetErrorString"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(error, pStr); -} - -CUresult CUDAAPI cuGetErrorName(CUresult error, const char **pStr) { - using FuncPtr = CUresult(CUDAAPI *)(CUresult, const char **); - static auto func_ptr = LoadSymbol("cuGetErrorName"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(error, pStr); -} - -CUresult CUDAAPI cuInit(unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(unsigned int); - static auto func_ptr = LoadSymbol("cuInit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(Flags); -} - -CUresult CUDAAPI cuDriverGetVersion(int *driverVersion) { - using FuncPtr = CUresult(CUDAAPI *)(int *); - static auto func_ptr = LoadSymbol("cuDriverGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(driverVersion); -} - -CUresult CUDAAPI cuDeviceGet(CUdevice *device, int ordinal) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice *, int); - static auto func_ptr = LoadSymbol("cuDeviceGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, ordinal); -} - -CUresult CUDAAPI cuDeviceGetCount(int *count) { - using FuncPtr = CUresult(CUDAAPI *)(int *); - static auto func_ptr = LoadSymbol("cuDeviceGetCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count); -} - -CUresult CUDAAPI cuDeviceGetName(char *name, int len, CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(char *, int, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetName"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(name, len, dev); -} - -CUresult CUDAAPI cuDeviceGetUuid(CUuuid *uuid, 
CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUuuid *, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetUuid"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(uuid, dev); -} - -CUresult CUDAAPI cuDeviceGetLuid(char *luid, unsigned int *deviceNodeMask, - CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(char *, unsigned int *, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetLuid"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(luid, deviceNodeMask, dev); -} - -CUresult CUDAAPI cuDeviceTotalMem(size_t *bytes, CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(size_t *, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceTotalMem_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(bytes, dev); -} - -CUresult CUDAAPI cuDeviceGetTexture1DLinearMaxWidth(size_t *maxWidthInElements, - CUarray_format format, - unsigned numChannels, - CUdevice dev) { - using FuncPtr = - CUresult(CUDAAPI *)(size_t *, CUarray_format, unsigned int, CUdevice); - static auto func_ptr = - LoadSymbol("cuDeviceGetTexture1DLinearMaxWidth"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(maxWidthInElements, format, numChannels, dev); -} - -CUresult CUDAAPI cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, - CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(int *, CUdevice_attribute, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pi, attrib, dev); -} - -CUresult CUDAAPI cuDeviceGetNvSciSyncAttributes(void *nvSciSyncAttrList, - CUdevice dev, int flags) { - using FuncPtr = CUresult(CUDAAPI *)(void *, CUdevice, int); - static auto func_ptr = LoadSymbol("cuDeviceGetNvSciSyncAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(nvSciSyncAttrList, dev, flags); -} - -CUresult CUDAAPI cuDeviceSetMemPool(CUdevice dev, CUmemoryPool pool) { - using FuncPtr = 
CUresult(CUDAAPI *)(CUdevice, CUmemoryPool); - static auto func_ptr = LoadSymbol("cuDeviceSetMemPool"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev, pool); -} - -CUresult CUDAAPI cuDeviceGetMemPool(CUmemoryPool *pool, CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUmemoryPool *, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetMemPool"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pool, dev); -} - -CUresult CUDAAPI cuDeviceGetDefaultMemPool(CUmemoryPool *pool_out, - CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUmemoryPool *, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetDefaultMemPool"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pool_out, dev); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuDeviceGetProperties(CUdevprop *prop, - CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevprop *, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(prop, dev); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuDeviceComputeCapability(int *major, - int *minor, - CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(int *, int *, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceComputeCapability"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(major, minor, dev); -} - -CUresult CUDAAPI cuDevicePrimaryCtxRetain(CUcontext *pctx, CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext *, CUdevice); - static auto func_ptr = LoadSymbol("cuDevicePrimaryCtxRetain"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pctx, dev); -} - -CUresult CUDAAPI cuDevicePrimaryCtxRelease(CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice); - static auto func_ptr = LoadSymbol("cuDevicePrimaryCtxRelease_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev); -} - -CUresult CUDAAPI 
cuDevicePrimaryCtxSetFlags(CUdevice dev, unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice, unsigned int); - static auto func_ptr = LoadSymbol("cuDevicePrimaryCtxSetFlags_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev, flags); -} - -CUresult CUDAAPI cuDevicePrimaryCtxGetState(CUdevice dev, unsigned int *flags, - int *active) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice, unsigned int *, int *); - static auto func_ptr = LoadSymbol("cuDevicePrimaryCtxGetState"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev, flags, active); -} - -CUresult CUDAAPI cuDevicePrimaryCtxReset(CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice); - static auto func_ptr = LoadSymbol("cuDevicePrimaryCtxReset_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev); -} - -CUresult CUDAAPI cuCtxCreate(CUcontext *pctx, unsigned int flags, - CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext *, unsigned int, CUdevice); - static auto func_ptr = LoadSymbol("cuCtxCreate_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pctx, flags, dev); -} - -CUresult CUDAAPI cuCtxDestroy(CUcontext ctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuCtxDestroy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx); -} - -CUresult CUDAAPI cuCtxPushCurrent(CUcontext ctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuCtxPushCurrent_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx); -} - -CUresult CUDAAPI cuCtxPopCurrent(CUcontext *pctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext *); - static auto func_ptr = LoadSymbol("cuCtxPopCurrent_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pctx); -} - -CUresult CUDAAPI cuCtxSetCurrent(CUcontext ctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext); - 
static auto func_ptr = LoadSymbol("cuCtxSetCurrent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx); -} - -CUresult CUDAAPI cuCtxGetCurrent(CUcontext *pctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext *); - static auto func_ptr = LoadSymbol("cuCtxGetCurrent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pctx); -} - -CUresult CUDAAPI cuCtxGetDevice(CUdevice *device) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice *); - static auto func_ptr = LoadSymbol("cuCtxGetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -CUresult CUDAAPI cuCtxGetFlags(unsigned int *flags) { - using FuncPtr = CUresult(CUDAAPI *)(unsigned int *); - static auto func_ptr = LoadSymbol("cuCtxGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -CUresult CUDAAPI cuCtxSynchronize(void) { - using FuncPtr = CUresult(CUDAAPI *)(); - static auto func_ptr = LoadSymbol("cuCtxSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -CUresult CUDAAPI cuCtxSetLimit(CUlimit limit, size_t value) { - using FuncPtr = CUresult(CUDAAPI *)(CUlimit, size_t); - static auto func_ptr = LoadSymbol("cuCtxSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -CUresult CUDAAPI cuCtxGetLimit(size_t *pvalue, CUlimit limit) { - using FuncPtr = CUresult(CUDAAPI *)(size_t *, CUlimit); - static auto func_ptr = LoadSymbol("cuCtxGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pvalue, limit); -} - -CUresult CUDAAPI cuCtxGetCacheConfig(CUfunc_cache *pconfig) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunc_cache *); - static auto func_ptr = LoadSymbol("cuCtxGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pconfig); -} - -CUresult CUDAAPI cuCtxSetCacheConfig(CUfunc_cache config) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunc_cache); - static auto 
func_ptr = LoadSymbol("cuCtxSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config); -} - -CUresult CUDAAPI cuCtxGetSharedMemConfig(CUsharedconfig *pConfig) { - using FuncPtr = CUresult(CUDAAPI *)(CUsharedconfig *); - static auto func_ptr = LoadSymbol("cuCtxGetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pConfig); -} - -CUresult CUDAAPI cuCtxSetSharedMemConfig(CUsharedconfig config) { - using FuncPtr = CUresult(CUDAAPI *)(CUsharedconfig); - static auto func_ptr = LoadSymbol("cuCtxSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config); -} - -CUresult CUDAAPI cuCtxGetApiVersion(CUcontext ctx, unsigned int *version) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext, unsigned int *); - static auto func_ptr = LoadSymbol("cuCtxGetApiVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx, version); -} - -CUresult CUDAAPI cuCtxGetStreamPriorityRange(int *leastPriority, - int *greatestPriority) { - using FuncPtr = CUresult(CUDAAPI *)(int *, int *); - static auto func_ptr = LoadSymbol("cuCtxGetStreamPriorityRange"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(leastPriority, greatestPriority); -} - -CUresult CUDAAPI cuCtxResetPersistingL2Cache(void) { - using FuncPtr = CUresult(CUDAAPI *)(); - static auto func_ptr = LoadSymbol("cuCtxResetPersistingL2Cache"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuCtxAttach(CUcontext *pctx, - unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext *, unsigned int); - static auto func_ptr = LoadSymbol("cuCtxAttach"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pctx, flags); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuCtxDetach(CUcontext ctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuCtxDetach"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(ctx); -} - -CUresult CUDAAPI cuModuleLoad(CUmodule *module, const char *fname) { - using FuncPtr = CUresult(CUDAAPI *)(CUmodule *, const char *); - static auto func_ptr = LoadSymbol("cuModuleLoad"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(module, fname); -} - -CUresult CUDAAPI cuModuleLoadData(CUmodule *module, const void *image) { - using FuncPtr = CUresult(CUDAAPI *)(CUmodule *, const void *); - static auto func_ptr = LoadSymbol("cuModuleLoadData"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(module, image); -} - -CUresult CUDAAPI cuModuleLoadDataEx(CUmodule *module, const void *image, - unsigned int numOptions, - CUjit_option *options, - void **optionValues) { - using FuncPtr = CUresult(CUDAAPI *)(CUmodule *, const void *, unsigned int, - CUjit_option *, void **); - static auto func_ptr = LoadSymbol("cuModuleLoadDataEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(module, image, numOptions, options, optionValues); -} - -CUresult CUDAAPI cuModuleLoadFatBinary(CUmodule *module, const void *fatCubin) { - using FuncPtr = CUresult(CUDAAPI *)(CUmodule *, const void *); - static auto func_ptr = LoadSymbol("cuModuleLoadFatBinary"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(module, fatCubin); -} - -CUresult CUDAAPI cuModuleUnload(CUmodule hmod) { - using FuncPtr = CUresult(CUDAAPI *)(CUmodule); - static auto func_ptr = LoadSymbol("cuModuleUnload"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hmod); -} - -CUresult CUDAAPI cuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, - const char *name) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction *, CUmodule, const char *); - static auto func_ptr = LoadSymbol("cuModuleGetFunction"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, hmod, name); -} - -CUresult CUDAAPI cuModuleGetGlobal(CUdeviceptr *dptr, size_t *bytes, - 
CUmodule hmod, const char *name) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr *, size_t *, CUmodule, const char *); - static auto func_ptr = LoadSymbol("cuModuleGetGlobal_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr, bytes, hmod, name); -} - -CUresult CUDAAPI cuModuleGetTexRef(CUtexref *pTexRef, CUmodule hmod, - const char *name) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref *, CUmodule, const char *); - static auto func_ptr = LoadSymbol("cuModuleGetTexRef"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexRef, hmod, name); -} - -CUresult CUDAAPI cuModuleGetSurfRef(CUsurfref *pSurfRef, CUmodule hmod, - const char *name) { - using FuncPtr = CUresult(CUDAAPI *)(CUsurfref *, CUmodule, const char *); - static auto func_ptr = LoadSymbol("cuModuleGetSurfRef"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pSurfRef, hmod, name); -} - -CUresult CUDAAPI cuLinkCreate(unsigned int numOptions, CUjit_option *options, - void **optionValues, CUlinkState *stateOut) { - using FuncPtr = - CUresult(CUDAAPI *)(unsigned int, CUjit_option *, void **, CUlinkState *); - static auto func_ptr = LoadSymbol("cuLinkCreate_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numOptions, options, optionValues, stateOut); -} - -CUresult CUDAAPI cuLinkAddData(CUlinkState state, CUjitInputType type, - void *data, size_t size, const char *name, - unsigned int numOptions, CUjit_option *options, - void **optionValues) { - using FuncPtr = - CUresult(CUDAAPI *)(CUlinkState, CUjitInputType, void *, size_t, - const char *, unsigned int, CUjit_option *, void **); - static auto func_ptr = LoadSymbol("cuLinkAddData_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(state, type, data, size, name, numOptions, options, - optionValues); -} - -CUresult CUDAAPI cuLinkAddFile(CUlinkState state, CUjitInputType type, - const char *path, unsigned int numOptions, - CUjit_option 
*options, void **optionValues) { - using FuncPtr = CUresult(CUDAAPI *)(CUlinkState, CUjitInputType, const char *, - unsigned int, CUjit_option *, void **); - static auto func_ptr = LoadSymbol("cuLinkAddFile_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(state, type, path, numOptions, options, optionValues); -} - -CUresult CUDAAPI cuLinkComplete(CUlinkState state, void **cubinOut, - size_t *sizeOut) { - using FuncPtr = CUresult(CUDAAPI *)(CUlinkState, void **, size_t *); - static auto func_ptr = LoadSymbol("cuLinkComplete"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(state, cubinOut, sizeOut); -} - -CUresult CUDAAPI cuLinkDestroy(CUlinkState state) { - using FuncPtr = CUresult(CUDAAPI *)(CUlinkState); - static auto func_ptr = LoadSymbol("cuLinkDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(state); -} - -CUresult CUDAAPI cuMemGetInfo(size_t *free, size_t *total) { - using FuncPtr = CUresult(CUDAAPI *)(size_t *, size_t *); - static auto func_ptr = LoadSymbol("cuMemGetInfo_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(free, total); -} - -CUresult CUDAAPI cuMemAlloc(CUdeviceptr *dptr, size_t bytesize) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t); - static auto func_ptr = LoadSymbol("cuMemAlloc_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr, bytesize); -} - -CUresult CUDAAPI cuMemAllocPitch(CUdeviceptr *dptr, size_t *pPitch, - size_t WidthInBytes, size_t Height, - unsigned int ElementSizeBytes) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t *, size_t, size_t, - unsigned int); - static auto func_ptr = LoadSymbol("cuMemAllocPitch_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr, pPitch, WidthInBytes, Height, ElementSizeBytes); -} - -CUresult CUDAAPI cuMemFree(CUdeviceptr dptr) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr); - static auto func_ptr = 
LoadSymbol("cuMemFree_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr); -} - -CUresult CUDAAPI cuMemGetAddressRange(CUdeviceptr *pbase, size_t *psize, - CUdeviceptr dptr) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t *, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuMemGetAddressRange_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pbase, psize, dptr); -} - -CUresult CUDAAPI cuMemAllocHost(void **pp, size_t bytesize) { - using FuncPtr = CUresult(CUDAAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cuMemAllocHost_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pp, bytesize); -} - -CUresult CUDAAPI cuMemFreeHost(void *p) { - using FuncPtr = CUresult(CUDAAPI *)(void *); - static auto func_ptr = LoadSymbol("cuMemFreeHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -CUresult CUDAAPI cuMemHostAlloc(void **pp, size_t bytesize, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cuMemHostAlloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pp, bytesize, Flags); -} - -CUresult CUDAAPI cuMemHostGetDevicePointer(CUdeviceptr *pdptr, void *p, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, void *, unsigned int); - static auto func_ptr = LoadSymbol("cuMemHostGetDevicePointer_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pdptr, p, Flags); -} - -CUresult CUDAAPI cuMemHostGetFlags(unsigned int *pFlags, void *p) { - using FuncPtr = CUresult(CUDAAPI *)(unsigned int *, void *); - static auto func_ptr = LoadSymbol("cuMemHostGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFlags, p); -} - -CUresult CUDAAPI cuMemAllocManaged(CUdeviceptr *dptr, size_t bytesize, - unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t, 
unsigned int); - static auto func_ptr = LoadSymbol("cuMemAllocManaged"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr, bytesize, flags); -} - -CUresult CUDAAPI cuDeviceGetByPCIBusId(CUdevice *dev, const char *pciBusId) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice *, const char *); - static auto func_ptr = LoadSymbol("cuDeviceGetByPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev, pciBusId); -} - -CUresult CUDAAPI cuDeviceGetPCIBusId(char *pciBusId, int len, CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(char *, int, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pciBusId, len, dev); -} - -CUresult CUDAAPI cuIpcGetEventHandle(CUipcEventHandle *pHandle, CUevent event) { - using FuncPtr = CUresult(CUDAAPI *)(CUipcEventHandle *, CUevent); - static auto func_ptr = LoadSymbol("cuIpcGetEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHandle, event); -} - -CUresult CUDAAPI cuIpcOpenEventHandle(CUevent *phEvent, - CUipcEventHandle handle) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent *, CUipcEventHandle); - static auto func_ptr = LoadSymbol("cuIpcOpenEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phEvent, handle); -} - -CUresult CUDAAPI cuIpcGetMemHandle(CUipcMemHandle *pHandle, CUdeviceptr dptr) { - using FuncPtr = CUresult(CUDAAPI *)(CUipcMemHandle *, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuIpcGetMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHandle, dptr); -} - -CUresult CUDAAPI cuIpcOpenMemHandle(CUdeviceptr *pdptr, CUipcMemHandle handle, - unsigned int Flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr *, CUipcMemHandle, unsigned int); - static auto func_ptr = LoadSymbol("cuIpcOpenMemHandle_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pdptr, handle, 
Flags); -} - -CUresult CUDAAPI cuIpcCloseMemHandle(CUdeviceptr dptr) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr); - static auto func_ptr = LoadSymbol("cuIpcCloseMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr); -} - -CUresult CUDAAPI cuMemHostRegister(void *p, size_t bytesize, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cuMemHostRegister_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, bytesize, Flags); -} - -CUresult CUDAAPI cuMemHostUnregister(void *p) { - using FuncPtr = CUresult(CUDAAPI *)(void *); - static auto func_ptr = LoadSymbol("cuMemHostUnregister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -CUresult CUDAAPI cuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemcpy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, ByteCount); -} - -CUresult CUDAAPI cuMemcpyPeer(CUdeviceptr dstDevice, CUcontext dstContext, - CUdeviceptr srcDevice, CUcontext srcContext, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUcontext, CUdeviceptr, - CUcontext, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstContext, srcDevice, srcContext, ByteCount); -} - -CUresult CUDAAPI cuMemcpyHtoD(CUdeviceptr dstDevice, const void *srcHost, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, const void *, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyHtoD_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, srcHost, ByteCount); -} - -CUresult CUDAAPI cuMemcpyDtoH(void *dstHost, CUdeviceptr srcDevice, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(void *, 
CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyDtoH_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstHost, srcDevice, ByteCount); -} - -CUresult CUDAAPI cuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyDtoD_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, srcDevice, ByteCount); -} - -CUresult CUDAAPI cuMemcpyDtoA(CUarray dstArray, size_t dstOffset, - CUdeviceptr srcDevice, size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray, size_t, CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyDtoA_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstArray, dstOffset, srcDevice, ByteCount); -} - -CUresult CUDAAPI cuMemcpyAtoD(CUdeviceptr dstDevice, CUarray srcArray, - size_t srcOffset, size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUarray, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyAtoD_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, srcArray, srcOffset, ByteCount); -} - -CUresult CUDAAPI cuMemcpyHtoA(CUarray dstArray, size_t dstOffset, - const void *srcHost, size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray, size_t, const void *, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyHtoA_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstArray, dstOffset, srcHost, ByteCount); -} - -CUresult CUDAAPI cuMemcpyAtoH(void *dstHost, CUarray srcArray, size_t srcOffset, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(void *, CUarray, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyAtoH_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstHost, srcArray, srcOffset, ByteCount); -} - -CUresult CUDAAPI cuMemcpyAtoA(CUarray dstArray, 
size_t dstOffset, - CUarray srcArray, size_t srcOffset, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray, size_t, CUarray, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyAtoA_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstArray, dstOffset, srcArray, srcOffset, ByteCount); -} - -CUresult CUDAAPI cuMemcpy2D(const CUDA_MEMCPY2D *pCopy) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY2D *); - static auto func_ptr = LoadSymbol("cuMemcpy2D_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy); -} - -CUresult CUDAAPI cuMemcpy2DUnaligned(const CUDA_MEMCPY2D *pCopy) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY2D *); - static auto func_ptr = LoadSymbol("cuMemcpy2DUnaligned_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy); -} - -CUresult CUDAAPI cuMemcpy3D(const CUDA_MEMCPY3D *pCopy) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY3D *); - static auto func_ptr = LoadSymbol("cuMemcpy3D_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy); -} - -CUresult CUDAAPI cuMemcpy3DPeer(const CUDA_MEMCPY3D_PEER *pCopy) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY3D_PEER *); - static auto func_ptr = LoadSymbol("cuMemcpy3DPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy); -} - -CUresult CUDAAPI cuMemcpyAsync(CUdeviceptr dst, CUdeviceptr src, - size_t ByteCount, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, CUdeviceptr, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpyPeerAsync(CUdeviceptr dstDevice, CUcontext dstContext, - CUdeviceptr srcDevice, CUcontext srcContext, - size_t ByteCount, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUcontext, CUdeviceptr, - 
CUcontext, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstContext, srcDevice, srcContext, ByteCount, - hStream); -} - -CUresult CUDAAPI cuMemcpyHtoDAsync(CUdeviceptr dstDevice, const void *srcHost, - size_t ByteCount, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, const void *, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyHtoDAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, srcHost, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpyDtoHAsync(void *dstHost, CUdeviceptr srcDevice, - size_t ByteCount, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(void *, CUdeviceptr, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyDtoHAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstHost, srcDevice, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpyDtoDAsync(CUdeviceptr dstDevice, CUdeviceptr srcDevice, - size_t ByteCount, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, CUdeviceptr, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyDtoDAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, srcDevice, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpyHtoAAsync(CUarray dstArray, size_t dstOffset, - const void *srcHost, size_t ByteCount, - CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUarray, size_t, const void *, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyHtoAAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstArray, dstOffset, srcHost, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpyAtoHAsync(void *dstHost, CUarray srcArray, - size_t srcOffset, size_t ByteCount, - CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(void *, CUarray, size_t, size_t, CUstream); - static auto 
func_ptr = LoadSymbol("cuMemcpyAtoHAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstHost, srcArray, srcOffset, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpy2DAsync(const CUDA_MEMCPY2D *pCopy, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY2D *, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpy2DAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy, hStream); -} - -CUresult CUDAAPI cuMemcpy3DAsync(const CUDA_MEMCPY3D *pCopy, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY3D *, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpy3DAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy, hStream); -} - -CUresult CUDAAPI cuMemcpy3DPeerAsync(const CUDA_MEMCPY3D_PEER *pCopy, - CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY3D_PEER *, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpy3DPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy, hStream); -} - -CUresult CUDAAPI cuMemsetD8(CUdeviceptr dstDevice, unsigned char uc, size_t N) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, unsigned char, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD8_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, uc, N); -} - -CUresult CUDAAPI cuMemsetD16(CUdeviceptr dstDevice, unsigned short us, - size_t N) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, unsigned short, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD16_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, us, N); -} - -CUresult CUDAAPI cuMemsetD32(CUdeviceptr dstDevice, unsigned int ui, size_t N) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, unsigned int, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD32_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, 
ui, N); -} - -CUresult CUDAAPI cuMemsetD2D8(CUdeviceptr dstDevice, size_t dstPitch, - unsigned char uc, size_t Width, size_t Height) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned char, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD2D8_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, uc, Width, Height); -} - -CUresult CUDAAPI cuMemsetD2D16(CUdeviceptr dstDevice, size_t dstPitch, - unsigned short us, size_t Width, size_t Height) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned short, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD2D16_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, us, Width, Height); -} - -CUresult CUDAAPI cuMemsetD2D32(CUdeviceptr dstDevice, size_t dstPitch, - unsigned int ui, size_t Width, size_t Height) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned int, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD2D32_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, ui, Width, Height); -} - -CUresult CUDAAPI cuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, - size_t N, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, unsigned char, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD8Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, uc, N, hStream); -} - -CUresult CUDAAPI cuMemsetD16Async(CUdeviceptr dstDevice, unsigned short us, - size_t N, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, unsigned short, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD16Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, us, N, hStream); -} - -CUresult CUDAAPI cuMemsetD32Async(CUdeviceptr dstDevice, unsigned int ui, - size_t N, CUstream hStream) { - using 
FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, unsigned int, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD32Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, ui, N, hStream); -} - -CUresult CUDAAPI cuMemsetD2D8Async(CUdeviceptr dstDevice, size_t dstPitch, - unsigned char uc, size_t Width, - size_t Height, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned char, - size_t, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD2D8Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, uc, Width, Height, hStream); -} - -CUresult CUDAAPI cuMemsetD2D16Async(CUdeviceptr dstDevice, size_t dstPitch, - unsigned short us, size_t Width, - size_t Height, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned short, - size_t, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD2D16Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, us, Width, Height, hStream); -} - -CUresult CUDAAPI cuMemsetD2D32Async(CUdeviceptr dstDevice, size_t dstPitch, - unsigned int ui, size_t Width, - size_t Height, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned int, size_t, - size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD2D32Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, ui, Width, Height, hStream); -} - -CUresult CUDAAPI cuArrayCreate(CUarray *pHandle, - const CUDA_ARRAY_DESCRIPTOR *pAllocateArray) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray *, const CUDA_ARRAY_DESCRIPTOR *); - static auto func_ptr = LoadSymbol("cuArrayCreate_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHandle, pAllocateArray); -} - -CUresult CUDAAPI cuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR *pArrayDescriptor, - CUarray hArray) { - using FuncPtr = 
CUresult(CUDAAPI *)(CUDA_ARRAY_DESCRIPTOR *, CUarray); - static auto func_ptr = LoadSymbol("cuArrayGetDescriptor_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pArrayDescriptor, hArray); -} - -CUresult CUDAAPI cuArrayGetSparseProperties( - CUDA_ARRAY_SPARSE_PROPERTIES *sparseProperties, CUarray array) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_ARRAY_SPARSE_PROPERTIES *, CUarray); - static auto func_ptr = LoadSymbol("cuArrayGetSparseProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(sparseProperties, array); -} - -CUresult CUDAAPI cuMipmappedArrayGetSparseProperties( - CUDA_ARRAY_SPARSE_PROPERTIES *sparseProperties, CUmipmappedArray mipmap) { - using FuncPtr = - CUresult(CUDAAPI *)(CUDA_ARRAY_SPARSE_PROPERTIES *, CUmipmappedArray); - static auto func_ptr = - LoadSymbol("cuMipmappedArrayGetSparseProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(sparseProperties, mipmap); -} - -CUresult CUDAAPI cuArrayGetPlane(CUarray *pPlaneArray, CUarray hArray, - unsigned int planeIdx) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray *, CUarray, unsigned int); - static auto func_ptr = LoadSymbol("cuArrayGetPlane"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pPlaneArray, hArray, planeIdx); -} - -CUresult CUDAAPI cuArrayDestroy(CUarray hArray) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray); - static auto func_ptr = LoadSymbol("cuArrayDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hArray); -} - -CUresult CUDAAPI cuArray3DCreate( - CUarray *pHandle, const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray) { - using FuncPtr = - CUresult(CUDAAPI *)(CUarray *, const CUDA_ARRAY3D_DESCRIPTOR *); - static auto func_ptr = LoadSymbol("cuArray3DCreate_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHandle, pAllocateArray); -} - -CUresult CUDAAPI cuArray3DGetDescriptor( - CUDA_ARRAY3D_DESCRIPTOR *pArrayDescriptor, CUarray hArray) { 
- using FuncPtr = CUresult(CUDAAPI *)(CUDA_ARRAY3D_DESCRIPTOR *, CUarray); - static auto func_ptr = LoadSymbol("cuArray3DGetDescriptor_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pArrayDescriptor, hArray); -} - -CUresult CUDAAPI -cuMipmappedArrayCreate(CUmipmappedArray *pHandle, - const CUDA_ARRAY3D_DESCRIPTOR *pMipmappedArrayDesc, - unsigned int numMipmapLevels) { - using FuncPtr = CUresult(CUDAAPI *)( - CUmipmappedArray *, const CUDA_ARRAY3D_DESCRIPTOR *, unsigned int); - static auto func_ptr = LoadSymbol("cuMipmappedArrayCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHandle, pMipmappedArrayDesc, numMipmapLevels); -} - -CUresult CUDAAPI cuMipmappedArrayGetLevel(CUarray *pLevelArray, - CUmipmappedArray hMipmappedArray, - unsigned int level) { - using FuncPtr = - CUresult(CUDAAPI *)(CUarray *, CUmipmappedArray, unsigned int); - static auto func_ptr = LoadSymbol("cuMipmappedArrayGetLevel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pLevelArray, hMipmappedArray, level); -} - -CUresult CUDAAPI cuMipmappedArrayDestroy(CUmipmappedArray hMipmappedArray) { - using FuncPtr = CUresult(CUDAAPI *)(CUmipmappedArray); - static auto func_ptr = LoadSymbol("cuMipmappedArrayDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hMipmappedArray); -} - -CUresult CUDAAPI cuMemAddressReserve(CUdeviceptr *ptr, size_t size, - size_t alignment, CUdeviceptr addr, - unsigned long long flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t, size_t, - CUdeviceptr, unsigned long long); - static auto func_ptr = LoadSymbol("cuMemAddressReserve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size, alignment, addr, flags); -} - -CUresult CUDAAPI cuMemAddressFree(CUdeviceptr ptr, size_t size) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemAddressFree"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(ptr, size); -} - -CUresult CUDAAPI cuMemCreate(CUmemGenericAllocationHandle *handle, size_t size, - const CUmemAllocationProp *prop, - unsigned long long flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUmemGenericAllocationHandle *, size_t, - const CUmemAllocationProp *, unsigned long long); - static auto func_ptr = LoadSymbol("cuMemCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, size, prop, flags); -} - -CUresult CUDAAPI cuMemRelease(CUmemGenericAllocationHandle handle) { - using FuncPtr = CUresult(CUDAAPI *)(CUmemGenericAllocationHandle); - static auto func_ptr = LoadSymbol("cuMemRelease"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -CUresult CUDAAPI cuMemMap(CUdeviceptr ptr, size_t size, size_t offset, - CUmemGenericAllocationHandle handle, - unsigned long long flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, size_t, size_t, - CUmemGenericAllocationHandle, unsigned long long); - static auto func_ptr = LoadSymbol("cuMemMap"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size, offset, handle, flags); -} - -CUresult CUDAAPI cuMemMapArrayAsync(CUarrayMapInfo *mapInfoList, - unsigned int count, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUarrayMapInfo *, unsigned int, CUstream); - static auto func_ptr = LoadSymbol("cuMemMapArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mapInfoList, count, hStream); -} - -CUresult CUDAAPI cuMemUnmap(CUdeviceptr ptr, size_t size) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemUnmap"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size); -} - -CUresult CUDAAPI cuMemSetAccess(CUdeviceptr ptr, size_t size, - const CUmemAccessDesc *desc, size_t count) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, size_t, const CUmemAccessDesc *, size_t); 
- static auto func_ptr = LoadSymbol("cuMemSetAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size, desc, count); -} - -CUresult CUDAAPI cuMemGetAccess(unsigned long long *flags, - const CUmemLocation *location, - CUdeviceptr ptr) { - using FuncPtr = CUresult(CUDAAPI *)(unsigned long long *, - const CUmemLocation *, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuMemGetAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags, location, ptr); -} - -CUresult CUDAAPI cuMemExportToShareableHandle( - void *shareableHandle, CUmemGenericAllocationHandle handle, - CUmemAllocationHandleType handleType, unsigned long long flags) { - using FuncPtr = - CUresult(CUDAAPI *)(void *, CUmemGenericAllocationHandle, - CUmemAllocationHandleType, unsigned long long); - static auto func_ptr = LoadSymbol("cuMemExportToShareableHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(shareableHandle, handle, handleType, flags); -} - -CUresult CUDAAPI cuMemImportFromShareableHandle( - CUmemGenericAllocationHandle *handle, void *osHandle, - CUmemAllocationHandleType shHandleType) { - using FuncPtr = CUresult(CUDAAPI *)(CUmemGenericAllocationHandle *, void *, - CUmemAllocationHandleType); - static auto func_ptr = LoadSymbol("cuMemImportFromShareableHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, osHandle, shHandleType); -} - -CUresult CUDAAPI cuMemGetAllocationGranularity( - size_t *granularity, const CUmemAllocationProp *prop, - CUmemAllocationGranularity_flags option) { - using FuncPtr = CUresult(CUDAAPI *)(size_t *, const CUmemAllocationProp *, - CUmemAllocationGranularity_flags); - static auto func_ptr = LoadSymbol("cuMemGetAllocationGranularity"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(granularity, prop, option); -} - -CUresult CUDAAPI cuMemGetAllocationPropertiesFromHandle( - CUmemAllocationProp *prop, CUmemGenericAllocationHandle 
handle) { - using FuncPtr = - CUresult(CUDAAPI *)(CUmemAllocationProp *, CUmemGenericAllocationHandle); - static auto func_ptr = - LoadSymbol("cuMemGetAllocationPropertiesFromHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(prop, handle); -} - -CUresult CUDAAPI -cuMemRetainAllocationHandle(CUmemGenericAllocationHandle *handle, void *addr) { - using FuncPtr = CUresult(CUDAAPI *)(CUmemGenericAllocationHandle *, void *); - static auto func_ptr = LoadSymbol("cuMemRetainAllocationHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, addr); -} - -CUresult CUDAAPI cuMemFreeAsync(CUdeviceptr dptr, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUstream); - static auto func_ptr = LoadSymbol("cuMemFreeAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr, hStream); -} - -CUresult CUDAAPI cuMemAllocAsync(CUdeviceptr *dptr, size_t bytesize, - CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemAllocAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr, bytesize, hStream); -} - -CUresult CUDAAPI cuMemPoolTrimTo(CUmemoryPool pool, size_t minBytesToKeep) { - using FuncPtr = CUresult(CUDAAPI *)(CUmemoryPool, size_t); - static auto func_ptr = LoadSymbol("cuMemPoolTrimTo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pool, minBytesToKeep); -} - -CUresult CUDAAPI cuMemPoolSetAttribute(CUmemoryPool pool, - CUmemPool_attribute attr, void *value) { - using FuncPtr = - CUresult(CUDAAPI *)(CUmemoryPool, CUmemPool_attribute, void *); - static auto func_ptr = LoadSymbol("cuMemPoolSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pool, attr, value); -} - -CUresult CUDAAPI cuMemPoolGetAttribute(CUmemoryPool pool, - CUmemPool_attribute attr, void *value) { - using FuncPtr = - CUresult(CUDAAPI *)(CUmemoryPool, 
CUmemPool_attribute, void *); - static auto func_ptr = LoadSymbol("cuMemPoolGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pool, attr, value); -} - -CUresult CUDAAPI cuMemPoolSetAccess(CUmemoryPool pool, - const CUmemAccessDesc *map, size_t count) { - using FuncPtr = - CUresult(CUDAAPI *)(CUmemoryPool, const CUmemAccessDesc *, size_t); - static auto func_ptr = LoadSymbol("cuMemPoolSetAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pool, map, count); -} - -CUresult CUDAAPI cuMemPoolGetAccess(CUmemAccess_flags *flags, - CUmemoryPool memPool, - CUmemLocation *location) { - using FuncPtr = - CUresult(CUDAAPI *)(CUmemAccess_flags *, CUmemoryPool, CUmemLocation *); - static auto func_ptr = LoadSymbol("cuMemPoolGetAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags, memPool, location); -} - -CUresult CUDAAPI cuMemPoolCreate(CUmemoryPool *pool, - const CUmemPoolProps *poolProps) { - using FuncPtr = CUresult(CUDAAPI *)(CUmemoryPool *, const CUmemPoolProps *); - static auto func_ptr = LoadSymbol("cuMemPoolCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pool, poolProps); -} - -CUresult CUDAAPI cuMemPoolDestroy(CUmemoryPool pool) { - using FuncPtr = CUresult(CUDAAPI *)(CUmemoryPool); - static auto func_ptr = LoadSymbol("cuMemPoolDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pool); -} - -CUresult CUDAAPI cuMemAllocFromPoolAsync(CUdeviceptr *dptr, size_t bytesize, - CUmemoryPool pool, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr *, size_t, CUmemoryPool, CUstream); - static auto func_ptr = LoadSymbol("cuMemAllocFromPoolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr, bytesize, pool, hStream); -} - -CUresult CUDAAPI cuMemPoolExportToShareableHandle( - void *handle_out, CUmemoryPool pool, CUmemAllocationHandleType handleType, - unsigned long long flags) { - using 
FuncPtr = CUresult(CUDAAPI *)( - void *, CUmemoryPool, CUmemAllocationHandleType, unsigned long long); - static auto func_ptr = - LoadSymbol("cuMemPoolExportToShareableHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle_out, pool, handleType, flags); -} - -CUresult CUDAAPI cuMemPoolImportFromShareableHandle( - CUmemoryPool *pool_out, void *handle, CUmemAllocationHandleType handleType, - unsigned long long flags) { - using FuncPtr = CUresult(CUDAAPI *)( - CUmemoryPool *, void *, CUmemAllocationHandleType, unsigned long long); - static auto func_ptr = - LoadSymbol("cuMemPoolImportFromShareableHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pool_out, handle, handleType, flags); -} - -CUresult CUDAAPI cuMemPoolExportPointer(CUmemPoolPtrExportData *shareData_out, - CUdeviceptr ptr) { - using FuncPtr = CUresult(CUDAAPI *)(CUmemPoolPtrExportData *, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuMemPoolExportPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(shareData_out, ptr); -} - -CUresult CUDAAPI cuMemPoolImportPointer(CUdeviceptr *ptr_out, CUmemoryPool pool, - CUmemPoolPtrExportData *shareData) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, CUmemoryPool, - CUmemPoolPtrExportData *); - static auto func_ptr = LoadSymbol("cuMemPoolImportPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr_out, pool, shareData); -} - -CUresult CUDAAPI cuPointerGetAttribute(void *data, - CUpointer_attribute attribute, - CUdeviceptr ptr) { - using FuncPtr = CUresult(CUDAAPI *)(void *, CUpointer_attribute, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuPointerGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, attribute, ptr); -} - -CUresult CUDAAPI cuMemPrefetchAsync(CUdeviceptr devPtr, size_t count, - CUdevice dstDevice, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t, CUdevice, 
CUstream); - static auto func_ptr = LoadSymbol("cuMemPrefetchAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, dstDevice, hStream); -} - -CUresult CUDAAPI cuMemAdvise(CUdeviceptr devPtr, size_t count, - CUmem_advise advice, CUdevice device) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, size_t, CUmem_advise, CUdevice); - static auto func_ptr = LoadSymbol("cuMemAdvise"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, advice, device); -} - -CUresult CUDAAPI cuMemRangeGetAttribute(void *data, size_t dataSize, - CUmem_range_attribute attribute, - CUdeviceptr devPtr, size_t count) { - using FuncPtr = CUresult(CUDAAPI *)(void *, size_t, CUmem_range_attribute, - CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemRangeGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSize, attribute, devPtr, count); -} - -CUresult CUDAAPI cuMemRangeGetAttributes(void **data, size_t *dataSizes, - CUmem_range_attribute *attributes, - size_t numAttributes, - CUdeviceptr devPtr, size_t count) { - using FuncPtr = CUresult(CUDAAPI *)( - void **, size_t *, CUmem_range_attribute *, size_t, CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemRangeGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSizes, attributes, numAttributes, devPtr, count); -} - -CUresult CUDAAPI cuPointerSetAttribute(const void *value, - CUpointer_attribute attribute, - CUdeviceptr ptr) { - using FuncPtr = - CUresult(CUDAAPI *)(const void *, CUpointer_attribute, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuPointerSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attribute, ptr); -} - -CUresult CUDAAPI cuPointerGetAttributes(unsigned int numAttributes, - CUpointer_attribute *attributes, - void **data, CUdeviceptr ptr) { - using FuncPtr = CUresult(CUDAAPI *)(unsigned int, 
CUpointer_attribute *, - void **, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuPointerGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numAttributes, attributes, data, ptr); -} - -CUresult CUDAAPI cuStreamCreate(CUstream *phStream, unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream *, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phStream, Flags); -} - -CUresult CUDAAPI cuStreamCreateWithPriority(CUstream *phStream, - unsigned int flags, int priority) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream *, unsigned int, int); - static auto func_ptr = LoadSymbol("cuStreamCreateWithPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phStream, flags, priority); -} - -CUresult CUDAAPI cuStreamGetPriority(CUstream hStream, int *priority) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, int *); - static auto func_ptr = LoadSymbol("cuStreamGetPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, priority); -} - -CUresult CUDAAPI cuStreamGetFlags(CUstream hStream, unsigned int *flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, unsigned int *); - static auto func_ptr = LoadSymbol("cuStreamGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, flags); -} - -CUresult CUDAAPI cuStreamGetCtx(CUstream hStream, CUcontext *pctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUcontext *); - static auto func_ptr = LoadSymbol("cuStreamGetCtx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, pctx); -} - -CUresult CUDAAPI cuStreamWaitEvent(CUstream hStream, CUevent hEvent, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUevent, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamWaitEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, 
hEvent, Flags); -} - -CUresult CUDAAPI cuStreamAddCallback(CUstream hStream, - CUstreamCallback callback, void *userData, - unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUstreamCallback, void *, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamAddCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, callback, userData, flags); -} - -CUresult CUDAAPI cuStreamBeginCapture(CUstream hStream, - CUstreamCaptureMode mode) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUstreamCaptureMode); - static auto func_ptr = LoadSymbol("cuStreamBeginCapture_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, mode); -} - -CUresult CUDAAPI cuThreadExchangeStreamCaptureMode(CUstreamCaptureMode *mode) { - using FuncPtr = CUresult(CUDAAPI *)(CUstreamCaptureMode *); - static auto func_ptr = - LoadSymbol("cuThreadExchangeStreamCaptureMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mode); -} - -CUresult CUDAAPI cuStreamEndCapture(CUstream hStream, CUgraph *phGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUgraph *); - static auto func_ptr = LoadSymbol("cuStreamEndCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, phGraph); -} - -CUresult CUDAAPI cuStreamIsCapturing(CUstream hStream, - CUstreamCaptureStatus *captureStatus) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUstreamCaptureStatus *); - static auto func_ptr = LoadSymbol("cuStreamIsCapturing"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, captureStatus); -} - -CUresult CUDAAPI cuStreamGetCaptureInfo(CUstream hStream, - CUstreamCaptureStatus *captureStatus, - cuuint64_t *id) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUstreamCaptureStatus *, cuuint64_t *); - static auto func_ptr = LoadSymbol("cuStreamGetCaptureInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, captureStatus, 
id); -} - -CUresult CUDAAPI cuStreamAttachMemAsync(CUstream hStream, CUdeviceptr dptr, - size_t length, unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUdeviceptr, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamAttachMemAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, dptr, length, flags); -} - -CUresult CUDAAPI cuStreamQuery(CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream); - static auto func_ptr = LoadSymbol("cuStreamQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream); -} - -CUresult CUDAAPI cuStreamSynchronize(CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream); - static auto func_ptr = LoadSymbol("cuStreamSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream); -} - -CUresult CUDAAPI cuStreamDestroy(CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream); - static auto func_ptr = LoadSymbol("cuStreamDestroy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream); -} - -CUresult CUDAAPI cuStreamCopyAttributes(CUstream dst, CUstream src) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUstream); - static auto func_ptr = LoadSymbol("cuStreamCopyAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src); -} - -CUresult CUDAAPI cuStreamGetAttribute(CUstream hStream, CUstreamAttrID attr, - CUstreamAttrValue *value_out) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUstreamAttrID, CUstreamAttrValue *); - static auto func_ptr = LoadSymbol("cuStreamGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, attr, value_out); -} - -CUresult CUDAAPI cuStreamSetAttribute(CUstream hStream, CUstreamAttrID attr, - const CUstreamAttrValue *value) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUstreamAttrID, const CUstreamAttrValue *); - static auto func_ptr = 
LoadSymbol("cuStreamSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, attr, value); -} - -CUresult CUDAAPI cuEventCreate(CUevent *phEvent, unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent *, unsigned int); - static auto func_ptr = LoadSymbol("cuEventCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phEvent, Flags); -} - -CUresult CUDAAPI cuEventRecord(CUevent hEvent, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent, CUstream); - static auto func_ptr = LoadSymbol("cuEventRecord"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hEvent, hStream); -} - -CUresult CUDAAPI cuEventRecordWithFlags(CUevent hEvent, CUstream hStream, - unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent, CUstream, unsigned int); - static auto func_ptr = LoadSymbol("cuEventRecordWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hEvent, hStream, flags); -} - -CUresult CUDAAPI cuEventQuery(CUevent hEvent) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent); - static auto func_ptr = LoadSymbol("cuEventQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hEvent); -} - -CUresult CUDAAPI cuEventSynchronize(CUevent hEvent) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent); - static auto func_ptr = LoadSymbol("cuEventSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hEvent); -} - -CUresult CUDAAPI cuEventDestroy(CUevent hEvent) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent); - static auto func_ptr = LoadSymbol("cuEventDestroy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hEvent); -} - -CUresult CUDAAPI cuEventElapsedTime(float *pMilliseconds, CUevent hStart, - CUevent hEnd) { - using FuncPtr = CUresult(CUDAAPI *)(float *, CUevent, CUevent); - static auto func_ptr = LoadSymbol("cuEventElapsedTime"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(pMilliseconds, hStart, hEnd); -} - -CUresult CUDAAPI -cuImportExternalMemory(CUexternalMemory *extMem_out, - const CUDA_EXTERNAL_MEMORY_HANDLE_DESC *memHandleDesc) { - using FuncPtr = CUresult(CUDAAPI *)(CUexternalMemory *, - const CUDA_EXTERNAL_MEMORY_HANDLE_DESC *); - static auto func_ptr = LoadSymbol("cuImportExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem_out, memHandleDesc); -} - -CUresult CUDAAPI cuExternalMemoryGetMappedBuffer( - CUdeviceptr *devPtr, CUexternalMemory extMem, - const CUDA_EXTERNAL_MEMORY_BUFFER_DESC *bufferDesc) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, CUexternalMemory, - const CUDA_EXTERNAL_MEMORY_BUFFER_DESC *); - static auto func_ptr = LoadSymbol("cuExternalMemoryGetMappedBuffer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, extMem, bufferDesc); -} - -CUresult CUDAAPI cuExternalMemoryGetMappedMipmappedArray( - CUmipmappedArray *mipmap, CUexternalMemory extMem, - const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC *mipmapDesc) { - using FuncPtr = - CUresult(CUDAAPI *)(CUmipmappedArray *, CUexternalMemory, - const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC *); - static auto func_ptr = - LoadSymbol("cuExternalMemoryGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmap, extMem, mipmapDesc); -} - -CUresult CUDAAPI cuDestroyExternalMemory(CUexternalMemory extMem) { - using FuncPtr = CUresult(CUDAAPI *)(CUexternalMemory); - static auto func_ptr = LoadSymbol("cuDestroyExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem); -} - -CUresult CUDAAPI cuImportExternalSemaphore( - CUexternalSemaphore *extSem_out, - const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC *semHandleDesc) { - using FuncPtr = CUresult(CUDAAPI *)( - CUexternalSemaphore *, const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC *); - static auto func_ptr = LoadSymbol("cuImportExternalSemaphore"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(extSem_out, semHandleDesc); -} - -CUresult CUDAAPI cuSignalExternalSemaphoresAsync( - const CUexternalSemaphore *extSemArray, - const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS *paramsArray, - unsigned int numExtSems, CUstream stream) { - using FuncPtr = CUresult(CUDAAPI *)( - const CUexternalSemaphore *, - const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS *, unsigned int, CUstream); - static auto func_ptr = LoadSymbol("cuSignalExternalSemaphoresAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -CUresult CUDAAPI cuWaitExternalSemaphoresAsync( - const CUexternalSemaphore *extSemArray, - const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS *paramsArray, - unsigned int numExtSems, CUstream stream) { - using FuncPtr = CUresult(CUDAAPI *)( - const CUexternalSemaphore *, const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS *, - unsigned int, CUstream); - static auto func_ptr = LoadSymbol("cuWaitExternalSemaphoresAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -CUresult CUDAAPI cuDestroyExternalSemaphore(CUexternalSemaphore extSem) { - using FuncPtr = CUresult(CUDAAPI *)(CUexternalSemaphore); - static auto func_ptr = LoadSymbol("cuDestroyExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem); -} - -CUresult CUDAAPI cuStreamWaitValue32(CUstream stream, CUdeviceptr addr, - cuuint32_t value, unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUdeviceptr, cuuint32_t, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamWaitValue32"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, addr, value, flags); -} - -CUresult CUDAAPI cuStreamWaitValue64(CUstream stream, CUdeviceptr addr, - cuuint64_t value, unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUdeviceptr, cuuint64_t, unsigned 
int); - static auto func_ptr = LoadSymbol("cuStreamWaitValue64"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, addr, value, flags); -} - -CUresult CUDAAPI cuStreamWriteValue32(CUstream stream, CUdeviceptr addr, - cuuint32_t value, unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUdeviceptr, cuuint32_t, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamWriteValue32"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, addr, value, flags); -} - -CUresult CUDAAPI cuStreamWriteValue64(CUstream stream, CUdeviceptr addr, - cuuint64_t value, unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUdeviceptr, cuuint64_t, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamWriteValue64"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, addr, value, flags); -} - -CUresult CUDAAPI cuStreamBatchMemOp(CUstream stream, unsigned int count, - CUstreamBatchMemOpParams *paramArray, - unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, unsigned int, - CUstreamBatchMemOpParams *, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamBatchMemOp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, count, paramArray, flags); -} - -CUresult CUDAAPI cuFuncGetAttribute(int *pi, CUfunction_attribute attrib, - CUfunction hfunc) { - using FuncPtr = CUresult(CUDAAPI *)(int *, CUfunction_attribute, CUfunction); - static auto func_ptr = LoadSymbol("cuFuncGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pi, attrib, hfunc); -} - -CUresult CUDAAPI cuFuncSetAttribute(CUfunction hfunc, - CUfunction_attribute attrib, int value) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, CUfunction_attribute, int); - static auto func_ptr = LoadSymbol("cuFuncSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, attrib, value); -} - -CUresult CUDAAPI 
cuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, CUfunc_cache); - static auto func_ptr = LoadSymbol("cuFuncSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, config); -} - -CUresult CUDAAPI cuFuncSetSharedMemConfig(CUfunction hfunc, - CUsharedconfig config) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, CUsharedconfig); - static auto func_ptr = LoadSymbol("cuFuncSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, config); -} - -CUresult CUDAAPI cuLaunchKernel(CUfunction f, unsigned int gridDimX, - unsigned int gridDimY, unsigned int gridDimZ, - unsigned int blockDimX, unsigned int blockDimY, - unsigned int blockDimZ, - unsigned int sharedMemBytes, CUstream hStream, - void **kernelParams, void **extra) { - using FuncPtr = CUresult(CUDAAPI *)( - CUfunction, unsigned int, unsigned int, unsigned int, unsigned int, - unsigned int, unsigned int, unsigned int, CUstream, void **, void **); - static auto func_ptr = LoadSymbol("cuLaunchKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, - blockDimZ, sharedMemBytes, hStream, kernelParams, extra); -} - -CUresult CUDAAPI cuLaunchCooperativeKernel( - CUfunction f, unsigned int gridDimX, unsigned int gridDimY, - unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, - unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, - void **kernelParams) { - using FuncPtr = CUresult(CUDAAPI *)( - CUfunction, unsigned int, unsigned int, unsigned int, unsigned int, - unsigned int, unsigned int, unsigned int, CUstream, void **); - static auto func_ptr = LoadSymbol("cuLaunchCooperativeKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, - blockDimZ, sharedMemBytes, hStream, kernelParams); -} - 
-CUresult CUDAAPI cuLaunchCooperativeKernelMultiDevice( - CUDA_LAUNCH_PARAMS *launchParamsList, unsigned int numDevices, - unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUDA_LAUNCH_PARAMS *, unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cuLaunchCooperativeKernelMultiDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(launchParamsList, numDevices, flags); -} - -CUresult CUDAAPI cuLaunchHostFunc(CUstream hStream, CUhostFn fn, - void *userData) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUhostFn, void *); - static auto func_ptr = LoadSymbol("cuLaunchHostFunc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, fn, userData); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuFuncSetBlockShape(CUfunction hfunc, int x, - int y, int z) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, int, int); - static auto func_ptr = LoadSymbol("cuFuncSetBlockShape"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, x, y, z); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuFuncSetSharedSize(CUfunction hfunc, - unsigned int bytes) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, unsigned int); - static auto func_ptr = LoadSymbol("cuFuncSetSharedSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, bytes); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuParamSetSize(CUfunction hfunc, - unsigned int numbytes) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, unsigned int); - static auto func_ptr = LoadSymbol("cuParamSetSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, numbytes); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuParamSeti(CUfunction hfunc, int offset, - unsigned int value) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, unsigned int); - static auto func_ptr = LoadSymbol("cuParamSeti"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, offset, value); -} - 
-__CUDA_DEPRECATED CUresult CUDAAPI cuParamSetf(CUfunction hfunc, int offset, - float value) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, float); - static auto func_ptr = LoadSymbol("cuParamSetf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, offset, value); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuParamSetv(CUfunction hfunc, int offset, - void *ptr, - unsigned int numbytes) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, void *, unsigned int); - static auto func_ptr = LoadSymbol("cuParamSetv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, offset, ptr, numbytes); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuLaunch(CUfunction f) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction); - static auto func_ptr = LoadSymbol("cuLaunch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(f); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuLaunchGrid(CUfunction f, int grid_width, - int grid_height) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, int); - static auto func_ptr = LoadSymbol("cuLaunchGrid"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(f, grid_width, grid_height); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuLaunchGridAsync(CUfunction f, - int grid_width, - int grid_height, - CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, int, CUstream); - static auto func_ptr = LoadSymbol("cuLaunchGridAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(f, grid_width, grid_height, hStream); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuParamSetTexRef(CUfunction hfunc, - int texunit, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, CUtexref); - static auto func_ptr = LoadSymbol("cuParamSetTexRef"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, texunit, hTexRef); -} - -CUresult CUDAAPI cuGraphCreate(CUgraph *phGraph, unsigned int flags) { - using FuncPtr = 
CUresult(CUDAAPI *)(CUgraph *, unsigned int); - static auto func_ptr = LoadSymbol("cuGraphCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraph, flags); -} - -CUresult CUDAAPI cuGraphAddKernelNode( - CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, - size_t numDependencies, const CUDA_KERNEL_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, - const CUDA_KERNEL_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphAddKernelNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - nodeParams); -} - -CUresult CUDAAPI cuGraphKernelNodeGetParams( - CUgraphNode hNode, CUDA_KERNEL_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_KERNEL_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphKernelNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphKernelNodeSetParams( - CUgraphNode hNode, const CUDA_KERNEL_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode, const CUDA_KERNEL_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphAddMemcpyNode(CUgraphNode *phGraphNode, CUgraph hGraph, - const CUgraphNode *dependencies, - size_t numDependencies, - const CUDA_MEMCPY3D *copyParams, - CUcontext ctx) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, - const CUDA_MEMCPY3D *, CUcontext); - static auto func_ptr = LoadSymbol("cuGraphAddMemcpyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - copyParams, ctx); -} - -CUresult CUDAAPI 
cuGraphMemcpyNodeGetParams(CUgraphNode hNode, - CUDA_MEMCPY3D *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_MEMCPY3D *); - static auto func_ptr = LoadSymbol("cuGraphMemcpyNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphMemcpyNodeSetParams(CUgraphNode hNode, - const CUDA_MEMCPY3D *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, const CUDA_MEMCPY3D *); - static auto func_ptr = LoadSymbol("cuGraphMemcpyNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphAddMemsetNode( - CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, - size_t numDependencies, const CUDA_MEMSET_NODE_PARAMS *memsetParams, - CUcontext ctx) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, - const CUDA_MEMSET_NODE_PARAMS *, CUcontext); - static auto func_ptr = LoadSymbol("cuGraphAddMemsetNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - memsetParams, ctx); -} - -CUresult CUDAAPI cuGraphMemsetNodeGetParams( - CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_MEMSET_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphMemsetNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphMemsetNodeSetParams( - CUgraphNode hNode, const CUDA_MEMSET_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode, const CUDA_MEMSET_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphAddHostNode(CUgraphNode *phGraphNode, CUgraph hGraph, - const CUgraphNode 
*dependencies, - size_t numDependencies, - const CUDA_HOST_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, - const CUDA_HOST_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphAddHostNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - nodeParams); -} - -CUresult CUDAAPI cuGraphHostNodeGetParams(CUgraphNode hNode, - CUDA_HOST_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_HOST_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphHostNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphHostNodeSetParams( - CUgraphNode hNode, const CUDA_HOST_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode, const CUDA_HOST_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphHostNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphAddChildGraphNode(CUgraphNode *phGraphNode, - CUgraph hGraph, - const CUgraphNode *dependencies, - size_t numDependencies, - CUgraph childGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, - const CUgraphNode *, size_t, CUgraph); - static auto func_ptr = LoadSymbol("cuGraphAddChildGraphNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - childGraph); -} - -CUresult CUDAAPI cuGraphChildGraphNodeGetGraph(CUgraphNode hNode, - CUgraph *phGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraph *); - static auto func_ptr = LoadSymbol("cuGraphChildGraphNodeGetGraph"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, phGraph); -} - -CUresult CUDAAPI cuGraphAddEmptyNode(CUgraphNode *phGraphNode, CUgraph hGraph, - const CUgraphNode 
*dependencies, - size_t numDependencies) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t); - static auto func_ptr = LoadSymbol("cuGraphAddEmptyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies); -} - -CUresult CUDAAPI cuGraphAddEventRecordNode(CUgraphNode *phGraphNode, - CUgraph hGraph, - const CUgraphNode *dependencies, - size_t numDependencies, - CUevent event) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, - const CUgraphNode *, size_t, CUevent); - static auto func_ptr = LoadSymbol("cuGraphAddEventRecordNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, event); -} - -CUresult CUDAAPI cuGraphEventRecordNodeGetEvent(CUgraphNode hNode, - CUevent *event_out) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUevent *); - static auto func_ptr = LoadSymbol("cuGraphEventRecordNodeGetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, event_out); -} - -CUresult CUDAAPI cuGraphEventRecordNodeSetEvent(CUgraphNode hNode, - CUevent event) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUevent); - static auto func_ptr = LoadSymbol("cuGraphEventRecordNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, event); -} - -CUresult CUDAAPI cuGraphAddEventWaitNode(CUgraphNode *phGraphNode, - CUgraph hGraph, - const CUgraphNode *dependencies, - size_t numDependencies, - CUevent event) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, - const CUgraphNode *, size_t, CUevent); - static auto func_ptr = LoadSymbol("cuGraphAddEventWaitNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, event); -} - -CUresult CUDAAPI cuGraphEventWaitNodeGetEvent(CUgraphNode hNode, - CUevent *event_out) { - using FuncPtr = 
CUresult(CUDAAPI *)(CUgraphNode, CUevent *); - static auto func_ptr = LoadSymbol("cuGraphEventWaitNodeGetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, event_out); -} - -CUresult CUDAAPI cuGraphEventWaitNodeSetEvent(CUgraphNode hNode, - CUevent event) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUevent); - static auto func_ptr = LoadSymbol("cuGraphEventWaitNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, event); -} - -CUresult CUDAAPI cuGraphAddExternalSemaphoresSignalNode( - CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, - size_t numDependencies, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, - const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *); - static auto func_ptr = - LoadSymbol("cuGraphAddExternalSemaphoresSignalNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - nodeParams); -} - -CUresult CUDAAPI cuGraphExternalSemaphoresSignalNodeGetParams( - CUgraphNode hNode, CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *params_out) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode, CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *); - static auto func_ptr = - LoadSymbol("cuGraphExternalSemaphoresSignalNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, params_out); -} - -CUresult CUDAAPI cuGraphExternalSemaphoresSignalNodeSetParams( - CUgraphNode hNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *); - static auto func_ptr = - LoadSymbol("cuGraphExternalSemaphoresSignalNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphAddExternalSemaphoresWaitNode( - CUgraphNode *phGraphNode, CUgraph hGraph, 
const CUgraphNode *dependencies, - size_t numDependencies, const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, - const CUDA_EXT_SEM_WAIT_NODE_PARAMS *); - static auto func_ptr = - LoadSymbol("cuGraphAddExternalSemaphoresWaitNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - nodeParams); -} - -CUresult CUDAAPI cuGraphExternalSemaphoresWaitNodeGetParams( - CUgraphNode hNode, CUDA_EXT_SEM_WAIT_NODE_PARAMS *params_out) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode, CUDA_EXT_SEM_WAIT_NODE_PARAMS *); - static auto func_ptr = - LoadSymbol("cuGraphExternalSemaphoresWaitNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, params_out); -} - -CUresult CUDAAPI cuGraphExternalSemaphoresWaitNodeSetParams( - CUgraphNode hNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS *); - static auto func_ptr = - LoadSymbol("cuGraphExternalSemaphoresWaitNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphClone(CUgraph *phGraphClone, CUgraph originalGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph *, CUgraph); - static auto func_ptr = LoadSymbol("cuGraphClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphClone, originalGraph); -} - -CUresult CUDAAPI cuGraphNodeFindInClone(CUgraphNode *phNode, - CUgraphNode hOriginalNode, - CUgraph hClonedGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraphNode, CUgraph); - static auto func_ptr = LoadSymbol("cuGraphNodeFindInClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phNode, hOriginalNode, hClonedGraph); -} - -CUresult CUDAAPI cuGraphNodeGetType(CUgraphNode hNode, CUgraphNodeType 
*type) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraphNodeType *); - static auto func_ptr = LoadSymbol("cuGraphNodeGetType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, type); -} - -CUresult CUDAAPI cuGraphGetNodes(CUgraph hGraph, CUgraphNode *nodes, - size_t *numNodes) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph, CUgraphNode *, size_t *); - static auto func_ptr = LoadSymbol("cuGraphGetNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph, nodes, numNodes); -} - -CUresult CUDAAPI cuGraphGetRootNodes(CUgraph hGraph, CUgraphNode *rootNodes, - size_t *numRootNodes) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph, CUgraphNode *, size_t *); - static auto func_ptr = LoadSymbol("cuGraphGetRootNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph, rootNodes, numRootNodes); -} - -CUresult CUDAAPI cuGraphGetEdges(CUgraph hGraph, CUgraphNode *from, - CUgraphNode *to, size_t *numEdges) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraph, CUgraphNode *, CUgraphNode *, size_t *); - static auto func_ptr = LoadSymbol("cuGraphGetEdges"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph, from, to, numEdges); -} - -CUresult CUDAAPI cuGraphNodeGetDependencies(CUgraphNode hNode, - CUgraphNode *dependencies, - size_t *numDependencies) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraphNode *, size_t *); - static auto func_ptr = LoadSymbol("cuGraphNodeGetDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, dependencies, numDependencies); -} - -CUresult CUDAAPI cuGraphNodeGetDependentNodes(CUgraphNode hNode, - CUgraphNode *dependentNodes, - size_t *numDependentNodes) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraphNode *, size_t *); - static auto func_ptr = LoadSymbol("cuGraphNodeGetDependentNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, dependentNodes, 
numDependentNodes); -} - -CUresult CUDAAPI cuGraphAddDependencies(CUgraph hGraph, const CUgraphNode *from, - const CUgraphNode *to, - size_t numDependencies) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph, const CUgraphNode *, - const CUgraphNode *, size_t); - static auto func_ptr = LoadSymbol("cuGraphAddDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph, from, to, numDependencies); -} - -CUresult CUDAAPI cuGraphRemoveDependencies(CUgraph hGraph, - const CUgraphNode *from, - const CUgraphNode *to, - size_t numDependencies) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph, const CUgraphNode *, - const CUgraphNode *, size_t); - static auto func_ptr = LoadSymbol("cuGraphRemoveDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph, from, to, numDependencies); -} - -CUresult CUDAAPI cuGraphDestroyNode(CUgraphNode hNode) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode); - static auto func_ptr = LoadSymbol("cuGraphDestroyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode); -} - -CUresult CUDAAPI cuGraphInstantiate(CUgraphExec *phGraphExec, CUgraph hGraph, - CUgraphNode *phErrorNode, char *logBuffer, - size_t bufferSize) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec *, CUgraph, CUgraphNode *, - char *, size_t); - static auto func_ptr = LoadSymbol("cuGraphInstantiate_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphExec, hGraph, phErrorNode, logBuffer, bufferSize); -} - -CUresult CUDAAPI -cuGraphExecKernelNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, - const CUDA_KERNEL_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, - const CUDA_KERNEL_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphExecKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, nodeParams); -} - -CUresult CUDAAPI 
cuGraphExecMemcpyNodeSetParams(CUgraphExec hGraphExec, - CUgraphNode hNode, - const CUDA_MEMCPY3D *copyParams, - CUcontext ctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, - const CUDA_MEMCPY3D *, CUcontext); - static auto func_ptr = LoadSymbol("cuGraphExecMemcpyNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, copyParams, ctx); -} - -CUresult CUDAAPI cuGraphExecMemsetNodeSetParams( - CUgraphExec hGraphExec, CUgraphNode hNode, - const CUDA_MEMSET_NODE_PARAMS *memsetParams, CUcontext ctx) { - using FuncPtr = CUresult(CUDAAPI *)( - CUgraphExec, CUgraphNode, const CUDA_MEMSET_NODE_PARAMS *, CUcontext); - static auto func_ptr = LoadSymbol("cuGraphExecMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, memsetParams, ctx); -} - -CUresult CUDAAPI -cuGraphExecHostNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, - const CUDA_HOST_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, - const CUDA_HOST_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphExecHostNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphExecChildGraphNodeSetParams(CUgraphExec hGraphExec, - CUgraphNode hNode, - CUgraph childGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, CUgraph); - static auto func_ptr = - LoadSymbol("cuGraphExecChildGraphNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, childGraph); -} - -CUresult CUDAAPI cuGraphExecEventRecordNodeSetEvent(CUgraphExec hGraphExec, - CUgraphNode hNode, - CUevent event) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, CUevent); - static auto func_ptr = - LoadSymbol("cuGraphExecEventRecordNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, 
hNode, event); -} - -CUresult CUDAAPI cuGraphExecEventWaitNodeSetEvent(CUgraphExec hGraphExec, - CUgraphNode hNode, - CUevent event) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, CUevent); - static auto func_ptr = - LoadSymbol("cuGraphExecEventWaitNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, event); -} - -CUresult CUDAAPI cuGraphExecExternalSemaphoresSignalNodeSetParams( - CUgraphExec hGraphExec, CUgraphNode hNode, - const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, - const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *); - static auto func_ptr = - LoadSymbol("cuGraphExecExternalSemaphoresSignalNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphExecExternalSemaphoresWaitNodeSetParams( - CUgraphExec hGraphExec, CUgraphNode hNode, - const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, - const CUDA_EXT_SEM_WAIT_NODE_PARAMS *); - static auto func_ptr = - LoadSymbol("cuGraphExecExternalSemaphoresWaitNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphUpload(CUgraphExec hGraphExec, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUstream); - static auto func_ptr = LoadSymbol("cuGraphUpload"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hStream); -} - -CUresult CUDAAPI cuGraphLaunch(CUgraphExec hGraphExec, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUstream); - static auto func_ptr = LoadSymbol("cuGraphLaunch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hStream); -} - -CUresult CUDAAPI cuGraphExecDestroy(CUgraphExec hGraphExec) { - using FuncPtr = CUresult(CUDAAPI 
*)(CUgraphExec); - static auto func_ptr = LoadSymbol("cuGraphExecDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec); -} - -CUresult CUDAAPI cuGraphDestroy(CUgraph hGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph); - static auto func_ptr = LoadSymbol("cuGraphDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph); -} - -CUresult CUDAAPI cuGraphExecUpdate(CUgraphExec hGraphExec, CUgraph hGraph, - CUgraphNode *hErrorNode_out, - CUgraphExecUpdateResult *updateResult_out) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraph, CUgraphNode *, - CUgraphExecUpdateResult *); - static auto func_ptr = LoadSymbol("cuGraphExecUpdate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hGraph, hErrorNode_out, updateResult_out); -} - -CUresult CUDAAPI cuGraphKernelNodeCopyAttributes(CUgraphNode dst, - CUgraphNode src) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraphNode); - static auto func_ptr = LoadSymbol("cuGraphKernelNodeCopyAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src); -} - -CUresult CUDAAPI -cuGraphKernelNodeGetAttribute(CUgraphNode hNode, CUkernelNodeAttrID attr, - CUkernelNodeAttrValue *value_out) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUkernelNodeAttrID, - CUkernelNodeAttrValue *); - static auto func_ptr = LoadSymbol("cuGraphKernelNodeGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, attr, value_out); -} - -CUresult CUDAAPI -cuGraphKernelNodeSetAttribute(CUgraphNode hNode, CUkernelNodeAttrID attr, - const CUkernelNodeAttrValue *value) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUkernelNodeAttrID, - const CUkernelNodeAttrValue *); - static auto func_ptr = LoadSymbol("cuGraphKernelNodeSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, attr, value); -} - -CUresult CUDAAPI 
cuOccupancyMaxActiveBlocksPerMultiprocessor( - int *numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize) { - using FuncPtr = CUresult(CUDAAPI *)(int *, CUfunction, int, size_t); - static auto func_ptr = - LoadSymbol("cuOccupancyMaxActiveBlocksPerMultiprocessor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize); -} - -CUresult CUDAAPI cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( - int *numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize, - unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(int *, CUfunction, int, size_t, unsigned int); - static auto func_ptr = LoadSymbol( - "cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize, flags); -} - -CUresult CUDAAPI cuOccupancyMaxPotentialBlockSize( - int *minGridSize, int *blockSize, CUfunction func, - CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, - int blockSizeLimit) { - using FuncPtr = CUresult(CUDAAPI *)(int *, int *, CUfunction, - CUoccupancyB2DSize, size_t, int); - static auto func_ptr = - LoadSymbol("cuOccupancyMaxPotentialBlockSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(minGridSize, blockSize, func, blockSizeToDynamicSMemSize, - dynamicSMemSize, blockSizeLimit); -} - -CUresult CUDAAPI cuOccupancyMaxPotentialBlockSizeWithFlags( - int *minGridSize, int *blockSize, CUfunction func, - CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, - int blockSizeLimit, unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)( - int *, int *, CUfunction, CUoccupancyB2DSize, size_t, int, unsigned int); - static auto func_ptr = - LoadSymbol("cuOccupancyMaxPotentialBlockSizeWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(minGridSize, blockSize, func, blockSizeToDynamicSMemSize, - dynamicSMemSize, 
blockSizeLimit, flags); -} - -CUresult CUDAAPI cuOccupancyAvailableDynamicSMemPerBlock( - size_t *dynamicSmemSize, CUfunction func, int numBlocks, int blockSize) { - using FuncPtr = CUresult(CUDAAPI *)(size_t *, CUfunction, int, int); - static auto func_ptr = - LoadSymbol("cuOccupancyAvailableDynamicSMemPerBlock"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dynamicSmemSize, func, numBlocks, blockSize); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetArray(CUtexref hTexRef, - CUarray hArray, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUarray, unsigned int); - static auto func_ptr = LoadSymbol("cuTexRefSetArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, hArray, Flags); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetMipmappedArray( - CUtexref hTexRef, CUmipmappedArray hMipmappedArray, unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUmipmappedArray, unsigned int); - static auto func_ptr = LoadSymbol("cuTexRefSetMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, hMipmappedArray, Flags); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetAddress(size_t *ByteOffset, - CUtexref hTexRef, - CUdeviceptr dptr, - size_t bytes) { - using FuncPtr = CUresult(CUDAAPI *)(size_t *, CUtexref, CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuTexRefSetAddress_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ByteOffset, hTexRef, dptr, bytes); -} - -__CUDA_DEPRECATED CUresult CUDAAPI -cuTexRefSetAddress2D(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, - CUdeviceptr dptr, size_t Pitch) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, const CUDA_ARRAY_DESCRIPTOR *, - CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuTexRefSetAddress2D_v3"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, desc, dptr, Pitch); -} - -__CUDA_DEPRECATED CUresult 
CUDAAPI cuTexRefSetFormat(CUtexref hTexRef, - CUarray_format fmt, - int NumPackedComponents) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUarray_format, int); - static auto func_ptr = LoadSymbol("cuTexRefSetFormat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, fmt, NumPackedComponents); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetAddressMode(CUtexref hTexRef, - int dim, - CUaddress_mode am) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, int, CUaddress_mode); - static auto func_ptr = LoadSymbol("cuTexRefSetAddressMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, dim, am); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetFilterMode(CUtexref hTexRef, - CUfilter_mode fm) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUfilter_mode); - static auto func_ptr = LoadSymbol("cuTexRefSetFilterMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, fm); -} - -__CUDA_DEPRECATED CUresult CUDAAPI -cuTexRefSetMipmapFilterMode(CUtexref hTexRef, CUfilter_mode fm) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUfilter_mode); - static auto func_ptr = LoadSymbol("cuTexRefSetMipmapFilterMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, fm); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetMipmapLevelBias(CUtexref hTexRef, - float bias) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, float); - static auto func_ptr = LoadSymbol("cuTexRefSetMipmapLevelBias"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, bias); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetMipmapLevelClamp( - CUtexref hTexRef, float minMipmapLevelClamp, float maxMipmapLevelClamp) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, float, float); - static auto func_ptr = LoadSymbol("cuTexRefSetMipmapLevelClamp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, minMipmapLevelClamp, 
maxMipmapLevelClamp); -} - -__CUDA_DEPRECATED CUresult CUDAAPI -cuTexRefSetMaxAnisotropy(CUtexref hTexRef, unsigned int maxAniso) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, unsigned int); - static auto func_ptr = LoadSymbol("cuTexRefSetMaxAnisotropy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, maxAniso); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetBorderColor(CUtexref hTexRef, - float *pBorderColor) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, float *); - static auto func_ptr = LoadSymbol("cuTexRefSetBorderColor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, pBorderColor); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetFlags(CUtexref hTexRef, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, unsigned int); - static auto func_ptr = LoadSymbol("cuTexRefSetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, Flags); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetAddress(CUdeviceptr *pdptr, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetAddress_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pdptr, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetArray(CUarray *phArray, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phArray, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetMipmappedArray( - CUmipmappedArray *phMipmappedArray, CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUmipmappedArray *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phMipmappedArray, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI 
cuTexRefGetAddressMode(CUaddress_mode *pam, - CUtexref hTexRef, - int dim) { - using FuncPtr = CUresult(CUDAAPI *)(CUaddress_mode *, CUtexref, int); - static auto func_ptr = LoadSymbol("cuTexRefGetAddressMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pam, hTexRef, dim); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetFilterMode(CUfilter_mode *pfm, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUfilter_mode *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetFilterMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pfm, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetFormat(CUarray_format *pFormat, - int *pNumChannels, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray_format *, int *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetFormat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFormat, pNumChannels, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI -cuTexRefGetMipmapFilterMode(CUfilter_mode *pfm, CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUfilter_mode *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetMipmapFilterMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pfm, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI -cuTexRefGetMipmapLevelBias(float *pbias, CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(float *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetMipmapLevelBias"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pbias, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI -cuTexRefGetMipmapLevelClamp(float *pminMipmapLevelClamp, - float *pmaxMipmapLevelClamp, CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(float *, float *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetMipmapLevelClamp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pminMipmapLevelClamp, 
pmaxMipmapLevelClamp, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetMaxAnisotropy(int *pmaxAniso, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(int *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetMaxAnisotropy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pmaxAniso, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetBorderColor(float *pBorderColor, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(float *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetBorderColor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pBorderColor, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetFlags(unsigned int *pFlags, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(unsigned int *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFlags, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefCreate(CUtexref *pTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref *); - static auto func_ptr = LoadSymbol("cuTexRefCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefDestroy(CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuSurfRefSetArray(CUsurfref hSurfRef, - CUarray hArray, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUsurfref, CUarray, unsigned int); - static auto func_ptr = LoadSymbol("cuSurfRefSetArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hSurfRef, hArray, Flags); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuSurfRefGetArray(CUarray *phArray, - CUsurfref hSurfRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray *, 
CUsurfref); - static auto func_ptr = LoadSymbol("cuSurfRefGetArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phArray, hSurfRef); -} - -CUresult CUDAAPI -cuTexObjectCreate(CUtexObject *pTexObject, const CUDA_RESOURCE_DESC *pResDesc, - const CUDA_TEXTURE_DESC *pTexDesc, - const CUDA_RESOURCE_VIEW_DESC *pResViewDesc) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexObject *, const CUDA_RESOURCE_DESC *, - const CUDA_TEXTURE_DESC *, - const CUDA_RESOURCE_VIEW_DESC *); - static auto func_ptr = LoadSymbol("cuTexObjectCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexObject, pResDesc, pTexDesc, pResViewDesc); -} - -CUresult CUDAAPI cuTexObjectDestroy(CUtexObject texObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexObject); - static auto func_ptr = LoadSymbol("cuTexObjectDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texObject); -} - -CUresult CUDAAPI cuTexObjectGetResourceDesc(CUDA_RESOURCE_DESC *pResDesc, - CUtexObject texObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_RESOURCE_DESC *, CUtexObject); - static auto func_ptr = LoadSymbol("cuTexObjectGetResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, texObject); -} - -CUresult CUDAAPI cuTexObjectGetTextureDesc(CUDA_TEXTURE_DESC *pTexDesc, - CUtexObject texObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_TEXTURE_DESC *, CUtexObject); - static auto func_ptr = LoadSymbol("cuTexObjectGetTextureDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexDesc, texObject); -} - -CUresult CUDAAPI cuTexObjectGetResourceViewDesc( - CUDA_RESOURCE_VIEW_DESC *pResViewDesc, CUtexObject texObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_RESOURCE_VIEW_DESC *, CUtexObject); - static auto func_ptr = LoadSymbol("cuTexObjectGetResourceViewDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResViewDesc, texObject); -} - -CUresult CUDAAPI 
cuSurfObjectCreate(CUsurfObject *pSurfObject, - const CUDA_RESOURCE_DESC *pResDesc) { - using FuncPtr = - CUresult(CUDAAPI *)(CUsurfObject *, const CUDA_RESOURCE_DESC *); - static auto func_ptr = LoadSymbol("cuSurfObjectCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pSurfObject, pResDesc); -} - -CUresult CUDAAPI cuSurfObjectDestroy(CUsurfObject surfObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUsurfObject); - static auto func_ptr = LoadSymbol("cuSurfObjectDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfObject); -} - -CUresult CUDAAPI cuSurfObjectGetResourceDesc(CUDA_RESOURCE_DESC *pResDesc, - CUsurfObject surfObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_RESOURCE_DESC *, CUsurfObject); - static auto func_ptr = LoadSymbol("cuSurfObjectGetResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, surfObject); -} - -CUresult CUDAAPI cuDeviceCanAccessPeer(int *canAccessPeer, CUdevice dev, - CUdevice peerDev) { - using FuncPtr = CUresult(CUDAAPI *)(int *, CUdevice, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceCanAccessPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(canAccessPeer, dev, peerDev); -} - -CUresult CUDAAPI cuCtxEnablePeerAccess(CUcontext peerContext, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext, unsigned int); - static auto func_ptr = LoadSymbol("cuCtxEnablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerContext, Flags); -} - -CUresult CUDAAPI cuCtxDisablePeerAccess(CUcontext peerContext) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuCtxDisablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerContext); -} - -CUresult CUDAAPI cuDeviceGetP2PAttribute(int *value, - CUdevice_P2PAttribute attrib, - CUdevice srcDevice, - CUdevice dstDevice) { - using FuncPtr = - 
CUresult(CUDAAPI *)(int *, CUdevice_P2PAttribute, CUdevice, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetP2PAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attrib, srcDevice, dstDevice); -} - -CUresult CUDAAPI cuGraphicsUnregisterResource(CUgraphicsResource resource) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphicsResource); - static auto func_ptr = LoadSymbol("cuGraphicsUnregisterResource"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource); -} - -CUresult CUDAAPI cuGraphicsSubResourceGetMappedArray( - CUarray *pArray, CUgraphicsResource resource, unsigned int arrayIndex, - unsigned int mipLevel) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray *, CUgraphicsResource, - unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cuGraphicsSubResourceGetMappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pArray, resource, arrayIndex, mipLevel); -} - -CUresult CUDAAPI cuGraphicsResourceGetMappedMipmappedArray( - CUmipmappedArray *pMipmappedArray, CUgraphicsResource resource) { - using FuncPtr = CUresult(CUDAAPI *)(CUmipmappedArray *, CUgraphicsResource); - static auto func_ptr = - LoadSymbol("cuGraphicsResourceGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pMipmappedArray, resource); -} - -CUresult CUDAAPI cuGraphicsResourceGetMappedPointer( - CUdeviceptr *pDevPtr, size_t *pSize, CUgraphicsResource resource) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr *, size_t *, CUgraphicsResource); - static auto func_ptr = - LoadSymbol("cuGraphicsResourceGetMappedPointer_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pDevPtr, pSize, resource); -} - -CUresult CUDAAPI cuGraphicsResourceSetMapFlags(CUgraphicsResource resource, - unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphicsResource, unsigned int); - static auto func_ptr = - 
LoadSymbol("cuGraphicsResourceSetMapFlags_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource, flags); -} - -CUresult CUDAAPI cuGraphicsMapResources(unsigned int count, - CUgraphicsResource *resources, - CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(unsigned int, CUgraphicsResource *, CUstream); - static auto func_ptr = LoadSymbol("cuGraphicsMapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, hStream); -} - -CUresult CUDAAPI cuGraphicsUnmapResources(unsigned int count, - CUgraphicsResource *resources, - CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(unsigned int, CUgraphicsResource *, CUstream); - static auto func_ptr = LoadSymbol("cuGraphicsUnmapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, hStream); -} - -CUresult CUDAAPI cuGetExportTable(const void **ppExportTable, - const CUuuid *pExportTableId) { - using FuncPtr = CUresult(CUDAAPI *)(const void **, const CUuuid *); - static auto func_ptr = LoadSymbol("cuGetExportTable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ppExportTable, pExportTableId); -} - -CUresult CUDAAPI cuFuncGetModule(CUmodule *hmod, CUfunction hfunc) { - using FuncPtr = CUresult(CUDAAPI *)(CUmodule *, CUfunction); - static auto func_ptr = LoadSymbol("cuFuncGetModule"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hmod, hfunc); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cuda_12_0.inc b/third_party/xla/third_party/tsl/tsl/cuda/cuda_12_0.inc deleted file mode 100644 index 7ec62ecb7c19b0..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cuda_12_0.inc +++ /dev/null @@ -1,3323 +0,0 @@ -// Auto-generated, do not edit. 
- -extern "C" { -CUresult CUDAAPI cuGetErrorString(CUresult error, const char **pStr) { - using FuncPtr = CUresult(CUDAAPI *)(CUresult, const char **); - static auto func_ptr = LoadSymbol("cuGetErrorString"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(error, pStr); -} - -CUresult CUDAAPI cuGetErrorName(CUresult error, const char **pStr) { - using FuncPtr = CUresult(CUDAAPI *)(CUresult, const char **); - static auto func_ptr = LoadSymbol("cuGetErrorName"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(error, pStr); -} - -CUresult CUDAAPI cuInit(unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(unsigned int); - static auto func_ptr = LoadSymbol("cuInit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(Flags); -} - -CUresult CUDAAPI cuDriverGetVersion(int *driverVersion) { - using FuncPtr = CUresult(CUDAAPI *)(int *); - static auto func_ptr = LoadSymbol("cuDriverGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(driverVersion); -} - -CUresult CUDAAPI cuDeviceGet(CUdevice *device, int ordinal) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice *, int); - static auto func_ptr = LoadSymbol("cuDeviceGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, ordinal); -} - -CUresult CUDAAPI cuDeviceGetCount(int *count) { - using FuncPtr = CUresult(CUDAAPI *)(int *); - static auto func_ptr = LoadSymbol("cuDeviceGetCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count); -} - -CUresult CUDAAPI cuDeviceGetName(char *name, int len, CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(char *, int, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetName"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(name, len, dev); -} - -CUresult CUDAAPI cuDeviceGetUuid(CUuuid *uuid, CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUuuid *, CUdevice); - static auto func_ptr = 
LoadSymbol("cuDeviceGetUuid"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(uuid, dev); -} - -CUresult CUDAAPI cuDeviceGetUuid_v2(CUuuid *uuid, CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUuuid *, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetUuid_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(uuid, dev); -} - -CUresult CUDAAPI cuDeviceGetLuid(char *luid, unsigned int *deviceNodeMask, - CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(char *, unsigned int *, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetLuid"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(luid, deviceNodeMask, dev); -} - -CUresult CUDAAPI cuDeviceTotalMem(size_t *bytes, CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(size_t *, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceTotalMem_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(bytes, dev); -} - -CUresult CUDAAPI cuDeviceGetTexture1DLinearMaxWidth(size_t *maxWidthInElements, - CUarray_format format, - unsigned numChannels, - CUdevice dev) { - using FuncPtr = - CUresult(CUDAAPI *)(size_t *, CUarray_format, unsigned int, CUdevice); - static auto func_ptr = - LoadSymbol("cuDeviceGetTexture1DLinearMaxWidth"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(maxWidthInElements, format, numChannels, dev); -} - -CUresult CUDAAPI cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, - CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(int *, CUdevice_attribute, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pi, attrib, dev); -} - -CUresult CUDAAPI cuDeviceGetNvSciSyncAttributes(void *nvSciSyncAttrList, - CUdevice dev, int flags) { - using FuncPtr = CUresult(CUDAAPI *)(void *, CUdevice, int); - static auto func_ptr = LoadSymbol("cuDeviceGetNvSciSyncAttributes"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(nvSciSyncAttrList, dev, flags); -} - -CUresult CUDAAPI cuDeviceSetMemPool(CUdevice dev, CUmemoryPool pool) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice, CUmemoryPool); - static auto func_ptr = LoadSymbol("cuDeviceSetMemPool"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev, pool); -} - -CUresult CUDAAPI cuDeviceGetMemPool(CUmemoryPool *pool, CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUmemoryPool *, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetMemPool"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pool, dev); -} - -CUresult CUDAAPI cuDeviceGetDefaultMemPool(CUmemoryPool *pool_out, - CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUmemoryPool *, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetDefaultMemPool"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pool_out, dev); -} - -CUresult CUDAAPI cuDeviceGetExecAffinitySupport(int *pi, - CUexecAffinityType type, - CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(int *, CUexecAffinityType, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetExecAffinitySupport"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pi, type, dev); -} - -CUresult CUDAAPI -cuFlushGPUDirectRDMAWrites(CUflushGPUDirectRDMAWritesTarget target, - CUflushGPUDirectRDMAWritesScope scope) { - using FuncPtr = CUresult(CUDAAPI *)(CUflushGPUDirectRDMAWritesTarget, - CUflushGPUDirectRDMAWritesScope); - static auto func_ptr = LoadSymbol("cuFlushGPUDirectRDMAWrites"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(target, scope); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuDeviceGetProperties(CUdevprop *prop, - CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevprop *, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(prop, dev); -} - 
-__CUDA_DEPRECATED CUresult CUDAAPI cuDeviceComputeCapability(int *major, - int *minor, - CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(int *, int *, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceComputeCapability"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(major, minor, dev); -} - -CUresult CUDAAPI cuDevicePrimaryCtxRetain(CUcontext *pctx, CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext *, CUdevice); - static auto func_ptr = LoadSymbol("cuDevicePrimaryCtxRetain"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pctx, dev); -} - -CUresult CUDAAPI cuDevicePrimaryCtxRelease(CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice); - static auto func_ptr = LoadSymbol("cuDevicePrimaryCtxRelease_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev); -} - -CUresult CUDAAPI cuDevicePrimaryCtxSetFlags(CUdevice dev, unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice, unsigned int); - static auto func_ptr = LoadSymbol("cuDevicePrimaryCtxSetFlags_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev, flags); -} - -CUresult CUDAAPI cuDevicePrimaryCtxGetState(CUdevice dev, unsigned int *flags, - int *active) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice, unsigned int *, int *); - static auto func_ptr = LoadSymbol("cuDevicePrimaryCtxGetState"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev, flags, active); -} - -CUresult CUDAAPI cuDevicePrimaryCtxReset(CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice); - static auto func_ptr = LoadSymbol("cuDevicePrimaryCtxReset_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev); -} - -CUresult CUDAAPI cuCtxCreate(CUcontext *pctx, unsigned int flags, - CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext *, unsigned int, CUdevice); - static auto func_ptr = LoadSymbol("cuCtxCreate_v2"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(pctx, flags, dev); -} - -CUresult CUDAAPI cuCtxCreate_v3(CUcontext *pctx, - CUexecAffinityParam *paramsArray, int numParams, - unsigned int flags, CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext *, CUexecAffinityParam *, int, - unsigned int, CUdevice); - static auto func_ptr = LoadSymbol("cuCtxCreate_v3"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pctx, paramsArray, numParams, flags, dev); -} - -CUresult CUDAAPI cuCtxDestroy(CUcontext ctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuCtxDestroy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx); -} - -CUresult CUDAAPI cuCtxPushCurrent(CUcontext ctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuCtxPushCurrent_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx); -} - -CUresult CUDAAPI cuCtxPopCurrent(CUcontext *pctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext *); - static auto func_ptr = LoadSymbol("cuCtxPopCurrent_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pctx); -} - -CUresult CUDAAPI cuCtxSetCurrent(CUcontext ctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuCtxSetCurrent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx); -} - -CUresult CUDAAPI cuCtxGetCurrent(CUcontext *pctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext *); - static auto func_ptr = LoadSymbol("cuCtxGetCurrent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pctx); -} - -CUresult CUDAAPI cuCtxGetDevice(CUdevice *device) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice *); - static auto func_ptr = LoadSymbol("cuCtxGetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -CUresult CUDAAPI cuCtxGetFlags(unsigned int *flags) { - using FuncPtr = 
CUresult(CUDAAPI *)(unsigned int *); - static auto func_ptr = LoadSymbol("cuCtxGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -CUresult CUDAAPI cuCtxGetId(CUcontext ctx, unsigned long long *ctxId) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext, unsigned long long *); - static auto func_ptr = LoadSymbol("cuCtxGetId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx, ctxId); -} - -CUresult CUDAAPI cuCtxSynchronize(void) { - using FuncPtr = CUresult(CUDAAPI *)(); - static auto func_ptr = LoadSymbol("cuCtxSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -CUresult CUDAAPI cuCtxSetLimit(CUlimit limit, size_t value) { - using FuncPtr = CUresult(CUDAAPI *)(CUlimit, size_t); - static auto func_ptr = LoadSymbol("cuCtxSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -CUresult CUDAAPI cuCtxGetLimit(size_t *pvalue, CUlimit limit) { - using FuncPtr = CUresult(CUDAAPI *)(size_t *, CUlimit); - static auto func_ptr = LoadSymbol("cuCtxGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pvalue, limit); -} - -CUresult CUDAAPI cuCtxGetCacheConfig(CUfunc_cache *pconfig) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunc_cache *); - static auto func_ptr = LoadSymbol("cuCtxGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pconfig); -} - -CUresult CUDAAPI cuCtxSetCacheConfig(CUfunc_cache config) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunc_cache); - static auto func_ptr = LoadSymbol("cuCtxSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config); -} - -CUresult CUDAAPI cuCtxGetSharedMemConfig(CUsharedconfig *pConfig) { - using FuncPtr = CUresult(CUDAAPI *)(CUsharedconfig *); - static auto func_ptr = LoadSymbol("cuCtxGetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pConfig); -} - -CUresult 
CUDAAPI cuCtxSetSharedMemConfig(CUsharedconfig config) { - using FuncPtr = CUresult(CUDAAPI *)(CUsharedconfig); - static auto func_ptr = LoadSymbol("cuCtxSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config); -} - -CUresult CUDAAPI cuCtxGetApiVersion(CUcontext ctx, unsigned int *version) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext, unsigned int *); - static auto func_ptr = LoadSymbol("cuCtxGetApiVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx, version); -} - -CUresult CUDAAPI cuCtxGetStreamPriorityRange(int *leastPriority, - int *greatestPriority) { - using FuncPtr = CUresult(CUDAAPI *)(int *, int *); - static auto func_ptr = LoadSymbol("cuCtxGetStreamPriorityRange"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(leastPriority, greatestPriority); -} - -CUresult CUDAAPI cuCtxResetPersistingL2Cache(void) { - using FuncPtr = CUresult(CUDAAPI *)(); - static auto func_ptr = LoadSymbol("cuCtxResetPersistingL2Cache"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -CUresult CUDAAPI cuCtxGetExecAffinity(CUexecAffinityParam *pExecAffinity, - CUexecAffinityType type) { - using FuncPtr = - CUresult(CUDAAPI *)(CUexecAffinityParam *, CUexecAffinityType); - static auto func_ptr = LoadSymbol("cuCtxGetExecAffinity"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pExecAffinity, type); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuCtxAttach(CUcontext *pctx, - unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext *, unsigned int); - static auto func_ptr = LoadSymbol("cuCtxAttach"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pctx, flags); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuCtxDetach(CUcontext ctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuCtxDetach"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx); -} - 
-CUresult CUDAAPI cuModuleLoad(CUmodule *module, const char *fname) { - using FuncPtr = CUresult(CUDAAPI *)(CUmodule *, const char *); - static auto func_ptr = LoadSymbol("cuModuleLoad"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(module, fname); -} - -CUresult CUDAAPI cuModuleLoadData(CUmodule *module, const void *image) { - using FuncPtr = CUresult(CUDAAPI *)(CUmodule *, const void *); - static auto func_ptr = LoadSymbol("cuModuleLoadData"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(module, image); -} - -CUresult CUDAAPI cuModuleLoadDataEx(CUmodule *module, const void *image, - unsigned int numOptions, - CUjit_option *options, - void **optionValues) { - using FuncPtr = CUresult(CUDAAPI *)(CUmodule *, const void *, unsigned int, - CUjit_option *, void **); - static auto func_ptr = LoadSymbol("cuModuleLoadDataEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(module, image, numOptions, options, optionValues); -} - -CUresult CUDAAPI cuModuleLoadFatBinary(CUmodule *module, const void *fatCubin) { - using FuncPtr = CUresult(CUDAAPI *)(CUmodule *, const void *); - static auto func_ptr = LoadSymbol("cuModuleLoadFatBinary"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(module, fatCubin); -} - -CUresult CUDAAPI cuModuleUnload(CUmodule hmod) { - using FuncPtr = CUresult(CUDAAPI *)(CUmodule); - static auto func_ptr = LoadSymbol("cuModuleUnload"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hmod); -} - -CUresult CUDAAPI cuModuleGetLoadingMode(CUmoduleLoadingMode *mode) { - using FuncPtr = CUresult(CUDAAPI *)(CUmoduleLoadingMode *); - static auto func_ptr = LoadSymbol("cuModuleGetLoadingMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mode); -} - -CUresult CUDAAPI cuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, - const char *name) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction *, CUmodule, const char *); - static auto 
func_ptr = LoadSymbol("cuModuleGetFunction"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, hmod, name); -} - -CUresult CUDAAPI cuModuleGetGlobal(CUdeviceptr *dptr, size_t *bytes, - CUmodule hmod, const char *name) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr *, size_t *, CUmodule, const char *); - static auto func_ptr = LoadSymbol("cuModuleGetGlobal_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr, bytes, hmod, name); -} - -CUresult CUDAAPI cuLinkCreate(unsigned int numOptions, CUjit_option *options, - void **optionValues, CUlinkState *stateOut) { - using FuncPtr = - CUresult(CUDAAPI *)(unsigned int, CUjit_option *, void **, CUlinkState *); - static auto func_ptr = LoadSymbol("cuLinkCreate_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numOptions, options, optionValues, stateOut); -} - -CUresult CUDAAPI cuLinkAddData(CUlinkState state, CUjitInputType type, - void *data, size_t size, const char *name, - unsigned int numOptions, CUjit_option *options, - void **optionValues) { - using FuncPtr = - CUresult(CUDAAPI *)(CUlinkState, CUjitInputType, void *, size_t, - const char *, unsigned int, CUjit_option *, void **); - static auto func_ptr = LoadSymbol("cuLinkAddData_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(state, type, data, size, name, numOptions, options, - optionValues); -} - -CUresult CUDAAPI cuLinkAddFile(CUlinkState state, CUjitInputType type, - const char *path, unsigned int numOptions, - CUjit_option *options, void **optionValues) { - using FuncPtr = CUresult(CUDAAPI *)(CUlinkState, CUjitInputType, const char *, - unsigned int, CUjit_option *, void **); - static auto func_ptr = LoadSymbol("cuLinkAddFile_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(state, type, path, numOptions, options, optionValues); -} - -CUresult CUDAAPI cuLinkComplete(CUlinkState state, void **cubinOut, - size_t *sizeOut) { - using 
FuncPtr = CUresult(CUDAAPI *)(CUlinkState, void **, size_t *); - static auto func_ptr = LoadSymbol("cuLinkComplete"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(state, cubinOut, sizeOut); -} - -CUresult CUDAAPI cuLinkDestroy(CUlinkState state) { - using FuncPtr = CUresult(CUDAAPI *)(CUlinkState); - static auto func_ptr = LoadSymbol("cuLinkDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(state); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuModuleGetTexRef(CUtexref *pTexRef, - CUmodule hmod, - const char *name) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref *, CUmodule, const char *); - static auto func_ptr = LoadSymbol("cuModuleGetTexRef"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexRef, hmod, name); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuModuleGetSurfRef(CUsurfref *pSurfRef, - CUmodule hmod, - const char *name) { - using FuncPtr = CUresult(CUDAAPI *)(CUsurfref *, CUmodule, const char *); - static auto func_ptr = LoadSymbol("cuModuleGetSurfRef"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pSurfRef, hmod, name); -} - -CUresult CUDAAPI cuLibraryLoadData(CUlibrary *library, const void *code, - CUjit_option *jitOptions, - void **jitOptionsValues, - unsigned int numJitOptions, - CUlibraryOption *libraryOptions, - void **libraryOptionValues, - unsigned int numLibraryOptions) { - using FuncPtr = CUresult(CUDAAPI *)(CUlibrary *, const void *, CUjit_option *, - void **, unsigned int, CUlibraryOption *, - void **, unsigned int); - static auto func_ptr = LoadSymbol("cuLibraryLoadData"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(library, code, jitOptions, jitOptionsValues, numJitOptions, - libraryOptions, libraryOptionValues, numLibraryOptions); -} - -CUresult CUDAAPI cuLibraryLoadFromFile(CUlibrary *library, const char *fileName, - CUjit_option *jitOptions, - void **jitOptionsValues, - unsigned int numJitOptions, - CUlibraryOption 
*libraryOptions, - void **libraryOptionValues, - unsigned int numLibraryOptions) { - using FuncPtr = CUresult(CUDAAPI *)(CUlibrary *, const char *, CUjit_option *, - void **, unsigned int, CUlibraryOption *, - void **, unsigned int); - static auto func_ptr = LoadSymbol("cuLibraryLoadFromFile"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(library, fileName, jitOptions, jitOptionsValues, - numJitOptions, libraryOptions, libraryOptionValues, - numLibraryOptions); -} - -CUresult CUDAAPI cuLibraryUnload(CUlibrary library) { - using FuncPtr = CUresult(CUDAAPI *)(CUlibrary); - static auto func_ptr = LoadSymbol("cuLibraryUnload"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(library); -} - -CUresult CUDAAPI cuLibraryGetKernel(CUkernel *pKernel, CUlibrary library, - const char *name) { - using FuncPtr = CUresult(CUDAAPI *)(CUkernel *, CUlibrary, const char *); - static auto func_ptr = LoadSymbol("cuLibraryGetKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pKernel, library, name); -} - -CUresult CUDAAPI cuLibraryGetModule(CUmodule *pMod, CUlibrary library) { - using FuncPtr = CUresult(CUDAAPI *)(CUmodule *, CUlibrary); - static auto func_ptr = LoadSymbol("cuLibraryGetModule"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pMod, library); -} - -CUresult CUDAAPI cuKernelGetFunction(CUfunction *pFunc, CUkernel kernel) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction *, CUkernel); - static auto func_ptr = LoadSymbol("cuKernelGetFunction"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFunc, kernel); -} - -CUresult CUDAAPI cuLibraryGetGlobal(CUdeviceptr *dptr, size_t *bytes, - CUlibrary library, const char *name) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr *, size_t *, CUlibrary, const char *); - static auto func_ptr = LoadSymbol("cuLibraryGetGlobal"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr, bytes, library, name); 
-} - -CUresult CUDAAPI cuLibraryGetManaged(CUdeviceptr *dptr, size_t *bytes, - CUlibrary library, const char *name) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr *, size_t *, CUlibrary, const char *); - static auto func_ptr = LoadSymbol("cuLibraryGetManaged"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr, bytes, library, name); -} - -CUresult CUDAAPI cuLibraryGetUnifiedFunction(void **fptr, CUlibrary library, - const char *symbol) { - using FuncPtr = CUresult(CUDAAPI *)(void **, CUlibrary, const char *); - static auto func_ptr = LoadSymbol("cuLibraryGetUnifiedFunction"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(fptr, library, symbol); -} - -CUresult CUDAAPI cuKernelGetAttribute(int *pi, CUfunction_attribute attrib, - CUkernel kernel, CUdevice dev) { - using FuncPtr = - CUresult(CUDAAPI *)(int *, CUfunction_attribute, CUkernel, CUdevice); - static auto func_ptr = LoadSymbol("cuKernelGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pi, attrib, kernel, dev); -} - -CUresult CUDAAPI cuKernelSetAttribute(CUfunction_attribute attrib, int val, - CUkernel kernel, CUdevice dev) { - using FuncPtr = - CUresult(CUDAAPI *)(CUfunction_attribute, int, CUkernel, CUdevice); - static auto func_ptr = LoadSymbol("cuKernelSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attrib, val, kernel, dev); -} - -CUresult CUDAAPI cuKernelSetCacheConfig(CUkernel kernel, CUfunc_cache config, - CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(CUkernel, CUfunc_cache, CUdevice); - static auto func_ptr = LoadSymbol("cuKernelSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(kernel, config, dev); -} - -CUresult CUDAAPI cuMemGetInfo(size_t *free, size_t *total) { - using FuncPtr = CUresult(CUDAAPI *)(size_t *, size_t *); - static auto func_ptr = LoadSymbol("cuMemGetInfo_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(free, total); -} - -CUresult CUDAAPI cuMemAlloc(CUdeviceptr *dptr, size_t bytesize) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t); - static auto func_ptr = LoadSymbol("cuMemAlloc_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr, bytesize); -} - -CUresult CUDAAPI cuMemAllocPitch(CUdeviceptr *dptr, size_t *pPitch, - size_t WidthInBytes, size_t Height, - unsigned int ElementSizeBytes) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t *, size_t, size_t, - unsigned int); - static auto func_ptr = LoadSymbol("cuMemAllocPitch_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr, pPitch, WidthInBytes, Height, ElementSizeBytes); -} - -CUresult CUDAAPI cuMemFree(CUdeviceptr dptr) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr); - static auto func_ptr = LoadSymbol("cuMemFree_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr); -} - -CUresult CUDAAPI cuMemGetAddressRange(CUdeviceptr *pbase, size_t *psize, - CUdeviceptr dptr) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t *, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuMemGetAddressRange_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pbase, psize, dptr); -} - -CUresult CUDAAPI cuMemAllocHost(void **pp, size_t bytesize) { - using FuncPtr = CUresult(CUDAAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cuMemAllocHost_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pp, bytesize); -} - -CUresult CUDAAPI cuMemFreeHost(void *p) { - using FuncPtr = CUresult(CUDAAPI *)(void *); - static auto func_ptr = LoadSymbol("cuMemFreeHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -CUresult CUDAAPI cuMemHostAlloc(void **pp, size_t bytesize, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cuMemHostAlloc"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pp, bytesize, Flags); -} - -CUresult CUDAAPI cuMemHostGetDevicePointer(CUdeviceptr *pdptr, void *p, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, void *, unsigned int); - static auto func_ptr = LoadSymbol("cuMemHostGetDevicePointer_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pdptr, p, Flags); -} - -CUresult CUDAAPI cuMemHostGetFlags(unsigned int *pFlags, void *p) { - using FuncPtr = CUresult(CUDAAPI *)(unsigned int *, void *); - static auto func_ptr = LoadSymbol("cuMemHostGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFlags, p); -} - -CUresult CUDAAPI cuMemAllocManaged(CUdeviceptr *dptr, size_t bytesize, - unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cuMemAllocManaged"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr, bytesize, flags); -} - -CUresult CUDAAPI cuDeviceGetByPCIBusId(CUdevice *dev, const char *pciBusId) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice *, const char *); - static auto func_ptr = LoadSymbol("cuDeviceGetByPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev, pciBusId); -} - -CUresult CUDAAPI cuDeviceGetPCIBusId(char *pciBusId, int len, CUdevice dev) { - using FuncPtr = CUresult(CUDAAPI *)(char *, int, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pciBusId, len, dev); -} - -CUresult CUDAAPI cuIpcGetEventHandle(CUipcEventHandle *pHandle, CUevent event) { - using FuncPtr = CUresult(CUDAAPI *)(CUipcEventHandle *, CUevent); - static auto func_ptr = LoadSymbol("cuIpcGetEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHandle, event); -} - -CUresult CUDAAPI cuIpcOpenEventHandle(CUevent *phEvent, - CUipcEventHandle handle) 
{ - using FuncPtr = CUresult(CUDAAPI *)(CUevent *, CUipcEventHandle); - static auto func_ptr = LoadSymbol("cuIpcOpenEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phEvent, handle); -} - -CUresult CUDAAPI cuIpcGetMemHandle(CUipcMemHandle *pHandle, CUdeviceptr dptr) { - using FuncPtr = CUresult(CUDAAPI *)(CUipcMemHandle *, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuIpcGetMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHandle, dptr); -} - -CUresult CUDAAPI cuIpcOpenMemHandle(CUdeviceptr *pdptr, CUipcMemHandle handle, - unsigned int Flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr *, CUipcMemHandle, unsigned int); - static auto func_ptr = LoadSymbol("cuIpcOpenMemHandle_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pdptr, handle, Flags); -} - -CUresult CUDAAPI cuIpcCloseMemHandle(CUdeviceptr dptr) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr); - static auto func_ptr = LoadSymbol("cuIpcCloseMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr); -} - -CUresult CUDAAPI cuMemHostRegister(void *p, size_t bytesize, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cuMemHostRegister_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, bytesize, Flags); -} - -CUresult CUDAAPI cuMemHostUnregister(void *p) { - using FuncPtr = CUresult(CUDAAPI *)(void *); - static auto func_ptr = LoadSymbol("cuMemHostUnregister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -CUresult CUDAAPI cuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemcpy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, ByteCount); -} - -CUresult CUDAAPI 
cuMemcpyPeer(CUdeviceptr dstDevice, CUcontext dstContext, - CUdeviceptr srcDevice, CUcontext srcContext, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUcontext, CUdeviceptr, - CUcontext, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstContext, srcDevice, srcContext, ByteCount); -} - -CUresult CUDAAPI cuMemcpyHtoD(CUdeviceptr dstDevice, const void *srcHost, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, const void *, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyHtoD_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, srcHost, ByteCount); -} - -CUresult CUDAAPI cuMemcpyDtoH(void *dstHost, CUdeviceptr srcDevice, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(void *, CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyDtoH_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstHost, srcDevice, ByteCount); -} - -CUresult CUDAAPI cuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyDtoD_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, srcDevice, ByteCount); -} - -CUresult CUDAAPI cuMemcpyDtoA(CUarray dstArray, size_t dstOffset, - CUdeviceptr srcDevice, size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray, size_t, CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyDtoA_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstArray, dstOffset, srcDevice, ByteCount); -} - -CUresult CUDAAPI cuMemcpyAtoD(CUdeviceptr dstDevice, CUarray srcArray, - size_t srcOffset, size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUarray, size_t, size_t); - static auto func_ptr = 
LoadSymbol("cuMemcpyAtoD_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, srcArray, srcOffset, ByteCount); -} - -CUresult CUDAAPI cuMemcpyHtoA(CUarray dstArray, size_t dstOffset, - const void *srcHost, size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray, size_t, const void *, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyHtoA_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstArray, dstOffset, srcHost, ByteCount); -} - -CUresult CUDAAPI cuMemcpyAtoH(void *dstHost, CUarray srcArray, size_t srcOffset, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(void *, CUarray, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyAtoH_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstHost, srcArray, srcOffset, ByteCount); -} - -CUresult CUDAAPI cuMemcpyAtoA(CUarray dstArray, size_t dstOffset, - CUarray srcArray, size_t srcOffset, - size_t ByteCount) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray, size_t, CUarray, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemcpyAtoA_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstArray, dstOffset, srcArray, srcOffset, ByteCount); -} - -CUresult CUDAAPI cuMemcpy2D(const CUDA_MEMCPY2D *pCopy) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY2D *); - static auto func_ptr = LoadSymbol("cuMemcpy2D_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy); -} - -CUresult CUDAAPI cuMemcpy2DUnaligned(const CUDA_MEMCPY2D *pCopy) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY2D *); - static auto func_ptr = LoadSymbol("cuMemcpy2DUnaligned_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy); -} - -CUresult CUDAAPI cuMemcpy3D(const CUDA_MEMCPY3D *pCopy) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY3D *); - static auto func_ptr = LoadSymbol("cuMemcpy3D_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); 
- return func_ptr(pCopy); -} - -CUresult CUDAAPI cuMemcpy3DPeer(const CUDA_MEMCPY3D_PEER *pCopy) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY3D_PEER *); - static auto func_ptr = LoadSymbol("cuMemcpy3DPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy); -} - -CUresult CUDAAPI cuMemcpyAsync(CUdeviceptr dst, CUdeviceptr src, - size_t ByteCount, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, CUdeviceptr, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpyPeerAsync(CUdeviceptr dstDevice, CUcontext dstContext, - CUdeviceptr srcDevice, CUcontext srcContext, - size_t ByteCount, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUcontext, CUdeviceptr, - CUcontext, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstContext, srcDevice, srcContext, ByteCount, - hStream); -} - -CUresult CUDAAPI cuMemcpyHtoDAsync(CUdeviceptr dstDevice, const void *srcHost, - size_t ByteCount, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, const void *, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyHtoDAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, srcHost, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpyDtoHAsync(void *dstHost, CUdeviceptr srcDevice, - size_t ByteCount, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(void *, CUdeviceptr, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyDtoHAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstHost, srcDevice, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpyDtoDAsync(CUdeviceptr dstDevice, CUdeviceptr srcDevice, - size_t ByteCount, CUstream hStream) { - 
using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, CUdeviceptr, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyDtoDAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, srcDevice, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpyHtoAAsync(CUarray dstArray, size_t dstOffset, - const void *srcHost, size_t ByteCount, - CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUarray, size_t, const void *, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyHtoAAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstArray, dstOffset, srcHost, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpyAtoHAsync(void *dstHost, CUarray srcArray, - size_t srcOffset, size_t ByteCount, - CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(void *, CUarray, size_t, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpyAtoHAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstHost, srcArray, srcOffset, ByteCount, hStream); -} - -CUresult CUDAAPI cuMemcpy2DAsync(const CUDA_MEMCPY2D *pCopy, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY2D *, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpy2DAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy, hStream); -} - -CUresult CUDAAPI cuMemcpy3DAsync(const CUDA_MEMCPY3D *pCopy, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY3D *, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpy3DAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy, hStream); -} - -CUresult CUDAAPI cuMemcpy3DPeerAsync(const CUDA_MEMCPY3D_PEER *pCopy, - CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY3D_PEER *, CUstream); - static auto func_ptr = LoadSymbol("cuMemcpy3DPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCopy, hStream); -} - 
-CUresult CUDAAPI cuMemsetD8(CUdeviceptr dstDevice, unsigned char uc, size_t N) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, unsigned char, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD8_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, uc, N); -} - -CUresult CUDAAPI cuMemsetD16(CUdeviceptr dstDevice, unsigned short us, - size_t N) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, unsigned short, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD16_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, us, N); -} - -CUresult CUDAAPI cuMemsetD32(CUdeviceptr dstDevice, unsigned int ui, size_t N) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, unsigned int, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD32_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, ui, N); -} - -CUresult CUDAAPI cuMemsetD2D8(CUdeviceptr dstDevice, size_t dstPitch, - unsigned char uc, size_t Width, size_t Height) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned char, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD2D8_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, uc, Width, Height); -} - -CUresult CUDAAPI cuMemsetD2D16(CUdeviceptr dstDevice, size_t dstPitch, - unsigned short us, size_t Width, size_t Height) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned short, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD2D16_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, us, Width, Height); -} - -CUresult CUDAAPI cuMemsetD2D32(CUdeviceptr dstDevice, size_t dstPitch, - unsigned int ui, size_t Width, size_t Height) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned int, size_t, size_t); - static auto func_ptr = LoadSymbol("cuMemsetD2D32_v2"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, ui, Width, Height); -} - -CUresult CUDAAPI cuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, - size_t N, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, unsigned char, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD8Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, uc, N, hStream); -} - -CUresult CUDAAPI cuMemsetD16Async(CUdeviceptr dstDevice, unsigned short us, - size_t N, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, unsigned short, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD16Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, us, N, hStream); -} - -CUresult CUDAAPI cuMemsetD32Async(CUdeviceptr dstDevice, unsigned int ui, - size_t N, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, unsigned int, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD32Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, ui, N, hStream); -} - -CUresult CUDAAPI cuMemsetD2D8Async(CUdeviceptr dstDevice, size_t dstPitch, - unsigned char uc, size_t Width, - size_t Height, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned char, - size_t, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD2D8Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, uc, Width, Height, hStream); -} - -CUresult CUDAAPI cuMemsetD2D16Async(CUdeviceptr dstDevice, size_t dstPitch, - unsigned short us, size_t Width, - size_t Height, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned short, - size_t, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD2D16Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, us, Width, Height, 
hStream); -} - -CUresult CUDAAPI cuMemsetD2D32Async(CUdeviceptr dstDevice, size_t dstPitch, - unsigned int ui, size_t Width, - size_t Height, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned int, size_t, - size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemsetD2D32Async"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dstDevice, dstPitch, ui, Width, Height, hStream); -} - -CUresult CUDAAPI cuArrayCreate(CUarray *pHandle, - const CUDA_ARRAY_DESCRIPTOR *pAllocateArray) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray *, const CUDA_ARRAY_DESCRIPTOR *); - static auto func_ptr = LoadSymbol("cuArrayCreate_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHandle, pAllocateArray); -} - -CUresult CUDAAPI cuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR *pArrayDescriptor, - CUarray hArray) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_ARRAY_DESCRIPTOR *, CUarray); - static auto func_ptr = LoadSymbol("cuArrayGetDescriptor_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pArrayDescriptor, hArray); -} - -CUresult CUDAAPI cuArrayGetSparseProperties( - CUDA_ARRAY_SPARSE_PROPERTIES *sparseProperties, CUarray array) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_ARRAY_SPARSE_PROPERTIES *, CUarray); - static auto func_ptr = LoadSymbol("cuArrayGetSparseProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(sparseProperties, array); -} - -CUresult CUDAAPI cuMipmappedArrayGetSparseProperties( - CUDA_ARRAY_SPARSE_PROPERTIES *sparseProperties, CUmipmappedArray mipmap) { - using FuncPtr = - CUresult(CUDAAPI *)(CUDA_ARRAY_SPARSE_PROPERTIES *, CUmipmappedArray); - static auto func_ptr = - LoadSymbol("cuMipmappedArrayGetSparseProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(sparseProperties, mipmap); -} - -CUresult CUDAAPI -cuArrayGetMemoryRequirements(CUDA_ARRAY_MEMORY_REQUIREMENTS *memoryRequirements, - CUarray array, 
CUdevice device) { - using FuncPtr = - CUresult(CUDAAPI *)(CUDA_ARRAY_MEMORY_REQUIREMENTS *, CUarray, CUdevice); - static auto func_ptr = LoadSymbol("cuArrayGetMemoryRequirements"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memoryRequirements, array, device); -} - -CUresult CUDAAPI cuMipmappedArrayGetMemoryRequirements( - CUDA_ARRAY_MEMORY_REQUIREMENTS *memoryRequirements, CUmipmappedArray mipmap, - CUdevice device) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_ARRAY_MEMORY_REQUIREMENTS *, - CUmipmappedArray, CUdevice); - static auto func_ptr = - LoadSymbol("cuMipmappedArrayGetMemoryRequirements"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memoryRequirements, mipmap, device); -} - -CUresult CUDAAPI cuArrayGetPlane(CUarray *pPlaneArray, CUarray hArray, - unsigned int planeIdx) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray *, CUarray, unsigned int); - static auto func_ptr = LoadSymbol("cuArrayGetPlane"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pPlaneArray, hArray, planeIdx); -} - -CUresult CUDAAPI cuArrayDestroy(CUarray hArray) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray); - static auto func_ptr = LoadSymbol("cuArrayDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hArray); -} - -CUresult CUDAAPI cuArray3DCreate( - CUarray *pHandle, const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray) { - using FuncPtr = - CUresult(CUDAAPI *)(CUarray *, const CUDA_ARRAY3D_DESCRIPTOR *); - static auto func_ptr = LoadSymbol("cuArray3DCreate_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHandle, pAllocateArray); -} - -CUresult CUDAAPI cuArray3DGetDescriptor( - CUDA_ARRAY3D_DESCRIPTOR *pArrayDescriptor, CUarray hArray) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_ARRAY3D_DESCRIPTOR *, CUarray); - static auto func_ptr = LoadSymbol("cuArray3DGetDescriptor_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pArrayDescriptor, hArray); 
-} - -CUresult CUDAAPI -cuMipmappedArrayCreate(CUmipmappedArray *pHandle, - const CUDA_ARRAY3D_DESCRIPTOR *pMipmappedArrayDesc, - unsigned int numMipmapLevels) { - using FuncPtr = CUresult(CUDAAPI *)( - CUmipmappedArray *, const CUDA_ARRAY3D_DESCRIPTOR *, unsigned int); - static auto func_ptr = LoadSymbol("cuMipmappedArrayCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHandle, pMipmappedArrayDesc, numMipmapLevels); -} - -CUresult CUDAAPI cuMipmappedArrayGetLevel(CUarray *pLevelArray, - CUmipmappedArray hMipmappedArray, - unsigned int level) { - using FuncPtr = - CUresult(CUDAAPI *)(CUarray *, CUmipmappedArray, unsigned int); - static auto func_ptr = LoadSymbol("cuMipmappedArrayGetLevel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pLevelArray, hMipmappedArray, level); -} - -CUresult CUDAAPI cuMipmappedArrayDestroy(CUmipmappedArray hMipmappedArray) { - using FuncPtr = CUresult(CUDAAPI *)(CUmipmappedArray); - static auto func_ptr = LoadSymbol("cuMipmappedArrayDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hMipmappedArray); -} - -CUresult CUDAAPI cuMemGetHandleForAddressRange(void *handle, CUdeviceptr dptr, - size_t size, - CUmemRangeHandleType handleType, - unsigned long long flags) { - using FuncPtr = CUresult(CUDAAPI *)(void *, CUdeviceptr, size_t, - CUmemRangeHandleType, unsigned long long); - static auto func_ptr = LoadSymbol("cuMemGetHandleForAddressRange"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dptr, size, handleType, flags); -} - -CUresult CUDAAPI cuMemAddressReserve(CUdeviceptr *ptr, size_t size, - size_t alignment, CUdeviceptr addr, - unsigned long long flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t, size_t, - CUdeviceptr, unsigned long long); - static auto func_ptr = LoadSymbol("cuMemAddressReserve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size, alignment, addr, flags); -} - 
-CUresult CUDAAPI cuMemAddressFree(CUdeviceptr ptr, size_t size) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemAddressFree"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size); -} - -CUresult CUDAAPI cuMemCreate(CUmemGenericAllocationHandle *handle, size_t size, - const CUmemAllocationProp *prop, - unsigned long long flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUmemGenericAllocationHandle *, size_t, - const CUmemAllocationProp *, unsigned long long); - static auto func_ptr = LoadSymbol("cuMemCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, size, prop, flags); -} - -CUresult CUDAAPI cuMemRelease(CUmemGenericAllocationHandle handle) { - using FuncPtr = CUresult(CUDAAPI *)(CUmemGenericAllocationHandle); - static auto func_ptr = LoadSymbol("cuMemRelease"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -CUresult CUDAAPI cuMemMap(CUdeviceptr ptr, size_t size, size_t offset, - CUmemGenericAllocationHandle handle, - unsigned long long flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, size_t, size_t, - CUmemGenericAllocationHandle, unsigned long long); - static auto func_ptr = LoadSymbol("cuMemMap"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size, offset, handle, flags); -} - -CUresult CUDAAPI cuMemMapArrayAsync(CUarrayMapInfo *mapInfoList, - unsigned int count, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUarrayMapInfo *, unsigned int, CUstream); - static auto func_ptr = LoadSymbol("cuMemMapArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mapInfoList, count, hStream); -} - -CUresult CUDAAPI cuMemUnmap(CUdeviceptr ptr, size_t size) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemUnmap"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size); -} 
- -CUresult CUDAAPI cuMemSetAccess(CUdeviceptr ptr, size_t size, - const CUmemAccessDesc *desc, size_t count) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, size_t, const CUmemAccessDesc *, size_t); - static auto func_ptr = LoadSymbol("cuMemSetAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size, desc, count); -} - -CUresult CUDAAPI cuMemGetAccess(unsigned long long *flags, - const CUmemLocation *location, - CUdeviceptr ptr) { - using FuncPtr = CUresult(CUDAAPI *)(unsigned long long *, - const CUmemLocation *, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuMemGetAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags, location, ptr); -} - -CUresult CUDAAPI cuMemExportToShareableHandle( - void *shareableHandle, CUmemGenericAllocationHandle handle, - CUmemAllocationHandleType handleType, unsigned long long flags) { - using FuncPtr = - CUresult(CUDAAPI *)(void *, CUmemGenericAllocationHandle, - CUmemAllocationHandleType, unsigned long long); - static auto func_ptr = LoadSymbol("cuMemExportToShareableHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(shareableHandle, handle, handleType, flags); -} - -CUresult CUDAAPI cuMemImportFromShareableHandle( - CUmemGenericAllocationHandle *handle, void *osHandle, - CUmemAllocationHandleType shHandleType) { - using FuncPtr = CUresult(CUDAAPI *)(CUmemGenericAllocationHandle *, void *, - CUmemAllocationHandleType); - static auto func_ptr = LoadSymbol("cuMemImportFromShareableHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, osHandle, shHandleType); -} - -CUresult CUDAAPI cuMemGetAllocationGranularity( - size_t *granularity, const CUmemAllocationProp *prop, - CUmemAllocationGranularity_flags option) { - using FuncPtr = CUresult(CUDAAPI *)(size_t *, const CUmemAllocationProp *, - CUmemAllocationGranularity_flags); - static auto func_ptr = LoadSymbol("cuMemGetAllocationGranularity"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(granularity, prop, option); -} - -CUresult CUDAAPI cuMemGetAllocationPropertiesFromHandle( - CUmemAllocationProp *prop, CUmemGenericAllocationHandle handle) { - using FuncPtr = - CUresult(CUDAAPI *)(CUmemAllocationProp *, CUmemGenericAllocationHandle); - static auto func_ptr = - LoadSymbol("cuMemGetAllocationPropertiesFromHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(prop, handle); -} - -CUresult CUDAAPI -cuMemRetainAllocationHandle(CUmemGenericAllocationHandle *handle, void *addr) { - using FuncPtr = CUresult(CUDAAPI *)(CUmemGenericAllocationHandle *, void *); - static auto func_ptr = LoadSymbol("cuMemRetainAllocationHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, addr); -} - -CUresult CUDAAPI cuMemFreeAsync(CUdeviceptr dptr, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUstream); - static auto func_ptr = LoadSymbol("cuMemFreeAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr, hStream); -} - -CUresult CUDAAPI cuMemAllocAsync(CUdeviceptr *dptr, size_t bytesize, - CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t, CUstream); - static auto func_ptr = LoadSymbol("cuMemAllocAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dptr, bytesize, hStream); -} - -CUresult CUDAAPI cuMemPoolTrimTo(CUmemoryPool pool, size_t minBytesToKeep) { - using FuncPtr = CUresult(CUDAAPI *)(CUmemoryPool, size_t); - static auto func_ptr = LoadSymbol("cuMemPoolTrimTo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pool, minBytesToKeep); -} - -CUresult CUDAAPI cuMemPoolSetAttribute(CUmemoryPool pool, - CUmemPool_attribute attr, void *value) { - using FuncPtr = - CUresult(CUDAAPI *)(CUmemoryPool, CUmemPool_attribute, void *); - static auto func_ptr = LoadSymbol("cuMemPoolSetAttribute"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(pool, attr, value); -} - -CUresult CUDAAPI cuMemPoolGetAttribute(CUmemoryPool pool, - CUmemPool_attribute attr, void *value) { - using FuncPtr = - CUresult(CUDAAPI *)(CUmemoryPool, CUmemPool_attribute, void *); - static auto func_ptr = LoadSymbol("cuMemPoolGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pool, attr, value); -} - -CUresult CUDAAPI cuMemPoolSetAccess(CUmemoryPool pool, - const CUmemAccessDesc *map, size_t count) { - using FuncPtr = - CUresult(CUDAAPI *)(CUmemoryPool, const CUmemAccessDesc *, size_t); - static auto func_ptr = LoadSymbol("cuMemPoolSetAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pool, map, count); -} - -CUresult CUDAAPI cuMemPoolGetAccess(CUmemAccess_flags *flags, - CUmemoryPool memPool, - CUmemLocation *location) { - using FuncPtr = - CUresult(CUDAAPI *)(CUmemAccess_flags *, CUmemoryPool, CUmemLocation *); - static auto func_ptr = LoadSymbol("cuMemPoolGetAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags, memPool, location); -} - -CUresult CUDAAPI cuMemPoolCreate(CUmemoryPool *pool, - const CUmemPoolProps *poolProps) { - using FuncPtr = CUresult(CUDAAPI *)(CUmemoryPool *, const CUmemPoolProps *); - static auto func_ptr = LoadSymbol("cuMemPoolCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pool, poolProps); -} - -CUresult CUDAAPI cuMemPoolDestroy(CUmemoryPool pool) { - using FuncPtr = CUresult(CUDAAPI *)(CUmemoryPool); - static auto func_ptr = LoadSymbol("cuMemPoolDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pool); -} - -CUresult CUDAAPI cuMemAllocFromPoolAsync(CUdeviceptr *dptr, size_t bytesize, - CUmemoryPool pool, CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr *, size_t, CUmemoryPool, CUstream); - static auto func_ptr = LoadSymbol("cuMemAllocFromPoolAsync"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(dptr, bytesize, pool, hStream); -} - -CUresult CUDAAPI cuMemPoolExportToShareableHandle( - void *handle_out, CUmemoryPool pool, CUmemAllocationHandleType handleType, - unsigned long long flags) { - using FuncPtr = CUresult(CUDAAPI *)( - void *, CUmemoryPool, CUmemAllocationHandleType, unsigned long long); - static auto func_ptr = - LoadSymbol("cuMemPoolExportToShareableHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle_out, pool, handleType, flags); -} - -CUresult CUDAAPI cuMemPoolImportFromShareableHandle( - CUmemoryPool *pool_out, void *handle, CUmemAllocationHandleType handleType, - unsigned long long flags) { - using FuncPtr = CUresult(CUDAAPI *)( - CUmemoryPool *, void *, CUmemAllocationHandleType, unsigned long long); - static auto func_ptr = - LoadSymbol("cuMemPoolImportFromShareableHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pool_out, handle, handleType, flags); -} - -CUresult CUDAAPI cuMemPoolExportPointer(CUmemPoolPtrExportData *shareData_out, - CUdeviceptr ptr) { - using FuncPtr = CUresult(CUDAAPI *)(CUmemPoolPtrExportData *, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuMemPoolExportPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(shareData_out, ptr); -} - -CUresult CUDAAPI cuMemPoolImportPointer(CUdeviceptr *ptr_out, CUmemoryPool pool, - CUmemPoolPtrExportData *shareData) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, CUmemoryPool, - CUmemPoolPtrExportData *); - static auto func_ptr = LoadSymbol("cuMemPoolImportPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr_out, pool, shareData); -} - -CUresult CUDAAPI cuPointerGetAttribute(void *data, - CUpointer_attribute attribute, - CUdeviceptr ptr) { - using FuncPtr = CUresult(CUDAAPI *)(void *, CUpointer_attribute, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuPointerGetAttribute"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(data, attribute, ptr); -} - -CUresult CUDAAPI cuMemPrefetchAsync(CUdeviceptr devPtr, size_t count, - CUdevice dstDevice, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t, CUdevice, CUstream); - static auto func_ptr = LoadSymbol("cuMemPrefetchAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, dstDevice, hStream); -} - -CUresult CUDAAPI cuMemAdvise(CUdeviceptr devPtr, size_t count, - CUmem_advise advice, CUdevice device) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr, size_t, CUmem_advise, CUdevice); - static auto func_ptr = LoadSymbol("cuMemAdvise"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, advice, device); -} - -CUresult CUDAAPI cuMemRangeGetAttribute(void *data, size_t dataSize, - CUmem_range_attribute attribute, - CUdeviceptr devPtr, size_t count) { - using FuncPtr = CUresult(CUDAAPI *)(void *, size_t, CUmem_range_attribute, - CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemRangeGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSize, attribute, devPtr, count); -} - -CUresult CUDAAPI cuMemRangeGetAttributes(void **data, size_t *dataSizes, - CUmem_range_attribute *attributes, - size_t numAttributes, - CUdeviceptr devPtr, size_t count) { - using FuncPtr = CUresult(CUDAAPI *)( - void **, size_t *, CUmem_range_attribute *, size_t, CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuMemRangeGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSizes, attributes, numAttributes, devPtr, count); -} - -CUresult CUDAAPI cuPointerSetAttribute(const void *value, - CUpointer_attribute attribute, - CUdeviceptr ptr) { - using FuncPtr = - CUresult(CUDAAPI *)(const void *, CUpointer_attribute, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuPointerSetAttribute"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(value, attribute, ptr); -} - -CUresult CUDAAPI cuPointerGetAttributes(unsigned int numAttributes, - CUpointer_attribute *attributes, - void **data, CUdeviceptr ptr) { - using FuncPtr = CUresult(CUDAAPI *)(unsigned int, CUpointer_attribute *, - void **, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuPointerGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numAttributes, attributes, data, ptr); -} - -CUresult CUDAAPI cuStreamCreate(CUstream *phStream, unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream *, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phStream, Flags); -} - -CUresult CUDAAPI cuStreamCreateWithPriority(CUstream *phStream, - unsigned int flags, int priority) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream *, unsigned int, int); - static auto func_ptr = LoadSymbol("cuStreamCreateWithPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phStream, flags, priority); -} - -CUresult CUDAAPI cuStreamGetPriority(CUstream hStream, int *priority) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, int *); - static auto func_ptr = LoadSymbol("cuStreamGetPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, priority); -} - -CUresult CUDAAPI cuStreamGetFlags(CUstream hStream, unsigned int *flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, unsigned int *); - static auto func_ptr = LoadSymbol("cuStreamGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, flags); -} - -CUresult CUDAAPI cuStreamGetId(CUstream hStream, unsigned long long *streamId) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, unsigned long long *); - static auto func_ptr = LoadSymbol("cuStreamGetId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, streamId); -} - -CUresult 
CUDAAPI cuStreamGetCtx(CUstream hStream, CUcontext *pctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUcontext *); - static auto func_ptr = LoadSymbol("cuStreamGetCtx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, pctx); -} - -CUresult CUDAAPI cuStreamWaitEvent(CUstream hStream, CUevent hEvent, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUevent, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamWaitEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, hEvent, Flags); -} - -CUresult CUDAAPI cuStreamAddCallback(CUstream hStream, - CUstreamCallback callback, void *userData, - unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUstreamCallback, void *, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamAddCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, callback, userData, flags); -} - -CUresult CUDAAPI cuStreamBeginCapture(CUstream hStream, - CUstreamCaptureMode mode) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUstreamCaptureMode); - static auto func_ptr = LoadSymbol("cuStreamBeginCapture_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, mode); -} - -CUresult CUDAAPI cuThreadExchangeStreamCaptureMode(CUstreamCaptureMode *mode) { - using FuncPtr = CUresult(CUDAAPI *)(CUstreamCaptureMode *); - static auto func_ptr = - LoadSymbol("cuThreadExchangeStreamCaptureMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mode); -} - -CUresult CUDAAPI cuStreamEndCapture(CUstream hStream, CUgraph *phGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUgraph *); - static auto func_ptr = LoadSymbol("cuStreamEndCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, phGraph); -} - -CUresult CUDAAPI cuStreamIsCapturing(CUstream hStream, - CUstreamCaptureStatus *captureStatus) { - using FuncPtr = 
CUresult(CUDAAPI *)(CUstream, CUstreamCaptureStatus *); - static auto func_ptr = LoadSymbol("cuStreamIsCapturing"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, captureStatus); -} - -CUresult CUDAAPI cuStreamGetCaptureInfo( - CUstream hStream, CUstreamCaptureStatus *captureStatus_out, - cuuint64_t *id_out, CUgraph *graph_out, - const CUgraphNode **dependencies_out, size_t *numDependencies_out) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUstreamCaptureStatus *, cuuint64_t *, - CUgraph *, const CUgraphNode **, size_t *); - static auto func_ptr = LoadSymbol("cuStreamGetCaptureInfo_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, captureStatus_out, id_out, graph_out, - dependencies_out, numDependencies_out); -} - -CUresult CUDAAPI cuStreamUpdateCaptureDependencies(CUstream hStream, - CUgraphNode *dependencies, - size_t numDependencies, - unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUgraphNode *, size_t, unsigned int); - static auto func_ptr = - LoadSymbol("cuStreamUpdateCaptureDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, dependencies, numDependencies, flags); -} - -CUresult CUDAAPI cuStreamAttachMemAsync(CUstream hStream, CUdeviceptr dptr, - size_t length, unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUdeviceptr, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamAttachMemAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, dptr, length, flags); -} - -CUresult CUDAAPI cuStreamQuery(CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream); - static auto func_ptr = LoadSymbol("cuStreamQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream); -} - -CUresult CUDAAPI cuStreamSynchronize(CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream); - static auto func_ptr = 
LoadSymbol("cuStreamSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream); -} - -CUresult CUDAAPI cuStreamDestroy(CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream); - static auto func_ptr = LoadSymbol("cuStreamDestroy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream); -} - -CUresult CUDAAPI cuStreamCopyAttributes(CUstream dst, CUstream src) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUstream); - static auto func_ptr = LoadSymbol("cuStreamCopyAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src); -} - -CUresult CUDAAPI cuStreamGetAttribute(CUstream hStream, CUstreamAttrID attr, - CUstreamAttrValue *value_out) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUstreamAttrID, CUstreamAttrValue *); - static auto func_ptr = LoadSymbol("cuStreamGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, attr, value_out); -} - -CUresult CUDAAPI cuStreamSetAttribute(CUstream hStream, CUstreamAttrID attr, - const CUstreamAttrValue *value) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUstreamAttrID, const CUstreamAttrValue *); - static auto func_ptr = LoadSymbol("cuStreamSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, attr, value); -} - -CUresult CUDAAPI cuEventCreate(CUevent *phEvent, unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent *, unsigned int); - static auto func_ptr = LoadSymbol("cuEventCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phEvent, Flags); -} - -CUresult CUDAAPI cuEventRecord(CUevent hEvent, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent, CUstream); - static auto func_ptr = LoadSymbol("cuEventRecord"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hEvent, hStream); -} - -CUresult CUDAAPI cuEventRecordWithFlags(CUevent hEvent, CUstream hStream, - 
unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent, CUstream, unsigned int); - static auto func_ptr = LoadSymbol("cuEventRecordWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hEvent, hStream, flags); -} - -CUresult CUDAAPI cuEventQuery(CUevent hEvent) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent); - static auto func_ptr = LoadSymbol("cuEventQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hEvent); -} - -CUresult CUDAAPI cuEventSynchronize(CUevent hEvent) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent); - static auto func_ptr = LoadSymbol("cuEventSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hEvent); -} - -CUresult CUDAAPI cuEventDestroy(CUevent hEvent) { - using FuncPtr = CUresult(CUDAAPI *)(CUevent); - static auto func_ptr = LoadSymbol("cuEventDestroy_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hEvent); -} - -CUresult CUDAAPI cuEventElapsedTime(float *pMilliseconds, CUevent hStart, - CUevent hEnd) { - using FuncPtr = CUresult(CUDAAPI *)(float *, CUevent, CUevent); - static auto func_ptr = LoadSymbol("cuEventElapsedTime"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pMilliseconds, hStart, hEnd); -} - -CUresult CUDAAPI -cuImportExternalMemory(CUexternalMemory *extMem_out, - const CUDA_EXTERNAL_MEMORY_HANDLE_DESC *memHandleDesc) { - using FuncPtr = CUresult(CUDAAPI *)(CUexternalMemory *, - const CUDA_EXTERNAL_MEMORY_HANDLE_DESC *); - static auto func_ptr = LoadSymbol("cuImportExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem_out, memHandleDesc); -} - -CUresult CUDAAPI cuExternalMemoryGetMappedBuffer( - CUdeviceptr *devPtr, CUexternalMemory extMem, - const CUDA_EXTERNAL_MEMORY_BUFFER_DESC *bufferDesc) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, CUexternalMemory, - const CUDA_EXTERNAL_MEMORY_BUFFER_DESC *); - static auto func_ptr = 
LoadSymbol("cuExternalMemoryGetMappedBuffer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, extMem, bufferDesc); -} - -CUresult CUDAAPI cuExternalMemoryGetMappedMipmappedArray( - CUmipmappedArray *mipmap, CUexternalMemory extMem, - const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC *mipmapDesc) { - using FuncPtr = - CUresult(CUDAAPI *)(CUmipmappedArray *, CUexternalMemory, - const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC *); - static auto func_ptr = - LoadSymbol("cuExternalMemoryGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmap, extMem, mipmapDesc); -} - -CUresult CUDAAPI cuDestroyExternalMemory(CUexternalMemory extMem) { - using FuncPtr = CUresult(CUDAAPI *)(CUexternalMemory); - static auto func_ptr = LoadSymbol("cuDestroyExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem); -} - -CUresult CUDAAPI cuImportExternalSemaphore( - CUexternalSemaphore *extSem_out, - const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC *semHandleDesc) { - using FuncPtr = CUresult(CUDAAPI *)( - CUexternalSemaphore *, const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC *); - static auto func_ptr = LoadSymbol("cuImportExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem_out, semHandleDesc); -} - -CUresult CUDAAPI cuSignalExternalSemaphoresAsync( - const CUexternalSemaphore *extSemArray, - const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS *paramsArray, - unsigned int numExtSems, CUstream stream) { - using FuncPtr = CUresult(CUDAAPI *)( - const CUexternalSemaphore *, - const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS *, unsigned int, CUstream); - static auto func_ptr = LoadSymbol("cuSignalExternalSemaphoresAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -CUresult CUDAAPI cuWaitExternalSemaphoresAsync( - const CUexternalSemaphore *extSemArray, - const 
CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS *paramsArray, - unsigned int numExtSems, CUstream stream) { - using FuncPtr = CUresult(CUDAAPI *)( - const CUexternalSemaphore *, const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS *, - unsigned int, CUstream); - static auto func_ptr = LoadSymbol("cuWaitExternalSemaphoresAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -CUresult CUDAAPI cuDestroyExternalSemaphore(CUexternalSemaphore extSem) { - using FuncPtr = CUresult(CUDAAPI *)(CUexternalSemaphore); - static auto func_ptr = LoadSymbol("cuDestroyExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem); -} - -CUresult CUDAAPI cuStreamWaitValue32(CUstream stream, CUdeviceptr addr, - cuuint32_t value, unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUdeviceptr, cuuint32_t, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamWaitValue32_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, addr, value, flags); -} - -CUresult CUDAAPI cuStreamWaitValue64(CUstream stream, CUdeviceptr addr, - cuuint64_t value, unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUdeviceptr, cuuint64_t, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamWaitValue64_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, addr, value, flags); -} - -CUresult CUDAAPI cuStreamWriteValue32(CUstream stream, CUdeviceptr addr, - cuuint32_t value, unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, CUdeviceptr, cuuint32_t, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamWriteValue32_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, addr, value, flags); -} - -CUresult CUDAAPI cuStreamWriteValue64(CUstream stream, CUdeviceptr addr, - cuuint64_t value, unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUstream, 
CUdeviceptr, cuuint64_t, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamWriteValue64_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, addr, value, flags); -} - -CUresult CUDAAPI cuStreamBatchMemOp(CUstream stream, unsigned int count, - CUstreamBatchMemOpParams *paramArray, - unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, unsigned int, - CUstreamBatchMemOpParams *, unsigned int); - static auto func_ptr = LoadSymbol("cuStreamBatchMemOp_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, count, paramArray, flags); -} - -CUresult CUDAAPI cuFuncGetAttribute(int *pi, CUfunction_attribute attrib, - CUfunction hfunc) { - using FuncPtr = CUresult(CUDAAPI *)(int *, CUfunction_attribute, CUfunction); - static auto func_ptr = LoadSymbol("cuFuncGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pi, attrib, hfunc); -} - -CUresult CUDAAPI cuFuncSetAttribute(CUfunction hfunc, - CUfunction_attribute attrib, int value) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, CUfunction_attribute, int); - static auto func_ptr = LoadSymbol("cuFuncSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, attrib, value); -} - -CUresult CUDAAPI cuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, CUfunc_cache); - static auto func_ptr = LoadSymbol("cuFuncSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, config); -} - -CUresult CUDAAPI cuFuncSetSharedMemConfig(CUfunction hfunc, - CUsharedconfig config) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, CUsharedconfig); - static auto func_ptr = LoadSymbol("cuFuncSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, config); -} - -CUresult CUDAAPI cuFuncGetModule(CUmodule *hmod, CUfunction hfunc) { - using FuncPtr = CUresult(CUDAAPI 
*)(CUmodule *, CUfunction); - static auto func_ptr = LoadSymbol("cuFuncGetModule"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hmod, hfunc); -} - -CUresult CUDAAPI cuLaunchKernel(CUfunction f, unsigned int gridDimX, - unsigned int gridDimY, unsigned int gridDimZ, - unsigned int blockDimX, unsigned int blockDimY, - unsigned int blockDimZ, - unsigned int sharedMemBytes, CUstream hStream, - void **kernelParams, void **extra) { - using FuncPtr = CUresult(CUDAAPI *)( - CUfunction, unsigned int, unsigned int, unsigned int, unsigned int, - unsigned int, unsigned int, unsigned int, CUstream, void **, void **); - static auto func_ptr = LoadSymbol("cuLaunchKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, - blockDimZ, sharedMemBytes, hStream, kernelParams, extra); -} - -CUresult CUDAAPI cuLaunchKernelEx(const CUlaunchConfig *config, CUfunction f, - void **kernelParams, void **extra) { - using FuncPtr = - CUresult(CUDAAPI *)(const CUlaunchConfig *, CUfunction, void **, void **); - static auto func_ptr = LoadSymbol("cuLaunchKernelEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config, f, kernelParams, extra); -} - -CUresult CUDAAPI cuLaunchCooperativeKernel( - CUfunction f, unsigned int gridDimX, unsigned int gridDimY, - unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, - unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, - void **kernelParams) { - using FuncPtr = CUresult(CUDAAPI *)( - CUfunction, unsigned int, unsigned int, unsigned int, unsigned int, - unsigned int, unsigned int, unsigned int, CUstream, void **); - static auto func_ptr = LoadSymbol("cuLaunchCooperativeKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, - blockDimZ, sharedMemBytes, hStream, kernelParams); -} - -__CUDA_DEPRECATED CUresult CUDAAPI 
cuLaunchCooperativeKernelMultiDevice( - CUDA_LAUNCH_PARAMS *launchParamsList, unsigned int numDevices, - unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUDA_LAUNCH_PARAMS *, unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cuLaunchCooperativeKernelMultiDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(launchParamsList, numDevices, flags); -} - -CUresult CUDAAPI cuLaunchHostFunc(CUstream hStream, CUhostFn fn, - void *userData) { - using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUhostFn, void *); - static auto func_ptr = LoadSymbol("cuLaunchHostFunc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, fn, userData); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuFuncSetBlockShape(CUfunction hfunc, int x, - int y, int z) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, int, int); - static auto func_ptr = LoadSymbol("cuFuncSetBlockShape"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, x, y, z); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuFuncSetSharedSize(CUfunction hfunc, - unsigned int bytes) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, unsigned int); - static auto func_ptr = LoadSymbol("cuFuncSetSharedSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, bytes); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuParamSetSize(CUfunction hfunc, - unsigned int numbytes) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, unsigned int); - static auto func_ptr = LoadSymbol("cuParamSetSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, numbytes); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuParamSeti(CUfunction hfunc, int offset, - unsigned int value) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, unsigned int); - static auto func_ptr = LoadSymbol("cuParamSeti"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, offset, value); -} - -__CUDA_DEPRECATED CUresult 
CUDAAPI cuParamSetf(CUfunction hfunc, int offset, - float value) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, float); - static auto func_ptr = LoadSymbol("cuParamSetf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, offset, value); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuParamSetv(CUfunction hfunc, int offset, - void *ptr, - unsigned int numbytes) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, void *, unsigned int); - static auto func_ptr = LoadSymbol("cuParamSetv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, offset, ptr, numbytes); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuLaunch(CUfunction f) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction); - static auto func_ptr = LoadSymbol("cuLaunch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(f); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuLaunchGrid(CUfunction f, int grid_width, - int grid_height) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, int); - static auto func_ptr = LoadSymbol("cuLaunchGrid"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(f, grid_width, grid_height); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuLaunchGridAsync(CUfunction f, - int grid_width, - int grid_height, - CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, int, CUstream); - static auto func_ptr = LoadSymbol("cuLaunchGridAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(f, grid_width, grid_height, hStream); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuParamSetTexRef(CUfunction hfunc, - int texunit, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, CUtexref); - static auto func_ptr = LoadSymbol("cuParamSetTexRef"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hfunc, texunit, hTexRef); -} - -CUresult CUDAAPI cuGraphCreate(CUgraph *phGraph, unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph 
*, unsigned int); - static auto func_ptr = LoadSymbol("cuGraphCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraph, flags); -} - -CUresult CUDAAPI cuGraphAddKernelNode( - CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, - size_t numDependencies, const CUDA_KERNEL_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, - const CUDA_KERNEL_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphAddKernelNode_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - nodeParams); -} - -CUresult CUDAAPI cuGraphKernelNodeGetParams( - CUgraphNode hNode, CUDA_KERNEL_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_KERNEL_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphKernelNodeGetParams_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphKernelNodeSetParams( - CUgraphNode hNode, const CUDA_KERNEL_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode, const CUDA_KERNEL_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphKernelNodeSetParams_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphAddMemcpyNode(CUgraphNode *phGraphNode, CUgraph hGraph, - const CUgraphNode *dependencies, - size_t numDependencies, - const CUDA_MEMCPY3D *copyParams, - CUcontext ctx) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, - const CUDA_MEMCPY3D *, CUcontext); - static auto func_ptr = LoadSymbol("cuGraphAddMemcpyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - copyParams, ctx); -} - -CUresult CUDAAPI cuGraphMemcpyNodeGetParams(CUgraphNode hNode, - 
CUDA_MEMCPY3D *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_MEMCPY3D *); - static auto func_ptr = LoadSymbol("cuGraphMemcpyNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphMemcpyNodeSetParams(CUgraphNode hNode, - const CUDA_MEMCPY3D *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, const CUDA_MEMCPY3D *); - static auto func_ptr = LoadSymbol("cuGraphMemcpyNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphAddMemsetNode( - CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, - size_t numDependencies, const CUDA_MEMSET_NODE_PARAMS *memsetParams, - CUcontext ctx) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, - const CUDA_MEMSET_NODE_PARAMS *, CUcontext); - static auto func_ptr = LoadSymbol("cuGraphAddMemsetNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - memsetParams, ctx); -} - -CUresult CUDAAPI cuGraphMemsetNodeGetParams( - CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_MEMSET_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphMemsetNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphMemsetNodeSetParams( - CUgraphNode hNode, const CUDA_MEMSET_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode, const CUDA_MEMSET_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphAddHostNode(CUgraphNode *phGraphNode, CUgraph hGraph, - const CUgraphNode *dependencies, - size_t numDependencies, - 
const CUDA_HOST_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, - const CUDA_HOST_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphAddHostNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - nodeParams); -} - -CUresult CUDAAPI cuGraphHostNodeGetParams(CUgraphNode hNode, - CUDA_HOST_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_HOST_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphHostNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphHostNodeSetParams( - CUgraphNode hNode, const CUDA_HOST_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode, const CUDA_HOST_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphHostNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphAddChildGraphNode(CUgraphNode *phGraphNode, - CUgraph hGraph, - const CUgraphNode *dependencies, - size_t numDependencies, - CUgraph childGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, - const CUgraphNode *, size_t, CUgraph); - static auto func_ptr = LoadSymbol("cuGraphAddChildGraphNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - childGraph); -} - -CUresult CUDAAPI cuGraphChildGraphNodeGetGraph(CUgraphNode hNode, - CUgraph *phGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraph *); - static auto func_ptr = LoadSymbol("cuGraphChildGraphNodeGetGraph"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, phGraph); -} - -CUresult CUDAAPI cuGraphAddEmptyNode(CUgraphNode *phGraphNode, CUgraph hGraph, - const CUgraphNode *dependencies, - size_t numDependencies) { - using 
FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t); - static auto func_ptr = LoadSymbol("cuGraphAddEmptyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies); -} - -CUresult CUDAAPI cuGraphAddEventRecordNode(CUgraphNode *phGraphNode, - CUgraph hGraph, - const CUgraphNode *dependencies, - size_t numDependencies, - CUevent event) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, - const CUgraphNode *, size_t, CUevent); - static auto func_ptr = LoadSymbol("cuGraphAddEventRecordNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, event); -} - -CUresult CUDAAPI cuGraphEventRecordNodeGetEvent(CUgraphNode hNode, - CUevent *event_out) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUevent *); - static auto func_ptr = LoadSymbol("cuGraphEventRecordNodeGetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, event_out); -} - -CUresult CUDAAPI cuGraphEventRecordNodeSetEvent(CUgraphNode hNode, - CUevent event) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUevent); - static auto func_ptr = LoadSymbol("cuGraphEventRecordNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, event); -} - -CUresult CUDAAPI cuGraphAddEventWaitNode(CUgraphNode *phGraphNode, - CUgraph hGraph, - const CUgraphNode *dependencies, - size_t numDependencies, - CUevent event) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, - const CUgraphNode *, size_t, CUevent); - static auto func_ptr = LoadSymbol("cuGraphAddEventWaitNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, event); -} - -CUresult CUDAAPI cuGraphEventWaitNodeGetEvent(CUgraphNode hNode, - CUevent *event_out) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUevent *); - static auto 
func_ptr = LoadSymbol("cuGraphEventWaitNodeGetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, event_out); -} - -CUresult CUDAAPI cuGraphEventWaitNodeSetEvent(CUgraphNode hNode, - CUevent event) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUevent); - static auto func_ptr = LoadSymbol("cuGraphEventWaitNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, event); -} - -CUresult CUDAAPI cuGraphAddExternalSemaphoresSignalNode( - CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, - size_t numDependencies, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, - const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *); - static auto func_ptr = - LoadSymbol("cuGraphAddExternalSemaphoresSignalNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - nodeParams); -} - -CUresult CUDAAPI cuGraphExternalSemaphoresSignalNodeGetParams( - CUgraphNode hNode, CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *params_out) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode, CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *); - static auto func_ptr = - LoadSymbol("cuGraphExternalSemaphoresSignalNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, params_out); -} - -CUresult CUDAAPI cuGraphExternalSemaphoresSignalNodeSetParams( - CUgraphNode hNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *); - static auto func_ptr = - LoadSymbol("cuGraphExternalSemaphoresSignalNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphAddExternalSemaphoresWaitNode( - CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, - size_t numDependencies, 
const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, - const CUDA_EXT_SEM_WAIT_NODE_PARAMS *); - static auto func_ptr = - LoadSymbol("cuGraphAddExternalSemaphoresWaitNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - nodeParams); -} - -CUresult CUDAAPI cuGraphExternalSemaphoresWaitNodeGetParams( - CUgraphNode hNode, CUDA_EXT_SEM_WAIT_NODE_PARAMS *params_out) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode, CUDA_EXT_SEM_WAIT_NODE_PARAMS *); - static auto func_ptr = - LoadSymbol("cuGraphExternalSemaphoresWaitNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, params_out); -} - -CUresult CUDAAPI cuGraphExternalSemaphoresWaitNodeSetParams( - CUgraphNode hNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS *); - static auto func_ptr = - LoadSymbol("cuGraphExternalSemaphoresWaitNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphAddBatchMemOpNode( - CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, - size_t numDependencies, const CUDA_BATCH_MEM_OP_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, - const CUDA_BATCH_MEM_OP_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphAddBatchMemOpNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - nodeParams); -} - -CUresult CUDAAPI cuGraphBatchMemOpNodeGetParams( - CUgraphNode hNode, CUDA_BATCH_MEM_OP_NODE_PARAMS *nodeParams_out) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode, CUDA_BATCH_MEM_OP_NODE_PARAMS *); - static auto func_ptr = 
LoadSymbol("cuGraphBatchMemOpNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams_out); -} - -CUresult CUDAAPI cuGraphBatchMemOpNodeSetParams( - CUgraphNode hNode, const CUDA_BATCH_MEM_OP_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode, const CUDA_BATCH_MEM_OP_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphBatchMemOpNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphExecBatchMemOpNodeSetParams( - CUgraphExec hGraphExec, CUgraphNode hNode, - const CUDA_BATCH_MEM_OP_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, - const CUDA_BATCH_MEM_OP_NODE_PARAMS *); - static auto func_ptr = - LoadSymbol("cuGraphExecBatchMemOpNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphAddMemAllocNode( - CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, - size_t numDependencies, CUDA_MEM_ALLOC_NODE_PARAMS *nodeParams) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, - CUDA_MEM_ALLOC_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphAddMemAllocNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, - nodeParams); -} - -CUresult CUDAAPI cuGraphMemAllocNodeGetParams( - CUgraphNode hNode, CUDA_MEM_ALLOC_NODE_PARAMS *params_out) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphNode, CUDA_MEM_ALLOC_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphMemAllocNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, params_out); -} - -CUresult CUDAAPI cuGraphAddMemFreeNode(CUgraphNode *phGraphNode, CUgraph hGraph, - const CUgraphNode *dependencies, - size_t numDependencies, - CUdeviceptr dptr) { 
- using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, - const CUgraphNode *, size_t, CUdeviceptr); - static auto func_ptr = LoadSymbol("cuGraphAddMemFreeNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, dptr); -} - -CUresult CUDAAPI cuGraphMemFreeNodeGetParams(CUgraphNode hNode, - CUdeviceptr *dptr_out) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUdeviceptr *); - static auto func_ptr = LoadSymbol("cuGraphMemFreeNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, dptr_out); -} - -CUresult CUDAAPI cuDeviceGraphMemTrim(CUdevice device) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGraphMemTrim"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -CUresult CUDAAPI cuDeviceGetGraphMemAttribute(CUdevice device, - CUgraphMem_attribute attr, - void *value) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice, CUgraphMem_attribute, void *); - static auto func_ptr = LoadSymbol("cuDeviceGetGraphMemAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, attr, value); -} - -CUresult CUDAAPI cuDeviceSetGraphMemAttribute(CUdevice device, - CUgraphMem_attribute attr, - void *value) { - using FuncPtr = CUresult(CUDAAPI *)(CUdevice, CUgraphMem_attribute, void *); - static auto func_ptr = LoadSymbol("cuDeviceSetGraphMemAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, attr, value); -} - -CUresult CUDAAPI cuGraphClone(CUgraph *phGraphClone, CUgraph originalGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph *, CUgraph); - static auto func_ptr = LoadSymbol("cuGraphClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphClone, originalGraph); -} - -CUresult CUDAAPI cuGraphNodeFindInClone(CUgraphNode *phNode, - CUgraphNode hOriginalNode, - CUgraph hClonedGraph) { - using 
FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraphNode, CUgraph); - static auto func_ptr = LoadSymbol("cuGraphNodeFindInClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phNode, hOriginalNode, hClonedGraph); -} - -CUresult CUDAAPI cuGraphNodeGetType(CUgraphNode hNode, CUgraphNodeType *type) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraphNodeType *); - static auto func_ptr = LoadSymbol("cuGraphNodeGetType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, type); -} - -CUresult CUDAAPI cuGraphGetNodes(CUgraph hGraph, CUgraphNode *nodes, - size_t *numNodes) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph, CUgraphNode *, size_t *); - static auto func_ptr = LoadSymbol("cuGraphGetNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph, nodes, numNodes); -} - -CUresult CUDAAPI cuGraphGetRootNodes(CUgraph hGraph, CUgraphNode *rootNodes, - size_t *numRootNodes) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph, CUgraphNode *, size_t *); - static auto func_ptr = LoadSymbol("cuGraphGetRootNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph, rootNodes, numRootNodes); -} - -CUresult CUDAAPI cuGraphGetEdges(CUgraph hGraph, CUgraphNode *from, - CUgraphNode *to, size_t *numEdges) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraph, CUgraphNode *, CUgraphNode *, size_t *); - static auto func_ptr = LoadSymbol("cuGraphGetEdges"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph, from, to, numEdges); -} - -CUresult CUDAAPI cuGraphNodeGetDependencies(CUgraphNode hNode, - CUgraphNode *dependencies, - size_t *numDependencies) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraphNode *, size_t *); - static auto func_ptr = LoadSymbol("cuGraphNodeGetDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, dependencies, numDependencies); -} - -CUresult CUDAAPI cuGraphNodeGetDependentNodes(CUgraphNode 
hNode, - CUgraphNode *dependentNodes, - size_t *numDependentNodes) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraphNode *, size_t *); - static auto func_ptr = LoadSymbol("cuGraphNodeGetDependentNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, dependentNodes, numDependentNodes); -} - -CUresult CUDAAPI cuGraphAddDependencies(CUgraph hGraph, const CUgraphNode *from, - const CUgraphNode *to, - size_t numDependencies) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph, const CUgraphNode *, - const CUgraphNode *, size_t); - static auto func_ptr = LoadSymbol("cuGraphAddDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph, from, to, numDependencies); -} - -CUresult CUDAAPI cuGraphRemoveDependencies(CUgraph hGraph, - const CUgraphNode *from, - const CUgraphNode *to, - size_t numDependencies) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph, const CUgraphNode *, - const CUgraphNode *, size_t); - static auto func_ptr = LoadSymbol("cuGraphRemoveDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph, from, to, numDependencies); -} - -CUresult CUDAAPI cuGraphDestroyNode(CUgraphNode hNode) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode); - static auto func_ptr = LoadSymbol("cuGraphDestroyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode); -} - -CUresult CUDAAPI cuGraphInstantiate(CUgraphExec *phGraphExec, CUgraph hGraph, - unsigned long long flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphExec *, CUgraph, unsigned long long); - static auto func_ptr = LoadSymbol("cuGraphInstantiateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphExec, hGraph, flags); -} - -CUresult CUDAAPI -cuGraphInstantiateWithParams(CUgraphExec *phGraphExec, CUgraph hGraph, - CUDA_GRAPH_INSTANTIATE_PARAMS *instantiateParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec *, CUgraph, - 
CUDA_GRAPH_INSTANTIATE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphInstantiateWithParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phGraphExec, hGraph, instantiateParams); -} - -CUresult CUDAAPI cuGraphExecGetFlags(CUgraphExec hGraphExec, - cuuint64_t *flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, cuuint64_t *); - static auto func_ptr = LoadSymbol("cuGraphExecGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, flags); -} - -CUresult CUDAAPI -cuGraphExecKernelNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, - const CUDA_KERNEL_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, - const CUDA_KERNEL_NODE_PARAMS *); - static auto func_ptr = - LoadSymbol("cuGraphExecKernelNodeSetParams_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphExecMemcpyNodeSetParams(CUgraphExec hGraphExec, - CUgraphNode hNode, - const CUDA_MEMCPY3D *copyParams, - CUcontext ctx) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, - const CUDA_MEMCPY3D *, CUcontext); - static auto func_ptr = LoadSymbol("cuGraphExecMemcpyNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, copyParams, ctx); -} - -CUresult CUDAAPI cuGraphExecMemsetNodeSetParams( - CUgraphExec hGraphExec, CUgraphNode hNode, - const CUDA_MEMSET_NODE_PARAMS *memsetParams, CUcontext ctx) { - using FuncPtr = CUresult(CUDAAPI *)( - CUgraphExec, CUgraphNode, const CUDA_MEMSET_NODE_PARAMS *, CUcontext); - static auto func_ptr = LoadSymbol("cuGraphExecMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, memsetParams, ctx); -} - -CUresult CUDAAPI -cuGraphExecHostNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, - const CUDA_HOST_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI 
*)(CUgraphExec, CUgraphNode, - const CUDA_HOST_NODE_PARAMS *); - static auto func_ptr = LoadSymbol("cuGraphExecHostNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphExecChildGraphNodeSetParams(CUgraphExec hGraphExec, - CUgraphNode hNode, - CUgraph childGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, CUgraph); - static auto func_ptr = - LoadSymbol("cuGraphExecChildGraphNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, childGraph); -} - -CUresult CUDAAPI cuGraphExecEventRecordNodeSetEvent(CUgraphExec hGraphExec, - CUgraphNode hNode, - CUevent event) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, CUevent); - static auto func_ptr = - LoadSymbol("cuGraphExecEventRecordNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, event); -} - -CUresult CUDAAPI cuGraphExecEventWaitNodeSetEvent(CUgraphExec hGraphExec, - CUgraphNode hNode, - CUevent event) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, CUevent); - static auto func_ptr = - LoadSymbol("cuGraphExecEventWaitNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, event); -} - -CUresult CUDAAPI cuGraphExecExternalSemaphoresSignalNodeSetParams( - CUgraphExec hGraphExec, CUgraphNode hNode, - const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, - const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *); - static auto func_ptr = - LoadSymbol("cuGraphExecExternalSemaphoresSignalNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphExecExternalSemaphoresWaitNodeSetParams( - CUgraphExec hGraphExec, CUgraphNode hNode, - const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams) { - using FuncPtr 
= CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, - const CUDA_EXT_SEM_WAIT_NODE_PARAMS *); - static auto func_ptr = - LoadSymbol("cuGraphExecExternalSemaphoresWaitNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, nodeParams); -} - -CUresult CUDAAPI cuGraphNodeSetEnabled(CUgraphExec hGraphExec, - CUgraphNode hNode, - unsigned int isEnabled) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, unsigned int); - static auto func_ptr = LoadSymbol("cuGraphNodeSetEnabled"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, isEnabled); -} - -CUresult CUDAAPI cuGraphNodeGetEnabled(CUgraphExec hGraphExec, - CUgraphNode hNode, - unsigned int *isEnabled) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, unsigned int *); - static auto func_ptr = LoadSymbol("cuGraphNodeGetEnabled"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, isEnabled); -} - -CUresult CUDAAPI cuGraphUpload(CUgraphExec hGraphExec, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUstream); - static auto func_ptr = LoadSymbol("cuGraphUpload"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hStream); -} - -CUresult CUDAAPI cuGraphLaunch(CUgraphExec hGraphExec, CUstream hStream) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUstream); - static auto func_ptr = LoadSymbol("cuGraphLaunch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hStream); -} - -CUresult CUDAAPI cuGraphExecDestroy(CUgraphExec hGraphExec) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec); - static auto func_ptr = LoadSymbol("cuGraphExecDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec); -} - -CUresult CUDAAPI cuGraphDestroy(CUgraph hGraph) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph); - static auto func_ptr = LoadSymbol("cuGraphDestroy"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph); -} - -CUresult CUDAAPI cuGraphExecUpdate(CUgraphExec hGraphExec, CUgraph hGraph, - CUgraphExecUpdateResultInfo *resultInfo) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraphExec, CUgraph, CUgraphExecUpdateResultInfo *); - static auto func_ptr = LoadSymbol("cuGraphExecUpdate_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hGraph, resultInfo); -} - -CUresult CUDAAPI cuGraphKernelNodeCopyAttributes(CUgraphNode dst, - CUgraphNode src) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraphNode); - static auto func_ptr = LoadSymbol("cuGraphKernelNodeCopyAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src); -} - -CUresult CUDAAPI -cuGraphKernelNodeGetAttribute(CUgraphNode hNode, CUkernelNodeAttrID attr, - CUkernelNodeAttrValue *value_out) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUkernelNodeAttrID, - CUkernelNodeAttrValue *); - static auto func_ptr = LoadSymbol("cuGraphKernelNodeGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, attr, value_out); -} - -CUresult CUDAAPI -cuGraphKernelNodeSetAttribute(CUgraphNode hNode, CUkernelNodeAttrID attr, - const CUkernelNodeAttrValue *value) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUkernelNodeAttrID, - const CUkernelNodeAttrValue *); - static auto func_ptr = LoadSymbol("cuGraphKernelNodeSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, attr, value); -} - -CUresult CUDAAPI cuGraphDebugDotPrint(CUgraph hGraph, const char *path, - unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph, const char *, unsigned int); - static auto func_ptr = LoadSymbol("cuGraphDebugDotPrint"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraph, path, flags); -} - -CUresult CUDAAPI cuUserObjectCreate(CUuserObject *object_out, void *ptr, - CUhostFn destroy, - 
unsigned int initialRefcount, - unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUuserObject *, void *, CUhostFn, - unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cuUserObjectCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(object_out, ptr, destroy, initialRefcount, flags); -} - -CUresult CUDAAPI cuUserObjectRetain(CUuserObject object, unsigned int count) { - using FuncPtr = CUresult(CUDAAPI *)(CUuserObject, unsigned int); - static auto func_ptr = LoadSymbol("cuUserObjectRetain"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(object, count); -} - -CUresult CUDAAPI cuUserObjectRelease(CUuserObject object, unsigned int count) { - using FuncPtr = CUresult(CUDAAPI *)(CUuserObject, unsigned int); - static auto func_ptr = LoadSymbol("cuUserObjectRelease"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(object, count); -} - -CUresult CUDAAPI cuGraphRetainUserObject(CUgraph graph, CUuserObject object, - unsigned int count, - unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(CUgraph, CUuserObject, unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cuGraphRetainUserObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, object, count, flags); -} - -CUresult CUDAAPI cuGraphReleaseUserObject(CUgraph graph, CUuserObject object, - unsigned int count) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraph, CUuserObject, unsigned int); - static auto func_ptr = LoadSymbol("cuGraphReleaseUserObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, object, count); -} - -CUresult CUDAAPI cuOccupancyMaxActiveBlocksPerMultiprocessor( - int *numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize) { - using FuncPtr = CUresult(CUDAAPI *)(int *, CUfunction, int, size_t); - static auto func_ptr = - LoadSymbol("cuOccupancyMaxActiveBlocksPerMultiprocessor"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize); -} - -CUresult CUDAAPI cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( - int *numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize, - unsigned int flags) { - using FuncPtr = - CUresult(CUDAAPI *)(int *, CUfunction, int, size_t, unsigned int); - static auto func_ptr = LoadSymbol( - "cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize, flags); -} - -CUresult CUDAAPI cuOccupancyMaxPotentialBlockSize( - int *minGridSize, int *blockSize, CUfunction func, - CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, - int blockSizeLimit) { - using FuncPtr = CUresult(CUDAAPI *)(int *, int *, CUfunction, - CUoccupancyB2DSize, size_t, int); - static auto func_ptr = - LoadSymbol("cuOccupancyMaxPotentialBlockSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(minGridSize, blockSize, func, blockSizeToDynamicSMemSize, - dynamicSMemSize, blockSizeLimit); -} - -CUresult CUDAAPI cuOccupancyMaxPotentialBlockSizeWithFlags( - int *minGridSize, int *blockSize, CUfunction func, - CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, - int blockSizeLimit, unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)( - int *, int *, CUfunction, CUoccupancyB2DSize, size_t, int, unsigned int); - static auto func_ptr = - LoadSymbol("cuOccupancyMaxPotentialBlockSizeWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(minGridSize, blockSize, func, blockSizeToDynamicSMemSize, - dynamicSMemSize, blockSizeLimit, flags); -} - -CUresult CUDAAPI cuOccupancyAvailableDynamicSMemPerBlock( - size_t *dynamicSmemSize, CUfunction func, int numBlocks, int blockSize) { - using FuncPtr = CUresult(CUDAAPI *)(size_t *, CUfunction, int, int); - static auto func_ptr = - 
LoadSymbol("cuOccupancyAvailableDynamicSMemPerBlock"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dynamicSmemSize, func, numBlocks, blockSize); -} - -CUresult CUDAAPI cuOccupancyMaxPotentialClusterSize( - int *clusterSize, CUfunction func, const CUlaunchConfig *config) { - using FuncPtr = - CUresult(CUDAAPI *)(int *, CUfunction, const CUlaunchConfig *); - static auto func_ptr = - LoadSymbol("cuOccupancyMaxPotentialClusterSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(clusterSize, func, config); -} - -CUresult CUDAAPI cuOccupancyMaxActiveClusters(int *numClusters, CUfunction func, - const CUlaunchConfig *config) { - using FuncPtr = - CUresult(CUDAAPI *)(int *, CUfunction, const CUlaunchConfig *); - static auto func_ptr = LoadSymbol("cuOccupancyMaxActiveClusters"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numClusters, func, config); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetArray(CUtexref hTexRef, - CUarray hArray, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUarray, unsigned int); - static auto func_ptr = LoadSymbol("cuTexRefSetArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, hArray, Flags); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetMipmappedArray( - CUtexref hTexRef, CUmipmappedArray hMipmappedArray, unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUmipmappedArray, unsigned int); - static auto func_ptr = LoadSymbol("cuTexRefSetMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, hMipmappedArray, Flags); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetAddress(size_t *ByteOffset, - CUtexref hTexRef, - CUdeviceptr dptr, - size_t bytes) { - using FuncPtr = CUresult(CUDAAPI *)(size_t *, CUtexref, CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuTexRefSetAddress_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(ByteOffset, hTexRef, dptr, bytes); -} - -__CUDA_DEPRECATED CUresult CUDAAPI -cuTexRefSetAddress2D(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, - CUdeviceptr dptr, size_t Pitch) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, const CUDA_ARRAY_DESCRIPTOR *, - CUdeviceptr, size_t); - static auto func_ptr = LoadSymbol("cuTexRefSetAddress2D_v3"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, desc, dptr, Pitch); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetFormat(CUtexref hTexRef, - CUarray_format fmt, - int NumPackedComponents) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUarray_format, int); - static auto func_ptr = LoadSymbol("cuTexRefSetFormat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, fmt, NumPackedComponents); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetAddressMode(CUtexref hTexRef, - int dim, - CUaddress_mode am) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, int, CUaddress_mode); - static auto func_ptr = LoadSymbol("cuTexRefSetAddressMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, dim, am); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetFilterMode(CUtexref hTexRef, - CUfilter_mode fm) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUfilter_mode); - static auto func_ptr = LoadSymbol("cuTexRefSetFilterMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, fm); -} - -__CUDA_DEPRECATED CUresult CUDAAPI -cuTexRefSetMipmapFilterMode(CUtexref hTexRef, CUfilter_mode fm) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUfilter_mode); - static auto func_ptr = LoadSymbol("cuTexRefSetMipmapFilterMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, fm); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetMipmapLevelBias(CUtexref hTexRef, - float bias) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, float); - static auto func_ptr = 
LoadSymbol("cuTexRefSetMipmapLevelBias"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, bias); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetMipmapLevelClamp( - CUtexref hTexRef, float minMipmapLevelClamp, float maxMipmapLevelClamp) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, float, float); - static auto func_ptr = LoadSymbol("cuTexRefSetMipmapLevelClamp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, minMipmapLevelClamp, maxMipmapLevelClamp); -} - -__CUDA_DEPRECATED CUresult CUDAAPI -cuTexRefSetMaxAnisotropy(CUtexref hTexRef, unsigned int maxAniso) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, unsigned int); - static auto func_ptr = LoadSymbol("cuTexRefSetMaxAnisotropy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, maxAniso); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetBorderColor(CUtexref hTexRef, - float *pBorderColor) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, float *); - static auto func_ptr = LoadSymbol("cuTexRefSetBorderColor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, pBorderColor); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetFlags(CUtexref hTexRef, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref, unsigned int); - static auto func_ptr = LoadSymbol("cuTexRefSetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hTexRef, Flags); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetAddress(CUdeviceptr *pdptr, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetAddress_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pdptr, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetArray(CUarray *phArray, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray *, CUtexref); - static auto func_ptr = 
LoadSymbol("cuTexRefGetArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phArray, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetMipmappedArray( - CUmipmappedArray *phMipmappedArray, CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUmipmappedArray *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phMipmappedArray, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetAddressMode(CUaddress_mode *pam, - CUtexref hTexRef, - int dim) { - using FuncPtr = CUresult(CUDAAPI *)(CUaddress_mode *, CUtexref, int); - static auto func_ptr = LoadSymbol("cuTexRefGetAddressMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pam, hTexRef, dim); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetFilterMode(CUfilter_mode *pfm, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUfilter_mode *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetFilterMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pfm, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetFormat(CUarray_format *pFormat, - int *pNumChannels, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray_format *, int *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetFormat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFormat, pNumChannels, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI -cuTexRefGetMipmapFilterMode(CUfilter_mode *pfm, CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUfilter_mode *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetMipmapFilterMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pfm, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI -cuTexRefGetMipmapLevelBias(float *pbias, CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(float *, CUtexref); - static auto 
func_ptr = LoadSymbol("cuTexRefGetMipmapLevelBias"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pbias, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI -cuTexRefGetMipmapLevelClamp(float *pminMipmapLevelClamp, - float *pmaxMipmapLevelClamp, CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(float *, float *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetMipmapLevelClamp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pminMipmapLevelClamp, pmaxMipmapLevelClamp, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetMaxAnisotropy(int *pmaxAniso, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(int *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetMaxAnisotropy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pmaxAniso, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetBorderColor(float *pBorderColor, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(float *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetBorderColor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pBorderColor, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetFlags(unsigned int *pFlags, - CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(unsigned int *, CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFlags, hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefCreate(CUtexref *pTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref *); - static auto func_ptr = LoadSymbol("cuTexRefCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefDestroy(CUtexref hTexRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexref); - static auto func_ptr = LoadSymbol("cuTexRefDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(hTexRef); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuSurfRefSetArray(CUsurfref hSurfRef, - CUarray hArray, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUsurfref, CUarray, unsigned int); - static auto func_ptr = LoadSymbol("cuSurfRefSetArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hSurfRef, hArray, Flags); -} - -__CUDA_DEPRECATED CUresult CUDAAPI cuSurfRefGetArray(CUarray *phArray, - CUsurfref hSurfRef) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray *, CUsurfref); - static auto func_ptr = LoadSymbol("cuSurfRefGetArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(phArray, hSurfRef); -} - -CUresult CUDAAPI -cuTexObjectCreate(CUtexObject *pTexObject, const CUDA_RESOURCE_DESC *pResDesc, - const CUDA_TEXTURE_DESC *pTexDesc, - const CUDA_RESOURCE_VIEW_DESC *pResViewDesc) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexObject *, const CUDA_RESOURCE_DESC *, - const CUDA_TEXTURE_DESC *, - const CUDA_RESOURCE_VIEW_DESC *); - static auto func_ptr = LoadSymbol("cuTexObjectCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexObject, pResDesc, pTexDesc, pResViewDesc); -} - -CUresult CUDAAPI cuTexObjectDestroy(CUtexObject texObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUtexObject); - static auto func_ptr = LoadSymbol("cuTexObjectDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texObject); -} - -CUresult CUDAAPI cuTexObjectGetResourceDesc(CUDA_RESOURCE_DESC *pResDesc, - CUtexObject texObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_RESOURCE_DESC *, CUtexObject); - static auto func_ptr = LoadSymbol("cuTexObjectGetResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, texObject); -} - -CUresult CUDAAPI cuTexObjectGetTextureDesc(CUDA_TEXTURE_DESC *pTexDesc, - CUtexObject texObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_TEXTURE_DESC *, CUtexObject); - static auto func_ptr = 
LoadSymbol("cuTexObjectGetTextureDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexDesc, texObject); -} - -CUresult CUDAAPI cuTexObjectGetResourceViewDesc( - CUDA_RESOURCE_VIEW_DESC *pResViewDesc, CUtexObject texObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_RESOURCE_VIEW_DESC *, CUtexObject); - static auto func_ptr = LoadSymbol("cuTexObjectGetResourceViewDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResViewDesc, texObject); -} - -CUresult CUDAAPI cuSurfObjectCreate(CUsurfObject *pSurfObject, - const CUDA_RESOURCE_DESC *pResDesc) { - using FuncPtr = - CUresult(CUDAAPI *)(CUsurfObject *, const CUDA_RESOURCE_DESC *); - static auto func_ptr = LoadSymbol("cuSurfObjectCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pSurfObject, pResDesc); -} - -CUresult CUDAAPI cuSurfObjectDestroy(CUsurfObject surfObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUsurfObject); - static auto func_ptr = LoadSymbol("cuSurfObjectDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfObject); -} - -CUresult CUDAAPI cuSurfObjectGetResourceDesc(CUDA_RESOURCE_DESC *pResDesc, - CUsurfObject surfObject) { - using FuncPtr = CUresult(CUDAAPI *)(CUDA_RESOURCE_DESC *, CUsurfObject); - static auto func_ptr = LoadSymbol("cuSurfObjectGetResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, surfObject); -} - -CUresult CUDAAPI cuTensorMapEncodeTiled( - CUtensorMap *tensorMap, CUtensorMapDataType tensorDataType, - cuuint32_t tensorRank, void *globalAddress, const cuuint64_t *globalDim, - const cuuint64_t *globalStrides, const cuuint32_t *boxDim, - const cuuint32_t *elementStrides, CUtensorMapInterleave interleave, - CUtensorMapSwizzle swizzle, CUtensorMapL2promotion l2Promotion, - CUtensorMapFloatOOBfill oobFill) { - using FuncPtr = CUresult(CUDAAPI *)( - CUtensorMap *, CUtensorMapDataType, cuuint32_t, void *, - const cuuint64_t *, const 
cuuint64_t *, const cuuint32_t *, - const cuuint32_t *, CUtensorMapInterleave, CUtensorMapSwizzle, - CUtensorMapL2promotion, CUtensorMapFloatOOBfill); - static auto func_ptr = LoadSymbol("cuTensorMapEncodeTiled"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorMap, tensorDataType, tensorRank, globalAddress, - globalDim, globalStrides, boxDim, elementStrides, interleave, - swizzle, l2Promotion, oobFill); -} - -CUresult CUDAAPI cuTensorMapEncodeIm2col( - CUtensorMap *tensorMap, CUtensorMapDataType tensorDataType, - cuuint32_t tensorRank, void *globalAddress, const cuuint64_t *globalDim, - const cuuint64_t *globalStrides, const int *pixelBoxLowerCorner, - const int *pixelBoxUpperCorner, cuuint32_t channelsPerPixel, - cuuint32_t pixelsPerColumn, const cuuint32_t *elementStrides, - CUtensorMapInterleave interleave, CUtensorMapSwizzle swizzle, - CUtensorMapL2promotion l2Promotion, CUtensorMapFloatOOBfill oobFill) { - using FuncPtr = CUresult(CUDAAPI *)( - CUtensorMap *, CUtensorMapDataType, cuuint32_t, void *, - const cuuint64_t *, const cuuint64_t *, const int *, const int *, - cuuint32_t, cuuint32_t, const cuuint32_t *, CUtensorMapInterleave, - CUtensorMapSwizzle, CUtensorMapL2promotion, CUtensorMapFloatOOBfill); - static auto func_ptr = LoadSymbol("cuTensorMapEncodeIm2col"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorMap, tensorDataType, tensorRank, globalAddress, - globalDim, globalStrides, pixelBoxLowerCorner, - pixelBoxUpperCorner, channelsPerPixel, pixelsPerColumn, - elementStrides, interleave, swizzle, l2Promotion, oobFill); -} - -CUresult CUDAAPI cuTensorMapReplaceAddress(CUtensorMap *tensorMap, - void *globalAddress) { - using FuncPtr = CUresult(CUDAAPI *)(CUtensorMap *, void *); - static auto func_ptr = LoadSymbol("cuTensorMapReplaceAddress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorMap, globalAddress); -} - -CUresult CUDAAPI cuDeviceCanAccessPeer(int *canAccessPeer, 
CUdevice dev, - CUdevice peerDev) { - using FuncPtr = CUresult(CUDAAPI *)(int *, CUdevice, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceCanAccessPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(canAccessPeer, dev, peerDev); -} - -CUresult CUDAAPI cuCtxEnablePeerAccess(CUcontext peerContext, - unsigned int Flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext, unsigned int); - static auto func_ptr = LoadSymbol("cuCtxEnablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerContext, Flags); -} - -CUresult CUDAAPI cuCtxDisablePeerAccess(CUcontext peerContext) { - using FuncPtr = CUresult(CUDAAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuCtxDisablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerContext); -} - -CUresult CUDAAPI cuDeviceGetP2PAttribute(int *value, - CUdevice_P2PAttribute attrib, - CUdevice srcDevice, - CUdevice dstDevice) { - using FuncPtr = - CUresult(CUDAAPI *)(int *, CUdevice_P2PAttribute, CUdevice, CUdevice); - static auto func_ptr = LoadSymbol("cuDeviceGetP2PAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attrib, srcDevice, dstDevice); -} - -CUresult CUDAAPI cuGraphicsUnregisterResource(CUgraphicsResource resource) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphicsResource); - static auto func_ptr = LoadSymbol("cuGraphicsUnregisterResource"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource); -} - -CUresult CUDAAPI cuGraphicsSubResourceGetMappedArray( - CUarray *pArray, CUgraphicsResource resource, unsigned int arrayIndex, - unsigned int mipLevel) { - using FuncPtr = CUresult(CUDAAPI *)(CUarray *, CUgraphicsResource, - unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cuGraphicsSubResourceGetMappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pArray, resource, arrayIndex, mipLevel); -} - -CUresult CUDAAPI 
cuGraphicsResourceGetMappedMipmappedArray( - CUmipmappedArray *pMipmappedArray, CUgraphicsResource resource) { - using FuncPtr = CUresult(CUDAAPI *)(CUmipmappedArray *, CUgraphicsResource); - static auto func_ptr = - LoadSymbol("cuGraphicsResourceGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pMipmappedArray, resource); -} - -CUresult CUDAAPI cuGraphicsResourceGetMappedPointer( - CUdeviceptr *pDevPtr, size_t *pSize, CUgraphicsResource resource) { - using FuncPtr = - CUresult(CUDAAPI *)(CUdeviceptr *, size_t *, CUgraphicsResource); - static auto func_ptr = - LoadSymbol("cuGraphicsResourceGetMappedPointer_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pDevPtr, pSize, resource); -} - -CUresult CUDAAPI cuGraphicsResourceSetMapFlags(CUgraphicsResource resource, - unsigned int flags) { - using FuncPtr = CUresult(CUDAAPI *)(CUgraphicsResource, unsigned int); - static auto func_ptr = - LoadSymbol("cuGraphicsResourceSetMapFlags_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource, flags); -} - -CUresult CUDAAPI cuGraphicsMapResources(unsigned int count, - CUgraphicsResource *resources, - CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(unsigned int, CUgraphicsResource *, CUstream); - static auto func_ptr = LoadSymbol("cuGraphicsMapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, hStream); -} - -CUresult CUDAAPI cuGraphicsUnmapResources(unsigned int count, - CUgraphicsResource *resources, - CUstream hStream) { - using FuncPtr = - CUresult(CUDAAPI *)(unsigned int, CUgraphicsResource *, CUstream); - static auto func_ptr = LoadSymbol("cuGraphicsUnmapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, hStream); -} - -CUresult CUDAAPI cuGetProcAddress( - const char *symbol, void **pfn, int cudaVersion, cuuint64_t flags, - CUdriverProcAddressQueryResult *symbolStatus) { - 
using FuncPtr = CUresult(CUDAAPI *)(const char *, void **, int, cuuint64_t, - CUdriverProcAddressQueryResult *); - static auto func_ptr = LoadSymbol("cuGetProcAddress_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, pfn, cudaVersion, flags, symbolStatus); -} - -CUresult CUDAAPI cuGetExportTable(const void **ppExportTable, - const CUuuid *pExportTableId) { - using FuncPtr = CUresult(CUDAAPI *)(const void **, const CUuuid *); - static auto func_ptr = LoadSymbol("cuGetExportTable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ppExportTable, pExportTableId); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cuda_runtime_10_0.inc b/third_party/xla/third_party/tsl/tsl/cuda/cuda_runtime_10_0.inc deleted file mode 100644 index 6810c05d679a2f..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cuda_runtime_10_0.inc +++ /dev/null @@ -1,1846 +0,0 @@ -// Auto-generated, do not edit. - -extern "C" { - -extern __host__ cudaError_t CUDARTAPI cudaDeviceReset(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaDeviceReset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceSynchronize(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaDeviceSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceSetLimit(enum cudaLimit limit, - size_t value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); - static auto func_ptr = LoadSymbol("cudaDeviceSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetLimit(size_t *pValue, enum cudaLimit limit) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum 
cudaLimit); - static auto func_ptr = LoadSymbol("cudaDeviceGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pValue, limit); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetCacheConfig(enum cudaFuncCache *pCacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); - static auto func_ptr = LoadSymbol("cudaDeviceGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetStreamPriorityRange(int *leastPriority, int *greatestPriority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int *); - static auto func_ptr = - LoadSymbol("cudaDeviceGetStreamPriorityRange"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(leastPriority, greatestPriority); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetCacheConfig(enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaDeviceSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(cacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetSharedMemConfig(enum cudaSharedMemConfig *pConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig *); - static auto func_ptr = LoadSymbol("cudaDeviceGetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetSharedMemConfig(enum cudaSharedMemConfig config) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig); - static auto func_ptr = LoadSymbol("cudaDeviceSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceGetByPCIBusId(int *device, const char *pciBusId) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)(int *, const char *); - static auto func_ptr = LoadSymbol("cudaDeviceGetByPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, pciBusId); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceGetPCIBusId(char *pciBusId, - int len, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(char *, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pciBusId, len, device); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcGetEventHandle(cudaIpcEventHandle_t *handle, cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcEventHandle_t *, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaIpcGetEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcOpenEventHandle(cudaEvent_t *event, cudaIpcEventHandle_t handle) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, cudaIpcEventHandle_t); - static auto func_ptr = LoadSymbol("cudaIpcOpenEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, handle); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcGetMemHandle(cudaIpcMemHandle_t *handle, void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcMemHandle_t *, void *); - static auto func_ptr = LoadSymbol("cudaIpcGetMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaIpcOpenMemHandle( - void **devPtr, cudaIpcMemHandle_t handle, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, cudaIpcMemHandle_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaIpcOpenMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, handle, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaIpcCloseMemHandle(void *devPtr) { 
- using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaIpcCloseMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaThreadExit(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaThreadExit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSynchronize(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaThreadSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSetLimit(enum cudaLimit limit, size_t value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); - static auto func_ptr = LoadSymbol("cudaThreadSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadGetLimit(size_t *pValue, enum cudaLimit limit) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); - static auto func_ptr = LoadSymbol("cudaThreadGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pValue, limit); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadGetCacheConfig(enum cudaFuncCache *pCacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); - static auto func_ptr = LoadSymbol("cudaThreadGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCacheConfig); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSetCacheConfig(enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaThreadSetCacheConfig"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(cacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetLastError(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaGetLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaPeekAtLastError(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaPeekAtLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ const char *CUDARTAPI -cudaGetErrorName(cudaError_t error) { - using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); - static auto func_ptr = LoadSymbol("cudaGetErrorName"); - if (!func_ptr) return "cudaGetErrorName symbol not found."; - return func_ptr(error); -} - -extern __host__ __cudart_builtin__ const char *CUDARTAPI -cudaGetErrorString(cudaError_t error) { - using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); - static auto func_ptr = LoadSymbol("cudaGetErrorString"); - if (!func_ptr) return "cudaGetErrorString symbol not found."; - return func_ptr(error); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDeviceCount(int *count) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaGetDeviceCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDeviceProperties(struct cudaDeviceProp *prop, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaDeviceProp *, int); - static auto func_ptr = LoadSymbol("cudaGetDeviceProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(prop, device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetAttribute(int *value, enum cudaDeviceAttr attr, int device) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceAttr, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attr, device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetP2PAttribute(int *value, enum cudaDeviceP2PAttr attr, - int srcDevice, int dstDevice) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceP2PAttr, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetP2PAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attr, srcDevice, dstDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaChooseDevice(int *device, const struct cudaDeviceProp *prop) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const struct cudaDeviceProp *); - static auto func_ptr = LoadSymbol("cudaChooseDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, prop); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDevice(int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaSetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDevice(int *device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaGetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetValidDevices(int *device_arr, - int len) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int); - static auto func_ptr = LoadSymbol("cudaSetValidDevices"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device_arr, len); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDeviceFlags(unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int); - static auto func_ptr = LoadSymbol("cudaSetDeviceFlags"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetDeviceFlags(unsigned int *flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *); - static auto func_ptr = LoadSymbol("cudaGetDeviceFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamCreate(cudaStream_t *pStream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *); - static auto func_ptr = LoadSymbol("cudaStreamCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCreateWithFlags(cudaStream_t *pStream, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamCreateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCreateWithPriority(cudaStream_t *pStream, unsigned int flags, - int priority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int, int); - static auto func_ptr = LoadSymbol("cudaStreamCreateWithPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream, flags, priority); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetPriority(cudaStream_t hStream, int *priority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, int *); - static auto func_ptr = LoadSymbol("cudaStreamGetPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, priority); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetFlags(cudaStream_t hStream, unsigned int *flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, unsigned int *); - static auto func_ptr = LoadSymbol("cudaStreamGetFlags"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamDestroy(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamWaitEvent( - cudaStream_t stream, cudaEvent_t event, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, cudaEvent_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamWaitEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, event, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamAddCallback(cudaStream_t stream, cudaStreamCallback_t callback, - void *userData, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStreamCallback_t, - void *, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamAddCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, callback, userData, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamSynchronize(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamQuery(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamAttachMemAsync(cudaStream_t stream, void *devPtr, - size_t length __dv(0), - unsigned int flags __dv(cudaMemAttachSingle)) { - using FuncPtr = - cudaError_t(CUDARTAPI 
*)(cudaStream_t, void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamAttachMemAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, devPtr, length, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamBeginCapture(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamBeginCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamEndCapture(cudaStream_t stream, cudaGraph_t *pGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaGraph_t *); - static auto func_ptr = LoadSymbol("cudaStreamEndCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pGraph); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamIsCapturing( - cudaStream_t stream, enum cudaStreamCaptureStatus *pCaptureStatus) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamCaptureStatus *); - static auto func_ptr = LoadSymbol("cudaStreamIsCapturing"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pCaptureStatus); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventCreate(cudaEvent_t *event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *); - static auto func_ptr = LoadSymbol("cudaEventCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventCreateWithFlags(cudaEvent_t *event, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaEventCreateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventRecord(cudaEvent_t event, cudaStream_t stream __dv(0)) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)(cudaEvent_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaEventRecord"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventQuery(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventSynchronize(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventDestroy(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventElapsedTime(float *ms, - cudaEvent_t start, - cudaEvent_t end) { - using FuncPtr = cudaError_t(CUDARTAPI *)(float *, cudaEvent_t, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventElapsedTime"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ms, start, end); -} - -extern __host__ cudaError_t CUDARTAPI cudaImportExternalMemory( - cudaExternalMemory_t *extMem_out, - const struct cudaExternalMemoryHandleDesc *memHandleDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaExternalMemory_t *, const struct cudaExternalMemoryHandleDesc *); - static auto func_ptr = LoadSymbol("cudaImportExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem_out, memHandleDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaExternalMemoryGetMappedBuffer( - void **devPtr, cudaExternalMemory_t extMem, - const struct cudaExternalMemoryBufferDesc 
*bufferDesc) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, cudaExternalMemory_t, - const struct cudaExternalMemoryBufferDesc *); - static auto func_ptr = - LoadSymbol("cudaExternalMemoryGetMappedBuffer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, extMem, bufferDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaExternalMemoryGetMappedMipmappedArray( - cudaMipmappedArray_t *mipmap, cudaExternalMemory_t extMem, - const struct cudaExternalMemoryMipmappedArrayDesc *mipmapDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMipmappedArray_t *, cudaExternalMemory_t, - const struct cudaExternalMemoryMipmappedArrayDesc *); - static auto func_ptr = - LoadSymbol("cudaExternalMemoryGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmap, extMem, mipmapDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyExternalMemory(cudaExternalMemory_t extMem) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaExternalMemory_t); - static auto func_ptr = LoadSymbol("cudaDestroyExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem); -} - -extern __host__ cudaError_t CUDARTAPI cudaImportExternalSemaphore( - cudaExternalSemaphore_t *extSem_out, - const struct cudaExternalSemaphoreHandleDesc *semHandleDesc) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreHandleDesc *); - static auto func_ptr = LoadSymbol("cudaImportExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem_out, semHandleDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaSignalExternalSemaphoresAsync( - const cudaExternalSemaphore_t *extSemArray, - const struct cudaExternalSemaphoreSignalParams *paramsArray, - unsigned int numExtSems, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const cudaExternalSemaphore_t *, - const struct 
cudaExternalSemaphoreSignalParams *, - unsigned int, cudaStream_t); - static auto func_ptr = - LoadSymbol("cudaSignalExternalSemaphoresAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaWaitExternalSemaphoresAsync( - const cudaExternalSemaphore_t *extSemArray, - const struct cudaExternalSemaphoreWaitParams *paramsArray, - unsigned int numExtSems, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreWaitParams *, - unsigned int, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaWaitExternalSemaphoresAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyExternalSemaphore(cudaExternalSemaphore_t extSem) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaExternalSemaphore_t); - static auto func_ptr = LoadSymbol("cudaDestroyExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem); -} - -extern __host__ cudaError_t CUDARTAPI -cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, void **args, - size_t sharedMem, cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaLaunchKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, gridDim, blockDim, args, sharedMem, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchCooperativeKernel( - const void *func, dim3 gridDim, dim3 blockDim, void **args, - size_t sharedMem, cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaLaunchCooperativeKernel"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(func, gridDim, blockDim, args, sharedMem, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchCooperativeKernelMultiDevice( - struct cudaLaunchParams *launchParamsList, unsigned int numDevices, - unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaLaunchParams *, - unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cudaLaunchCooperativeKernelMultiDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(launchParamsList, numDevices, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFuncSetCacheConfig(const void *func, enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaFuncSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, cacheConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFuncSetSharedMemConfig(const void *func, enum cudaSharedMemConfig config) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, enum cudaSharedMemConfig); - static auto func_ptr = LoadSymbol("cudaFuncSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, config); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFuncGetAttributes(struct cudaFuncAttributes *attr, const void *func) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaFuncAttributes *, const void *); - static auto func_ptr = LoadSymbol("cudaFuncGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attr, func); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFuncSetAttribute(const void *func, enum cudaFuncAttribute attr, int value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncAttribute, int); - static auto func_ptr = LoadSymbol("cudaFuncSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(func, attr, value); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaSetDoubleForDevice(double *d) { - using FuncPtr = cudaError_t(CUDARTAPI *)(double *); - static auto func_ptr = LoadSymbol("cudaSetDoubleForDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(d); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaSetDoubleForHost(double *d) { - using FuncPtr = cudaError_t(CUDARTAPI *)(double *); - static auto func_ptr = LoadSymbol("cudaSetDoubleForHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(d); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchHostFunc(cudaStream_t stream, - cudaHostFn_t fn, - void *userData) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaHostFn_t, void *); - static auto func_ptr = LoadSymbol("cudaLaunchHostFunc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, fn, userData); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *func, - int blockSize, - size_t dynamicSMemSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t); - static auto func_ptr = - LoadSymbol("cudaOccupancyMaxActiveBlocksPerMultiprocessor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, - const void *func, - int blockSize, - size_t dynamicSMemSize, - unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t, unsigned int); - static auto func_ptr = LoadSymbol( - "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize, flags); -} - -extern __host__ cudaError_t 
CUDARTAPI -cudaConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem __dv(0), - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(dim3, dim3, size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaConfigureCall"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(gridDim, blockDim, sharedMem, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetupArgument(const void *arg, - size_t size, - size_t offset) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaSetupArgument"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(arg, size, offset); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunch(const void *func) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *); - static auto func_ptr = LoadSymbol("cudaLaunch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMallocManaged( - void **devPtr, size_t size, unsigned int flags __dv(cudaMemAttachGlobal)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocManaged"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMalloc(void **devPtr, size_t size) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cudaMalloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocHost(void **ptr, size_t size) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cudaMallocHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocPitch(void **devPtr, - 
size_t *pitch, - size_t width, - size_t height) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t *, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaMallocPitch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, width, height); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocArray( - cudaArray_t *array, const struct cudaChannelFormatDesc *desc, size_t width, - size_t height __dv(0), unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, - const struct cudaChannelFormatDesc *, - size_t, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, desc, width, height, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFree(void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaFree"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeHost(void *ptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaFreeHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeArray(cudaArray_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t); - static auto func_ptr = LoadSymbol("cudaFreeArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFreeMipmappedArray(cudaMipmappedArray_t mipmappedArray) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t); - static auto func_ptr = LoadSymbol("cudaFreeMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostAlloc(void **pHost, size_t size, - unsigned int flags) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostAlloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHost, size, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostRegister(void *ptr, size_t size, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostRegister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostUnregister(void *ptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaHostUnregister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaHostGetDevicePointer(void **pDevice, void *pHost, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, void *, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostGetDevicePointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pDevice, pHost, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostGetFlags(unsigned int *pFlags, - void *pHost) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *, void *); - static auto func_ptr = LoadSymbol("cudaHostGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFlags, pHost); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMalloc3D(struct cudaPitchedPtr *pitchedDevPtr, struct cudaExtent extent) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr *, struct cudaExtent); - static auto func_ptr = LoadSymbol("cudaMalloc3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, extent); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMalloc3DArray(cudaArray_t *array, const struct cudaChannelFormatDesc *desc, - struct cudaExtent extent, unsigned int flags __dv(0)) { - using 
FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, - const struct cudaChannelFormatDesc *, - struct cudaExtent, unsigned int); - static auto func_ptr = LoadSymbol("cudaMalloc3DArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, desc, extent, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocMipmappedArray( - cudaMipmappedArray_t *mipmappedArray, - const struct cudaChannelFormatDesc *desc, struct cudaExtent extent, - unsigned int numLevels, unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMipmappedArray_t *, const struct cudaChannelFormatDesc *, - struct cudaExtent, unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray, desc, extent, numLevels, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetMipmappedArrayLevel( - cudaArray_t *levelArray, cudaMipmappedArray_const_t mipmappedArray, - unsigned int level) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t *, cudaMipmappedArray_const_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGetMipmappedArrayLevel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(levelArray, mipmappedArray, level); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpy3D(const struct cudaMemcpy3DParms *p) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaMemcpy3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpy3DPeer(const struct cudaMemcpy3DPeerParms *p) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *); - static auto func_ptr = LoadSymbol("cudaMemcpy3DPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy3DAsync( - 
const struct cudaMemcpy3DParms *p, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy3DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy3DPeerAsync( - const struct cudaMemcpy3DPeerParms *p, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *, - cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy3DPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemGetInfo(size_t *free, - size_t *total) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaMemGetInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(free, total); -} - -extern __host__ cudaError_t CUDARTAPI -cudaArrayGetInfo(struct cudaChannelFormatDesc *desc, struct cudaExtent *extent, - unsigned int *flags, cudaArray_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, - struct cudaExtent *, unsigned int *, - cudaArray_t); - static auto func_ptr = LoadSymbol("cudaArrayGetInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(desc, extent, flags, array); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy(void *dst, const void *src, - size_t count, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, - enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyPeer(void *dst, int dstDevice, - const void *src, - int srcDevice, - size_t count) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, int, const void 
*, int, size_t); - static auto func_ptr = LoadSymbol("cudaMemcpyPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dstDevice, src, srcDevice, count); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpyToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, - const void *src, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t, size_t, size_t, const void *, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpyFromArray(void *dst, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyFromArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, wOffset, hOffset, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyArrayToArray( - cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, - cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, - enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, cudaArray_const_t, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyArrayToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, - count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2D(void *dst, size_t dpitch, - const void *src, - size_t spitch, size_t width, - size_t height, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, - size_t, size_t, enum 
cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, spitch, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArray( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, const void *, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArray( - void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, size_t, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DArrayToArray( - cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, - cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, - size_t height, enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, - cudaArray_const_t, size_t, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DArrayToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, - width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbol( - const void *symbol, const void *src, 
size_t count, size_t offset __dv(0), - enum cudaMemcpyKind kind __dv(cudaMemcpyHostToDevice)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, - size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbol( - void *dst, const void *symbol, size_t count, size_t offset __dv(0), - enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToHost)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, - enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, symbol, count, offset, kind); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemcpyAsync(void *dst, const void *src, size_t count, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, count, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpyPeerAsync(void *dst, int dstDevice, const void *src, int srcDevice, - size_t count, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, const void *, int, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dstDevice, src, srcDevice, count, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToArrayAsync( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t count, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, 
size_t, size_t, const void *, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyToArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, count, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromArrayAsync( - void *dst, cudaArray_const_t src, size_t wOffset, size_t hOffset, - size_t count, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, size_t, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyFromArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, wOffset, hOffset, count, kind, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy2DAsync( - void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, - size_t height, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, size_t, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, spitch, width, height, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArrayAsync( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, - const void *, size_t, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DToArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind, - stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArrayAsync( 
- void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, - size_t, size_t, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind, - stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbolAsync( - const void *symbol, const void *src, size_t count, size_t offset, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyToSymbolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, src, count, offset, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbolAsync( - void *dst, const void *symbol, size_t count, size_t offset, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, symbol, count, offset, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset(void *devPtr, int value, - size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t); - static auto func_ptr = LoadSymbol("cudaMemset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, value, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset2D(void *devPtr, size_t pitch, - int value, size_t width, - size_t height) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, 
size_t, int, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaMemset2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, value, width, height); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset3D( - struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, struct cudaExtent); - static auto func_ptr = LoadSymbol("cudaMemset3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, value, extent); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemsetAsync( - void *devPtr, int value, size_t count, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemsetAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, value, count, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemset2DAsync(void *devPtr, size_t pitch, int value, size_t width, - size_t height, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, int, size_t, size_t, - cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemset2DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, value, width, height, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemset3DAsync(struct cudaPitchedPtr pitchedDevPtr, int value, - struct cudaExtent extent, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, - struct cudaExtent, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemset3DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, value, extent, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSymbolAddress(void **devPtr, - const void *symbol) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)(void **, const void *); - static auto func_ptr = LoadSymbol("cudaGetSymbolAddress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSymbolSize(size_t *size, - const void *symbol) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, const void *); - static auto func_ptr = LoadSymbol("cudaGetSymbolSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(size, symbol); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPrefetchAsync(const void *devPtr, size_t count, int dstDevice, - cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, size_t, int, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemPrefetchAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, dstDevice, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemAdvise(const void *devPtr, size_t count, enum cudaMemoryAdvise advice, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, size_t, - enum cudaMemoryAdvise, int); - static auto func_ptr = LoadSymbol("cudaMemAdvise"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, advice, device); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttribute( - void *data, size_t dataSize, enum cudaMemRangeAttribute attribute, - const void *devPtr, size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - void *, size_t, enum cudaMemRangeAttribute, const void *, size_t); - static auto func_ptr = LoadSymbol("cudaMemRangeGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSize, attribute, devPtr, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttributes( - void **data, size_t *dataSizes, enum cudaMemRangeAttribute *attributes, - size_t numAttributes, const void *devPtr, size_t count) { - using FuncPtr = - 
cudaError_t(CUDARTAPI *)(void **, size_t *, enum cudaMemRangeAttribute *, - size_t, const void *, size_t); - static auto func_ptr = LoadSymbol("cudaMemRangeGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSizes, attributes, numAttributes, devPtr, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaPointerGetAttributes( - struct cudaPointerAttributes *attributes, const void *ptr) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPointerAttributes *, const void *); - static auto func_ptr = LoadSymbol("cudaPointerGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attributes, ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceCanAccessPeer(int *canAccessPeer, int device, int peerDevice) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceCanAccessPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(canAccessPeer, device, peerDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceEnablePeerAccess(int peerDevice, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int, unsigned int); - static auto func_ptr = LoadSymbol("cudaDeviceEnablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerDevice, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceDisablePeerAccess(int peerDevice) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaDeviceDisablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphicsUnregisterResource(cudaGraphicsResource_t resource) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t); - static auto func_ptr = LoadSymbol("cudaGraphicsUnregisterResource"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource); -} - -extern __host__ 
cudaError_t CUDARTAPI cudaGraphicsResourceSetMapFlags( - cudaGraphicsResource_t resource, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphicsResourceSetMapFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsMapResources( - int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphicsMapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsUnmapResources( - int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphicsUnmapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceGetMappedPointer( - void **devPtr, size_t *size, cudaGraphicsResource_t resource) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t *, cudaGraphicsResource_t); - static auto func_ptr = - LoadSymbol("cudaGraphicsResourceGetMappedPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsSubResourceGetMappedArray( - cudaArray_t *array, cudaGraphicsResource_t resource, - unsigned int arrayIndex, unsigned int mipLevel) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t *, cudaGraphicsResource_t, unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cudaGraphicsSubResourceGetMappedArray"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(array, resource, arrayIndex, mipLevel); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphicsResourceGetMappedMipmappedArray( - cudaMipmappedArray_t *mipmappedArray, cudaGraphicsResource_t resource) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t *, cudaGraphicsResource_t); - static auto func_ptr = - LoadSymbol("cudaGraphicsResourceGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray, resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetChannelDesc( - struct cudaChannelFormatDesc *desc, cudaArray_const_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, - cudaArray_const_t); - static auto func_ptr = LoadSymbol("cudaGetChannelDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(desc, array); -} - -extern __host__ struct cudaChannelFormatDesc CUDARTAPI cudaCreateChannelDesc( - int x, int y, int z, int w, enum cudaChannelFormatKind f) { - using FuncPtr = struct cudaChannelFormatDesc(CUDARTAPI *)( - int, int, int, int, enum cudaChannelFormatKind); - static auto func_ptr = LoadSymbol("cudaCreateChannelDesc"); - if (!func_ptr) { - return cudaChannelFormatDesc{cudaChannelFormatKind(-1), 0, 0, 0}; - } - return func_ptr(x, y, z, w, f); -} - -extern __host__ cudaError_t CUDARTAPI cudaBindTexture( - size_t *offset, const struct textureReference *texref, const void *devPtr, - const struct cudaChannelFormatDesc *desc, size_t size __dv(UINT_MAX)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct textureReference *, const void *, - const struct cudaChannelFormatDesc *, size_t); - static auto func_ptr = LoadSymbol("cudaBindTexture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref, devPtr, desc, size); -} - -extern __host__ cudaError_t CUDARTAPI -cudaBindTexture2D(size_t *offset, const struct textureReference *texref, - const void 
*devPtr, const struct cudaChannelFormatDesc *desc, - size_t width, size_t height, size_t pitch) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct textureReference *, const void *, - const struct cudaChannelFormatDesc *, size_t, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaBindTexture2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref, devPtr, desc, width, height, pitch); -} - -extern __host__ cudaError_t CUDARTAPI cudaBindTextureToArray( - const struct textureReference *texref, cudaArray_const_t array, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct textureReference *, cudaArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindTextureToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, array, desc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaBindTextureToMipmappedArray(const struct textureReference *texref, - cudaMipmappedArray_const_t mipmappedArray, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct textureReference *, cudaMipmappedArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindTextureToMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, mipmappedArray, desc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaUnbindTexture(const struct textureReference *texref) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct textureReference *); - static auto func_ptr = LoadSymbol("cudaUnbindTexture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureAlignmentOffset( - size_t *offset, const struct textureReference *texref) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(size_t *, const struct textureReference *); - static auto 
func_ptr = LoadSymbol("cudaGetTextureAlignmentOffset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureReference( - const struct textureReference **texref, const void *symbol) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct textureReference **, const void *); - static auto func_ptr = LoadSymbol("cudaGetTextureReference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaBindSurfaceToArray( - const struct surfaceReference *surfref, cudaArray_const_t array, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct surfaceReference *, cudaArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindSurfaceToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfref, array, desc); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSurfaceReference( - const struct surfaceReference **surfref, const void *symbol) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct surfaceReference **, const void *); - static auto func_ptr = LoadSymbol("cudaGetSurfaceReference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfref, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaCreateTextureObject( - cudaTextureObject_t *pTexObject, const struct cudaResourceDesc *pResDesc, - const struct cudaTextureDesc *pTexDesc, - const struct cudaResourceViewDesc *pResViewDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaTextureObject_t *, const struct cudaResourceDesc *, - const struct cudaTextureDesc *, const struct cudaResourceViewDesc *); - static auto func_ptr = LoadSymbol("cudaCreateTextureObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexObject, pResDesc, pTexDesc, pResViewDesc); -} - -extern __host__ cudaError_t 
CUDARTAPI -cudaDestroyTextureObject(cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaTextureObject_t); - static auto func_ptr = LoadSymbol("cudaDestroyTextureObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceDesc( - struct cudaResourceDesc *pResDesc, cudaTextureObject_t texObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaTextureObject_t); - static auto func_ptr = - LoadSymbol("cudaGetTextureObjectResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectTextureDesc( - struct cudaTextureDesc *pTexDesc, cudaTextureObject_t texObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaTextureDesc *, cudaTextureObject_t); - static auto func_ptr = LoadSymbol("cudaGetTextureObjectTextureDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceViewDesc( - struct cudaResourceViewDesc *pResViewDesc, cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaResourceViewDesc *, - cudaTextureObject_t); - static auto func_ptr = - LoadSymbol("cudaGetTextureObjectResourceViewDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResViewDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaCreateSurfaceObject( - cudaSurfaceObject_t *pSurfObject, const struct cudaResourceDesc *pResDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t *, - const struct cudaResourceDesc *); - static auto func_ptr = LoadSymbol("cudaCreateSurfaceObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pSurfObject, pResDesc); -} - -extern __host__ cudaError_t CUDARTAPI 
-cudaDestroySurfaceObject(cudaSurfaceObject_t surfObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t); - static auto func_ptr = LoadSymbol("cudaDestroySurfaceObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSurfaceObjectResourceDesc( - struct cudaResourceDesc *pResDesc, cudaSurfaceObject_t surfObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaSurfaceObject_t); - static auto func_ptr = - LoadSymbol("cudaGetSurfaceObjectResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, surfObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaDriverGetVersion(int *driverVersion) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaDriverGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(driverVersion); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaRuntimeGetVersion(int *runtimeVersion) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaRuntimeGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(runtimeVersion); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphCreate(cudaGraph_t *pGraph, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraph, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddKernelNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - cudaGraphNode_t *, size_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = 
LoadSymbol("cudaGraphAddKernelNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeGetParams( - cudaGraphNode_t node, struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeSetParams( - cudaGraphNode_t node, const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddMemcpyNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaMemcpy3DParms *pCopyParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - cudaGraphNode_t *, size_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemcpyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pCopyParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeGetParams( - cudaGraphNode_t node, struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParams( - 
cudaGraphNode_t node, const struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddMemsetNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaMemsetParams *pMemsetParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - cudaGraphNode_t *, size_t, - const struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemsetNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pMemsetParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeGetParams( - cudaGraphNode_t node, struct cudaMemsetParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphMemsetNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeSetParams( - cudaGraphNode_t node, const struct cudaMemsetParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddHostNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - cudaGraphNode_t *, size_t, - 
const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddHostNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeGetParams( - cudaGraphNode_t node, struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphHostNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeSetParams( - cudaGraphNode_t node, const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphHostNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddChildGraphNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - cudaGraphNode_t *pDependencies, - size_t numDependencies, cudaGraph_t childGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, cudaGraphNode_t *, size_t, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphAddChildGraphNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - childGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphChildGraphNodeGetGraph(cudaGraphNode_t node, cudaGraph_t *pGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraph_t *); - static auto func_ptr = LoadSymbol("cudaGraphChildGraphNodeGetGraph"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddEmptyNode(cudaGraphNode_t *pGraphNode, 
cudaGraph_t graph, - cudaGraphNode_t *pDependencies, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphAddEmptyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphClone(cudaGraph_t *pGraphClone, cudaGraph_t originalGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t *, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphClone, originalGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphNodeFindInClone(cudaGraphNode_t *pNode, cudaGraphNode_t originalNode, - cudaGraph_t clonedGraph) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraphNode_t, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphNodeFindInClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pNode, originalNode, clonedGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphNodeGetType(cudaGraphNode_t node, enum cudaGraphNodeType *pType) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, enum cudaGraphNodeType *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pType); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetNodes(cudaGraph_t graph, - cudaGraphNode_t *nodes, - size_t *numNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, nodes, numNodes); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetRootNodes( - cudaGraph_t graph, cudaGraphNode_t *pRootNodes, size_t *pNumRootNodes) { - using FuncPtr 
= - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetRootNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, pRootNodes, pNumRootNodes); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetEdges(cudaGraph_t graph, - cudaGraphNode_t *from, - cudaGraphNode_t *to, - size_t *numEdges) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, - cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetEdges"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numEdges); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependencies( - cudaGraphNode_t node, cudaGraphNode_t *pDependencies, - size_t *pNumDependencies) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pDependencies, pNumDependencies); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependentNodes( - cudaGraphNode_t node, cudaGraphNode_t *pDependentNodes, - size_t *pNumDependentNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetDependentNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pDependentNodes, pNumDependentNodes); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddDependencies(cudaGraph_t graph, cudaGraphNode_t *from, - cudaGraphNode_t *to, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, - cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphAddDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI 
-cudaGraphRemoveDependencies(cudaGraph_t graph, cudaGraphNode_t *from, - cudaGraphNode_t *to, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, - cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphRemoveDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphDestroyNode(cudaGraphNode_t node) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t); - static auto func_ptr = LoadSymbol("cudaGraphDestroyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphInstantiate( - cudaGraphExec_t *pGraphExec, cudaGraph_t graph, cudaGraphNode_t *pErrorNode, - char *pLogBuffer, size_t bufferSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t *, cudaGraph_t, - cudaGraphNode_t *, char *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphInstantiate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphExec, graph, pErrorNode, pLogBuffer, bufferSize); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphLaunch(cudaGraphExec_t graphExec, - cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphLaunch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graphExec, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecDestroy(cudaGraphExec_t graphExec) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t); - static auto func_ptr = LoadSymbol("cudaGraphExecDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graphExec); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphDestroy(cudaGraph_t graph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphDestroy"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetExportTable( - const void **ppExportTable, const cudaUUID_t *pExportTableId) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void **, const cudaUUID_t *); - static auto func_ptr = LoadSymbol("cudaGetExportTable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ppExportTable, pExportTableId); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cuda_runtime_10_1.inc b/third_party/xla/third_party/tsl/tsl/cuda/cuda_runtime_10_1.inc deleted file mode 100644 index d076cc4ac3a506..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cuda_runtime_10_1.inc +++ /dev/null @@ -1,1854 +0,0 @@ -// Auto-generated, do not edit. - -extern "C" { - -extern __host__ cudaError_t CUDARTAPI cudaDeviceReset(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaDeviceReset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceSynchronize(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaDeviceSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceSetLimit(enum cudaLimit limit, - size_t value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); - static auto func_ptr = LoadSymbol("cudaDeviceSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetLimit(size_t *pValue, enum cudaLimit limit) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); - static auto func_ptr = LoadSymbol("cudaDeviceGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pValue, limit); -} - -extern __host__ 
__cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetCacheConfig(enum cudaFuncCache *pCacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); - static auto func_ptr = LoadSymbol("cudaDeviceGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetStreamPriorityRange(int *leastPriority, int *greatestPriority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int *); - static auto func_ptr = - LoadSymbol("cudaDeviceGetStreamPriorityRange"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(leastPriority, greatestPriority); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetCacheConfig(enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaDeviceSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(cacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetSharedMemConfig(enum cudaSharedMemConfig *pConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig *); - static auto func_ptr = LoadSymbol("cudaDeviceGetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetSharedMemConfig(enum cudaSharedMemConfig config) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig); - static auto func_ptr = LoadSymbol("cudaDeviceSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceGetByPCIBusId(int *device, const char *pciBusId) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const char *); - static auto func_ptr = LoadSymbol("cudaDeviceGetByPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, 
pciBusId); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceGetPCIBusId(char *pciBusId, - int len, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(char *, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pciBusId, len, device); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcGetEventHandle(cudaIpcEventHandle_t *handle, cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcEventHandle_t *, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaIpcGetEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcOpenEventHandle(cudaEvent_t *event, cudaIpcEventHandle_t handle) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, cudaIpcEventHandle_t); - static auto func_ptr = LoadSymbol("cudaIpcOpenEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, handle); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcGetMemHandle(cudaIpcMemHandle_t *handle, void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcMemHandle_t *, void *); - static auto func_ptr = LoadSymbol("cudaIpcGetMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaIpcOpenMemHandle( - void **devPtr, cudaIpcMemHandle_t handle, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, cudaIpcMemHandle_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaIpcOpenMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, handle, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaIpcCloseMemHandle(void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaIpcCloseMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(devPtr); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaThreadExit(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaThreadExit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSynchronize(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaThreadSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSetLimit(enum cudaLimit limit, size_t value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); - static auto func_ptr = LoadSymbol("cudaThreadSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadGetLimit(size_t *pValue, enum cudaLimit limit) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); - static auto func_ptr = LoadSymbol("cudaThreadGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pValue, limit); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadGetCacheConfig(enum cudaFuncCache *pCacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); - static auto func_ptr = LoadSymbol("cudaThreadGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCacheConfig); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSetCacheConfig(enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaThreadSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(cacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetLastError(void) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaGetLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaPeekAtLastError(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaPeekAtLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ const char *CUDARTAPI -cudaGetErrorName(cudaError_t error) { - using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); - static auto func_ptr = LoadSymbol("cudaGetErrorName"); - if (!func_ptr) return "cudaGetErrorName symbol not found."; - return func_ptr(error); -} - -extern __host__ __cudart_builtin__ const char *CUDARTAPI -cudaGetErrorString(cudaError_t error) { - using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); - static auto func_ptr = LoadSymbol("cudaGetErrorString"); - if (!func_ptr) return "cudaGetErrorString symbol not found."; - return func_ptr(error); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDeviceCount(int *count) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaGetDeviceCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDeviceProperties(struct cudaDeviceProp *prop, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaDeviceProp *, int); - static auto func_ptr = LoadSymbol("cudaGetDeviceProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(prop, device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetAttribute(int *value, enum cudaDeviceAttr attr, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceAttr, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetAttribute"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(value, attr, device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetP2PAttribute(int *value, enum cudaDeviceP2PAttr attr, - int srcDevice, int dstDevice) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceP2PAttr, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetP2PAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attr, srcDevice, dstDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaChooseDevice(int *device, const struct cudaDeviceProp *prop) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const struct cudaDeviceProp *); - static auto func_ptr = LoadSymbol("cudaChooseDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, prop); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDevice(int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaSetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDevice(int *device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaGetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetValidDevices(int *device_arr, - int len) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int); - static auto func_ptr = LoadSymbol("cudaSetValidDevices"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device_arr, len); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDeviceFlags(unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int); - static auto func_ptr = LoadSymbol("cudaSetDeviceFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetDeviceFlags(unsigned 
int *flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *); - static auto func_ptr = LoadSymbol("cudaGetDeviceFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamCreate(cudaStream_t *pStream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *); - static auto func_ptr = LoadSymbol("cudaStreamCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCreateWithFlags(cudaStream_t *pStream, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamCreateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCreateWithPriority(cudaStream_t *pStream, unsigned int flags, - int priority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int, int); - static auto func_ptr = LoadSymbol("cudaStreamCreateWithPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream, flags, priority); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetPriority(cudaStream_t hStream, int *priority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, int *); - static auto func_ptr = LoadSymbol("cudaStreamGetPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, priority); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetFlags(cudaStream_t hStream, unsigned int *flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, unsigned int *); - static auto func_ptr = LoadSymbol("cudaStreamGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI 
-cudaStreamDestroy(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamWaitEvent( - cudaStream_t stream, cudaEvent_t event, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, cudaEvent_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamWaitEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, event, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamAddCallback(cudaStream_t stream, cudaStreamCallback_t callback, - void *userData, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStreamCallback_t, - void *, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamAddCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, callback, userData, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamSynchronize(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamQuery(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamAttachMemAsync(cudaStream_t stream, void *devPtr, - size_t length __dv(0), - unsigned int flags __dv(cudaMemAttachSingle)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamAttachMemAsync"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(stream, devPtr, length, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamBeginCapture(cudaStream_t stream, enum cudaStreamCaptureMode mode) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamCaptureMode); - static auto func_ptr = LoadSymbol("cudaStreamBeginCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, mode); -} - -extern __host__ cudaError_t CUDARTAPI -cudaThreadExchangeStreamCaptureMode(enum cudaStreamCaptureMode *mode) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaStreamCaptureMode *); - static auto func_ptr = - LoadSymbol("cudaThreadExchangeStreamCaptureMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mode); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamEndCapture(cudaStream_t stream, cudaGraph_t *pGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaGraph_t *); - static auto func_ptr = LoadSymbol("cudaStreamEndCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pGraph); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamIsCapturing( - cudaStream_t stream, enum cudaStreamCaptureStatus *pCaptureStatus) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamCaptureStatus *); - static auto func_ptr = LoadSymbol("cudaStreamIsCapturing"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pCaptureStatus); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamGetCaptureInfo( - cudaStream_t stream, enum cudaStreamCaptureStatus *pCaptureStatus, - unsigned long long *pId) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaStream_t, enum cudaStreamCaptureStatus *, unsigned long long *); - static auto func_ptr = LoadSymbol("cudaStreamGetCaptureInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pCaptureStatus, pId); -} - -extern __host__ cudaError_t CUDARTAPI 
cudaEventCreate(cudaEvent_t *event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *); - static auto func_ptr = LoadSymbol("cudaEventCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventCreateWithFlags(cudaEvent_t *event, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaEventCreateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventRecord(cudaEvent_t event, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaEventRecord"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventQuery(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventSynchronize(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventDestroy(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventElapsedTime(float *ms, - cudaEvent_t start, - cudaEvent_t end) { - using FuncPtr = cudaError_t(CUDARTAPI *)(float *, cudaEvent_t, cudaEvent_t); - static auto func_ptr = 
LoadSymbol("cudaEventElapsedTime"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ms, start, end); -} - -extern __host__ cudaError_t CUDARTAPI cudaImportExternalMemory( - cudaExternalMemory_t *extMem_out, - const struct cudaExternalMemoryHandleDesc *memHandleDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaExternalMemory_t *, const struct cudaExternalMemoryHandleDesc *); - static auto func_ptr = LoadSymbol("cudaImportExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem_out, memHandleDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaExternalMemoryGetMappedBuffer( - void **devPtr, cudaExternalMemory_t extMem, - const struct cudaExternalMemoryBufferDesc *bufferDesc) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, cudaExternalMemory_t, - const struct cudaExternalMemoryBufferDesc *); - static auto func_ptr = - LoadSymbol("cudaExternalMemoryGetMappedBuffer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, extMem, bufferDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaExternalMemoryGetMappedMipmappedArray( - cudaMipmappedArray_t *mipmap, cudaExternalMemory_t extMem, - const struct cudaExternalMemoryMipmappedArrayDesc *mipmapDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMipmappedArray_t *, cudaExternalMemory_t, - const struct cudaExternalMemoryMipmappedArrayDesc *); - static auto func_ptr = - LoadSymbol("cudaExternalMemoryGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmap, extMem, mipmapDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyExternalMemory(cudaExternalMemory_t extMem) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaExternalMemory_t); - static auto func_ptr = LoadSymbol("cudaDestroyExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem); -} - -extern __host__ cudaError_t CUDARTAPI cudaImportExternalSemaphore( - 
cudaExternalSemaphore_t *extSem_out, - const struct cudaExternalSemaphoreHandleDesc *semHandleDesc) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreHandleDesc *); - static auto func_ptr = LoadSymbol("cudaImportExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem_out, semHandleDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaSignalExternalSemaphoresAsync( - const cudaExternalSemaphore_t *extSemArray, - const struct cudaExternalSemaphoreSignalParams *paramsArray, - unsigned int numExtSems, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreSignalParams *, - unsigned int, cudaStream_t); - static auto func_ptr = - LoadSymbol("cudaSignalExternalSemaphoresAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaWaitExternalSemaphoresAsync( - const cudaExternalSemaphore_t *extSemArray, - const struct cudaExternalSemaphoreWaitParams *paramsArray, - unsigned int numExtSems, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreWaitParams *, - unsigned int, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaWaitExternalSemaphoresAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyExternalSemaphore(cudaExternalSemaphore_t extSem) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaExternalSemaphore_t); - static auto func_ptr = LoadSymbol("cudaDestroyExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem); -} - -extern __host__ cudaError_t CUDARTAPI -cudaLaunchKernel(const void *func, dim3 gridDim, dim3 
blockDim, void **args, - size_t sharedMem, cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaLaunchKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, gridDim, blockDim, args, sharedMem, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchCooperativeKernel( - const void *func, dim3 gridDim, dim3 blockDim, void **args, - size_t sharedMem, cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaLaunchCooperativeKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, gridDim, blockDim, args, sharedMem, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchCooperativeKernelMultiDevice( - struct cudaLaunchParams *launchParamsList, unsigned int numDevices, - unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaLaunchParams *, - unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cudaLaunchCooperativeKernelMultiDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(launchParamsList, numDevices, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFuncSetCacheConfig(const void *func, enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaFuncSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, cacheConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFuncSetSharedMemConfig(const void *func, enum cudaSharedMemConfig config) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, enum cudaSharedMemConfig); - static auto func_ptr = LoadSymbol("cudaFuncSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, config); -} 
- -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFuncGetAttributes(struct cudaFuncAttributes *attr, const void *func) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaFuncAttributes *, const void *); - static auto func_ptr = LoadSymbol("cudaFuncGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attr, func); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFuncSetAttribute(const void *func, enum cudaFuncAttribute attr, int value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncAttribute, int); - static auto func_ptr = LoadSymbol("cudaFuncSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, attr, value); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaSetDoubleForDevice(double *d) { - using FuncPtr = cudaError_t(CUDARTAPI *)(double *); - static auto func_ptr = LoadSymbol("cudaSetDoubleForDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(d); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaSetDoubleForHost(double *d) { - using FuncPtr = cudaError_t(CUDARTAPI *)(double *); - static auto func_ptr = LoadSymbol("cudaSetDoubleForHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(d); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchHostFunc(cudaStream_t stream, - cudaHostFn_t fn, - void *userData) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaHostFn_t, void *); - static auto func_ptr = LoadSymbol("cudaLaunchHostFunc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, fn, userData); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *func, - int blockSize, - size_t dynamicSMemSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t); - static auto func_ptr = - 
LoadSymbol("cudaOccupancyMaxActiveBlocksPerMultiprocessor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, - const void *func, - int blockSize, - size_t dynamicSMemSize, - unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t, unsigned int); - static auto func_ptr = LoadSymbol( - "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMallocManaged( - void **devPtr, size_t size, unsigned int flags __dv(cudaMemAttachGlobal)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocManaged"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMalloc(void **devPtr, size_t size) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cudaMalloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocHost(void **ptr, size_t size) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cudaMallocHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocPitch(void **devPtr, - size_t *pitch, - size_t width, - size_t height) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t *, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaMallocPitch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(devPtr, pitch, width, height); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocArray( - cudaArray_t *array, const struct cudaChannelFormatDesc *desc, size_t width, - size_t height __dv(0), unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, - const struct cudaChannelFormatDesc *, - size_t, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, desc, width, height, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFree(void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaFree"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeHost(void *ptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaFreeHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeArray(cudaArray_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t); - static auto func_ptr = LoadSymbol("cudaFreeArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFreeMipmappedArray(cudaMipmappedArray_t mipmappedArray) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t); - static auto func_ptr = LoadSymbol("cudaFreeMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostAlloc(void **pHost, size_t size, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostAlloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHost, size, flags); -} - -extern __host__ cudaError_t CUDARTAPI 
cudaHostRegister(void *ptr, size_t size, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostRegister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostUnregister(void *ptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaHostUnregister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaHostGetDevicePointer(void **pDevice, void *pHost, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, void *, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostGetDevicePointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pDevice, pHost, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostGetFlags(unsigned int *pFlags, - void *pHost) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *, void *); - static auto func_ptr = LoadSymbol("cudaHostGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFlags, pHost); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMalloc3D(struct cudaPitchedPtr *pitchedDevPtr, struct cudaExtent extent) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr *, struct cudaExtent); - static auto func_ptr = LoadSymbol("cudaMalloc3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, extent); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMalloc3DArray(cudaArray_t *array, const struct cudaChannelFormatDesc *desc, - struct cudaExtent extent, unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, - const struct cudaChannelFormatDesc *, - struct cudaExtent, unsigned int); - static auto func_ptr = LoadSymbol("cudaMalloc3DArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(array, desc, extent, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocMipmappedArray( - cudaMipmappedArray_t *mipmappedArray, - const struct cudaChannelFormatDesc *desc, struct cudaExtent extent, - unsigned int numLevels, unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMipmappedArray_t *, const struct cudaChannelFormatDesc *, - struct cudaExtent, unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray, desc, extent, numLevels, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetMipmappedArrayLevel( - cudaArray_t *levelArray, cudaMipmappedArray_const_t mipmappedArray, - unsigned int level) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t *, cudaMipmappedArray_const_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGetMipmappedArrayLevel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(levelArray, mipmappedArray, level); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpy3D(const struct cudaMemcpy3DParms *p) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaMemcpy3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpy3DPeer(const struct cudaMemcpy3DPeerParms *p) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *); - static auto func_ptr = LoadSymbol("cudaMemcpy3DPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy3DAsync( - const struct cudaMemcpy3DParms *p, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy3DAsync"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(p, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy3DPeerAsync( - const struct cudaMemcpy3DPeerParms *p, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *, - cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy3DPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemGetInfo(size_t *free, - size_t *total) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaMemGetInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(free, total); -} - -extern __host__ cudaError_t CUDARTAPI -cudaArrayGetInfo(struct cudaChannelFormatDesc *desc, struct cudaExtent *extent, - unsigned int *flags, cudaArray_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, - struct cudaExtent *, unsigned int *, - cudaArray_t); - static auto func_ptr = LoadSymbol("cudaArrayGetInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(desc, extent, flags, array); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy(void *dst, const void *src, - size_t count, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, - enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyPeer(void *dst, int dstDevice, - const void *src, - int srcDevice, - size_t count) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, int, const void *, int, size_t); - static auto func_ptr = LoadSymbol("cudaMemcpyPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dstDevice, src, srcDevice, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2D(void 
*dst, size_t dpitch, - const void *src, - size_t spitch, size_t width, - size_t height, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, spitch, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArray( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, const void *, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArray( - void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, size_t, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DArrayToArray( - cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, - cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, - size_t height, enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, - cudaArray_const_t, size_t, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DArrayToArray"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, - width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbol( - const void *symbol, const void *src, size_t count, size_t offset __dv(0), - enum cudaMemcpyKind kind __dv(cudaMemcpyHostToDevice)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, - size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbol( - void *dst, const void *symbol, size_t count, size_t offset __dv(0), - enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToHost)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, - enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, symbol, count, offset, kind); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemcpyAsync(void *dst, const void *src, size_t count, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, count, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpyPeerAsync(void *dst, int dstDevice, const void *src, int srcDevice, - size_t count, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, const void *, int, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dstDevice, src, srcDevice, count, stream); -} - -extern __host__ __cudart_builtin__ 
cudaError_t CUDARTAPI cudaMemcpy2DAsync( - void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, - size_t height, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, size_t, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, spitch, width, height, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArrayAsync( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, - const void *, size_t, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DToArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind, - stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArrayAsync( - void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, - size_t, size_t, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind, - stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbolAsync( - const void *symbol, const void *src, size_t count, size_t offset, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, size_t, - enum 
cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyToSymbolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, src, count, offset, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbolAsync( - void *dst, const void *symbol, size_t count, size_t offset, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, symbol, count, offset, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset(void *devPtr, int value, - size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t); - static auto func_ptr = LoadSymbol("cudaMemset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, value, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset2D(void *devPtr, size_t pitch, - int value, size_t width, - size_t height) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, int, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaMemset2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, value, width, height); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset3D( - struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, struct cudaExtent); - static auto func_ptr = LoadSymbol("cudaMemset3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, value, extent); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemsetAsync( - void *devPtr, int value, size_t count, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t, cudaStream_t); - static auto 
func_ptr = LoadSymbol("cudaMemsetAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, value, count, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemset2DAsync(void *devPtr, size_t pitch, int value, size_t width, - size_t height, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, int, size_t, size_t, - cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemset2DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, value, width, height, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemset3DAsync(struct cudaPitchedPtr pitchedDevPtr, int value, - struct cudaExtent extent, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, - struct cudaExtent, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemset3DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, value, extent, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSymbolAddress(void **devPtr, - const void *symbol) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, const void *); - static auto func_ptr = LoadSymbol("cudaGetSymbolAddress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSymbolSize(size_t *size, - const void *symbol) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, const void *); - static auto func_ptr = LoadSymbol("cudaGetSymbolSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(size, symbol); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPrefetchAsync(const void *devPtr, size_t count, int dstDevice, - cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, size_t, int, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemPrefetchAsync"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(devPtr, count, dstDevice, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemAdvise(const void *devPtr, size_t count, enum cudaMemoryAdvise advice, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, size_t, - enum cudaMemoryAdvise, int); - static auto func_ptr = LoadSymbol("cudaMemAdvise"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, advice, device); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttribute( - void *data, size_t dataSize, enum cudaMemRangeAttribute attribute, - const void *devPtr, size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - void *, size_t, enum cudaMemRangeAttribute, const void *, size_t); - static auto func_ptr = LoadSymbol("cudaMemRangeGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSize, attribute, devPtr, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttributes( - void **data, size_t *dataSizes, enum cudaMemRangeAttribute *attributes, - size_t numAttributes, const void *devPtr, size_t count) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t *, enum cudaMemRangeAttribute *, - size_t, const void *, size_t); - static auto func_ptr = LoadSymbol("cudaMemRangeGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSizes, attributes, numAttributes, devPtr, count); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaMemcpyToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, - const void *src, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t, size_t, size_t, const void *, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI 
-cudaMemcpyFromArray(void *dst, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyFromArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, wOffset, hOffset, count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaMemcpyArrayToArray( - cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, - cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, - enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, cudaArray_const_t, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyArrayToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, - count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaMemcpyToArrayAsync( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t count, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, const void *, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyToArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, count, kind, stream); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaMemcpyFromArrayAsync(void *dst, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t count, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, size_t, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = 
LoadSymbol("cudaMemcpyFromArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, wOffset, hOffset, count, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaPointerGetAttributes( - struct cudaPointerAttributes *attributes, const void *ptr) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPointerAttributes *, const void *); - static auto func_ptr = LoadSymbol("cudaPointerGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attributes, ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceCanAccessPeer(int *canAccessPeer, int device, int peerDevice) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceCanAccessPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(canAccessPeer, device, peerDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceEnablePeerAccess(int peerDevice, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int, unsigned int); - static auto func_ptr = LoadSymbol("cudaDeviceEnablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerDevice, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceDisablePeerAccess(int peerDevice) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaDeviceDisablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphicsUnregisterResource(cudaGraphicsResource_t resource) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t); - static auto func_ptr = LoadSymbol("cudaGraphicsUnregisterResource"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceSetMapFlags( - cudaGraphicsResource_t resource, unsigned int flags) { - using FuncPtr = - 
cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphicsResourceSetMapFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsMapResources( - int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphicsMapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsUnmapResources( - int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphicsUnmapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceGetMappedPointer( - void **devPtr, size_t *size, cudaGraphicsResource_t resource) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t *, cudaGraphicsResource_t); - static auto func_ptr = - LoadSymbol("cudaGraphicsResourceGetMappedPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsSubResourceGetMappedArray( - cudaArray_t *array, cudaGraphicsResource_t resource, - unsigned int arrayIndex, unsigned int mipLevel) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t *, cudaGraphicsResource_t, unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cudaGraphicsSubResourceGetMappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, resource, arrayIndex, mipLevel); -} - -extern __host__ cudaError_t CUDARTAPI 
-cudaGraphicsResourceGetMappedMipmappedArray( - cudaMipmappedArray_t *mipmappedArray, cudaGraphicsResource_t resource) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t *, cudaGraphicsResource_t); - static auto func_ptr = - LoadSymbol("cudaGraphicsResourceGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray, resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaBindTexture( - size_t *offset, const struct textureReference *texref, const void *devPtr, - const struct cudaChannelFormatDesc *desc, size_t size __dv(UINT_MAX)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct textureReference *, const void *, - const struct cudaChannelFormatDesc *, size_t); - static auto func_ptr = LoadSymbol("cudaBindTexture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref, devPtr, desc, size); -} - -extern __host__ cudaError_t CUDARTAPI -cudaBindTexture2D(size_t *offset, const struct textureReference *texref, - const void *devPtr, const struct cudaChannelFormatDesc *desc, - size_t width, size_t height, size_t pitch) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct textureReference *, const void *, - const struct cudaChannelFormatDesc *, size_t, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaBindTexture2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref, devPtr, desc, width, height, pitch); -} - -extern __host__ cudaError_t CUDARTAPI cudaBindTextureToArray( - const struct textureReference *texref, cudaArray_const_t array, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct textureReference *, cudaArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindTextureToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, array, desc); -} - -extern __host__ 
cudaError_t CUDARTAPI -cudaBindTextureToMipmappedArray(const struct textureReference *texref, - cudaMipmappedArray_const_t mipmappedArray, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct textureReference *, cudaMipmappedArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindTextureToMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, mipmappedArray, desc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaUnbindTexture(const struct textureReference *texref) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct textureReference *); - static auto func_ptr = LoadSymbol("cudaUnbindTexture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureAlignmentOffset( - size_t *offset, const struct textureReference *texref) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(size_t *, const struct textureReference *); - static auto func_ptr = LoadSymbol("cudaGetTextureAlignmentOffset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureReference( - const struct textureReference **texref, const void *symbol) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct textureReference **, const void *); - static auto func_ptr = LoadSymbol("cudaGetTextureReference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaBindSurfaceToArray( - const struct surfaceReference *surfref, cudaArray_const_t array, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct surfaceReference *, cudaArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindSurfaceToArray"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(surfref, array, desc); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSurfaceReference( - const struct surfaceReference **surfref, const void *symbol) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct surfaceReference **, const void *); - static auto func_ptr = LoadSymbol("cudaGetSurfaceReference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfref, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetChannelDesc( - struct cudaChannelFormatDesc *desc, cudaArray_const_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, - cudaArray_const_t); - static auto func_ptr = LoadSymbol("cudaGetChannelDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(desc, array); -} - -extern __host__ struct cudaChannelFormatDesc CUDARTAPI cudaCreateChannelDesc( - int x, int y, int z, int w, enum cudaChannelFormatKind f) { - using FuncPtr = struct cudaChannelFormatDesc(CUDARTAPI *)( - int, int, int, int, enum cudaChannelFormatKind); - static auto func_ptr = LoadSymbol("cudaCreateChannelDesc"); - if (!func_ptr) { - return cudaChannelFormatDesc{cudaChannelFormatKind(-1), 0, 0, 0}; - } - return func_ptr(x, y, z, w, f); -} - -extern __host__ cudaError_t CUDARTAPI cudaCreateTextureObject( - cudaTextureObject_t *pTexObject, const struct cudaResourceDesc *pResDesc, - const struct cudaTextureDesc *pTexDesc, - const struct cudaResourceViewDesc *pResViewDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaTextureObject_t *, const struct cudaResourceDesc *, - const struct cudaTextureDesc *, const struct cudaResourceViewDesc *); - static auto func_ptr = LoadSymbol("cudaCreateTextureObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexObject, pResDesc, pTexDesc, pResViewDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyTextureObject(cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI 
*)(cudaTextureObject_t); - static auto func_ptr = LoadSymbol("cudaDestroyTextureObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceDesc( - struct cudaResourceDesc *pResDesc, cudaTextureObject_t texObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaTextureObject_t); - static auto func_ptr = - LoadSymbol("cudaGetTextureObjectResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectTextureDesc( - struct cudaTextureDesc *pTexDesc, cudaTextureObject_t texObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaTextureDesc *, cudaTextureObject_t); - static auto func_ptr = LoadSymbol("cudaGetTextureObjectTextureDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceViewDesc( - struct cudaResourceViewDesc *pResViewDesc, cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaResourceViewDesc *, - cudaTextureObject_t); - static auto func_ptr = - LoadSymbol("cudaGetTextureObjectResourceViewDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResViewDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaCreateSurfaceObject( - cudaSurfaceObject_t *pSurfObject, const struct cudaResourceDesc *pResDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t *, - const struct cudaResourceDesc *); - static auto func_ptr = LoadSymbol("cudaCreateSurfaceObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pSurfObject, pResDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroySurfaceObject(cudaSurfaceObject_t surfObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t); - static auto 
func_ptr = LoadSymbol("cudaDestroySurfaceObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSurfaceObjectResourceDesc( - struct cudaResourceDesc *pResDesc, cudaSurfaceObject_t surfObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaSurfaceObject_t); - static auto func_ptr = - LoadSymbol("cudaGetSurfaceObjectResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, surfObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaDriverGetVersion(int *driverVersion) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaDriverGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(driverVersion); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaRuntimeGetVersion(int *runtimeVersion) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaRuntimeGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(runtimeVersion); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphCreate(cudaGraph_t *pGraph, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraph, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddKernelNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddKernelNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, 
numDependencies, - pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeGetParams( - cudaGraphNode_t node, struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeSetParams( - cudaGraphNode_t node, const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaMemcpy3DParms *pCopyParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemcpyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pCopyParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeGetParams( - cudaGraphNode_t node, struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParams( - cudaGraphNode_t node, const struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, 
- const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemsetNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaMemsetParams *pMemsetParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemsetNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pMemsetParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeGetParams( - cudaGraphNode_t node, struct cudaMemsetParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphMemsetNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeSetParams( - cudaGraphNode_t node, const struct cudaMemsetParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddHostNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddHostNode"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeGetParams( - cudaGraphNode_t node, struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphHostNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeSetParams( - cudaGraphNode_t node, const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphHostNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddChildGraphNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, - size_t numDependencies, cudaGraph_t childGraph) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphAddChildGraphNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - childGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphChildGraphNodeGetGraph(cudaGraphNode_t node, cudaGraph_t *pGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraph_t *); - static auto func_ptr = LoadSymbol("cudaGraphChildGraphNodeGetGraph"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pGraph); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddEmptyNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies) { - using 
FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphAddEmptyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphClone(cudaGraph_t *pGraphClone, cudaGraph_t originalGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t *, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphClone, originalGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphNodeFindInClone(cudaGraphNode_t *pNode, cudaGraphNode_t originalNode, - cudaGraph_t clonedGraph) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraphNode_t, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphNodeFindInClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pNode, originalNode, clonedGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphNodeGetType(cudaGraphNode_t node, enum cudaGraphNodeType *pType) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, enum cudaGraphNodeType *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pType); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetNodes(cudaGraph_t graph, - cudaGraphNode_t *nodes, - size_t *numNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, nodes, numNodes); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetRootNodes( - cudaGraph_t graph, cudaGraphNode_t *pRootNodes, size_t *pNumRootNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, size_t *); - static 
auto func_ptr = LoadSymbol("cudaGraphGetRootNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, pRootNodes, pNumRootNodes); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetEdges(cudaGraph_t graph, - cudaGraphNode_t *from, - cudaGraphNode_t *to, - size_t *numEdges) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, - cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetEdges"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numEdges); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependencies( - cudaGraphNode_t node, cudaGraphNode_t *pDependencies, - size_t *pNumDependencies) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pDependencies, pNumDependencies); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependentNodes( - cudaGraphNode_t node, cudaGraphNode_t *pDependentNodes, - size_t *pNumDependentNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetDependentNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pDependentNodes, pNumDependentNodes); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddDependencies(cudaGraph_t graph, const cudaGraphNode_t *from, - const cudaGraphNode_t *to, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, const cudaGraphNode_t *, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphAddDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphRemoveDependencies(cudaGraph_t graph, const 
cudaGraphNode_t *from, - const cudaGraphNode_t *to, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, const cudaGraphNode_t *, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphRemoveDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphDestroyNode(cudaGraphNode_t node) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t); - static auto func_ptr = LoadSymbol("cudaGraphDestroyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphInstantiate( - cudaGraphExec_t *pGraphExec, cudaGraph_t graph, cudaGraphNode_t *pErrorNode, - char *pLogBuffer, size_t bufferSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t *, cudaGraph_t, - cudaGraphNode_t *, char *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphInstantiate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphExec, graph, pErrorNode, pLogBuffer, bufferSize); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecKernelNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExecKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphLaunch(cudaGraphExec_t graphExec, - cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphLaunch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graphExec, stream); -} - -extern __host__ cudaError_t CUDARTAPI 
-cudaGraphExecDestroy(cudaGraphExec_t graphExec) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t); - static auto func_ptr = LoadSymbol("cudaGraphExecDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graphExec); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphDestroy(cudaGraph_t graph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetExportTable( - const void **ppExportTable, const cudaUUID_t *pExportTableId) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void **, const cudaUUID_t *); - static auto func_ptr = LoadSymbol("cudaGetExportTable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ppExportTable, pExportTableId); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cuda_runtime_10_2.inc b/third_party/xla/third_party/tsl/tsl/cuda/cuda_runtime_10_2.inc deleted file mode 100644 index a5a5438b0e5472..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cuda_runtime_10_2.inc +++ /dev/null @@ -1,1907 +0,0 @@ -// Auto-generated, do not edit. 
- -extern "C" { - -extern __host__ cudaError_t CUDARTAPI cudaDeviceReset(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaDeviceReset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceSynchronize(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaDeviceSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceSetLimit(enum cudaLimit limit, - size_t value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); - static auto func_ptr = LoadSymbol("cudaDeviceSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetLimit(size_t *pValue, enum cudaLimit limit) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); - static auto func_ptr = LoadSymbol("cudaDeviceGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pValue, limit); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetCacheConfig(enum cudaFuncCache *pCacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); - static auto func_ptr = LoadSymbol("cudaDeviceGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetStreamPriorityRange(int *leastPriority, int *greatestPriority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int *); - static auto func_ptr = - LoadSymbol("cudaDeviceGetStreamPriorityRange"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(leastPriority, greatestPriority); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetCacheConfig(enum cudaFuncCache cacheConfig) { - 
using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaDeviceSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(cacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetSharedMemConfig(enum cudaSharedMemConfig *pConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig *); - static auto func_ptr = LoadSymbol("cudaDeviceGetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetSharedMemConfig(enum cudaSharedMemConfig config) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig); - static auto func_ptr = LoadSymbol("cudaDeviceSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceGetByPCIBusId(int *device, const char *pciBusId) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const char *); - static auto func_ptr = LoadSymbol("cudaDeviceGetByPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, pciBusId); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceGetPCIBusId(char *pciBusId, - int len, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(char *, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pciBusId, len, device); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcGetEventHandle(cudaIpcEventHandle_t *handle, cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcEventHandle_t *, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaIpcGetEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcOpenEventHandle(cudaEvent_t *event, cudaIpcEventHandle_t handle) { - 
using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, cudaIpcEventHandle_t); - static auto func_ptr = LoadSymbol("cudaIpcOpenEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, handle); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcGetMemHandle(cudaIpcMemHandle_t *handle, void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcMemHandle_t *, void *); - static auto func_ptr = LoadSymbol("cudaIpcGetMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaIpcOpenMemHandle( - void **devPtr, cudaIpcMemHandle_t handle, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, cudaIpcMemHandle_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaIpcOpenMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, handle, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaIpcCloseMemHandle(void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaIpcCloseMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaThreadExit(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaThreadExit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSynchronize(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaThreadSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSetLimit(enum cudaLimit limit, size_t value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); - static auto func_ptr = LoadSymbol("cudaThreadSetLimit"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadGetLimit(size_t *pValue, enum cudaLimit limit) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); - static auto func_ptr = LoadSymbol("cudaThreadGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pValue, limit); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadGetCacheConfig(enum cudaFuncCache *pCacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); - static auto func_ptr = LoadSymbol("cudaThreadGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCacheConfig); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSetCacheConfig(enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaThreadSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(cacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetLastError(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaGetLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaPeekAtLastError(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaPeekAtLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ const char *CUDARTAPI -cudaGetErrorName(cudaError_t error) { - using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); - static auto func_ptr = LoadSymbol("cudaGetErrorName"); - if (!func_ptr) return "cudaGetErrorName symbol not found."; - return func_ptr(error); -} - -extern __host__ __cudart_builtin__ const char *CUDARTAPI 
-cudaGetErrorString(cudaError_t error) { - using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); - static auto func_ptr = LoadSymbol("cudaGetErrorString"); - if (!func_ptr) return "cudaGetErrorString symbol not found."; - return func_ptr(error); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDeviceCount(int *count) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaGetDeviceCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDeviceProperties(struct cudaDeviceProp *prop, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaDeviceProp *, int); - static auto func_ptr = LoadSymbol("cudaGetDeviceProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(prop, device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetAttribute(int *value, enum cudaDeviceAttr attr, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceAttr, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attr, device); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceGetNvSciSyncAttributes( - void *nvSciSyncAttrList, int device, int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, int); - static auto func_ptr = - LoadSymbol("cudaDeviceGetNvSciSyncAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(nvSciSyncAttrList, device, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetP2PAttribute(int *value, enum cudaDeviceP2PAttr attr, - int srcDevice, int dstDevice) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceP2PAttr, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetP2PAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(value, attr, srcDevice, dstDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaChooseDevice(int *device, const struct cudaDeviceProp *prop) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const struct cudaDeviceProp *); - static auto func_ptr = LoadSymbol("cudaChooseDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, prop); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDevice(int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaSetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDevice(int *device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaGetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetValidDevices(int *device_arr, - int len) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int); - static auto func_ptr = LoadSymbol("cudaSetValidDevices"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device_arr, len); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDeviceFlags(unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int); - static auto func_ptr = LoadSymbol("cudaSetDeviceFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetDeviceFlags(unsigned int *flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *); - static auto func_ptr = LoadSymbol("cudaGetDeviceFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamCreate(cudaStream_t *pStream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *); - static auto func_ptr = LoadSymbol("cudaStreamCreate"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(pStream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCreateWithFlags(cudaStream_t *pStream, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamCreateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCreateWithPriority(cudaStream_t *pStream, unsigned int flags, - int priority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int, int); - static auto func_ptr = LoadSymbol("cudaStreamCreateWithPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream, flags, priority); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetPriority(cudaStream_t hStream, int *priority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, int *); - static auto func_ptr = LoadSymbol("cudaStreamGetPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, priority); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetFlags(cudaStream_t hStream, unsigned int *flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, unsigned int *); - static auto func_ptr = LoadSymbol("cudaStreamGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamDestroy(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamWaitEvent( - cudaStream_t stream, cudaEvent_t event, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI 
*)(cudaStream_t, cudaEvent_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamWaitEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, event, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamAddCallback(cudaStream_t stream, cudaStreamCallback_t callback, - void *userData, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStreamCallback_t, - void *, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamAddCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, callback, userData, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamSynchronize(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamQuery(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamAttachMemAsync(cudaStream_t stream, void *devPtr, - size_t length __dv(0), - unsigned int flags __dv(cudaMemAttachSingle)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamAttachMemAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, devPtr, length, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamBeginCapture(cudaStream_t stream, enum cudaStreamCaptureMode mode) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamCaptureMode); - static auto func_ptr = LoadSymbol("cudaStreamBeginCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, mode); 
-} - -extern __host__ cudaError_t CUDARTAPI -cudaThreadExchangeStreamCaptureMode(enum cudaStreamCaptureMode *mode) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaStreamCaptureMode *); - static auto func_ptr = - LoadSymbol("cudaThreadExchangeStreamCaptureMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mode); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamEndCapture(cudaStream_t stream, cudaGraph_t *pGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaGraph_t *); - static auto func_ptr = LoadSymbol("cudaStreamEndCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pGraph); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamIsCapturing( - cudaStream_t stream, enum cudaStreamCaptureStatus *pCaptureStatus) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamCaptureStatus *); - static auto func_ptr = LoadSymbol("cudaStreamIsCapturing"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pCaptureStatus); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamGetCaptureInfo( - cudaStream_t stream, enum cudaStreamCaptureStatus *pCaptureStatus, - unsigned long long *pId) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaStream_t, enum cudaStreamCaptureStatus *, unsigned long long *); - static auto func_ptr = LoadSymbol("cudaStreamGetCaptureInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pCaptureStatus, pId); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventCreate(cudaEvent_t *event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *); - static auto func_ptr = LoadSymbol("cudaEventCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventCreateWithFlags(cudaEvent_t *event, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, unsigned int); - static auto 
func_ptr = LoadSymbol("cudaEventCreateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventRecord(cudaEvent_t event, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaEventRecord"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventQuery(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventSynchronize(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventDestroy(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventElapsedTime(float *ms, - cudaEvent_t start, - cudaEvent_t end) { - using FuncPtr = cudaError_t(CUDARTAPI *)(float *, cudaEvent_t, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventElapsedTime"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ms, start, end); -} - -extern __host__ cudaError_t CUDARTAPI cudaImportExternalMemory( - cudaExternalMemory_t *extMem_out, - const struct cudaExternalMemoryHandleDesc *memHandleDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaExternalMemory_t *, const struct cudaExternalMemoryHandleDesc *); - static auto func_ptr = 
LoadSymbol("cudaImportExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem_out, memHandleDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaExternalMemoryGetMappedBuffer( - void **devPtr, cudaExternalMemory_t extMem, - const struct cudaExternalMemoryBufferDesc *bufferDesc) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, cudaExternalMemory_t, - const struct cudaExternalMemoryBufferDesc *); - static auto func_ptr = - LoadSymbol("cudaExternalMemoryGetMappedBuffer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, extMem, bufferDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaExternalMemoryGetMappedMipmappedArray( - cudaMipmappedArray_t *mipmap, cudaExternalMemory_t extMem, - const struct cudaExternalMemoryMipmappedArrayDesc *mipmapDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMipmappedArray_t *, cudaExternalMemory_t, - const struct cudaExternalMemoryMipmappedArrayDesc *); - static auto func_ptr = - LoadSymbol("cudaExternalMemoryGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmap, extMem, mipmapDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyExternalMemory(cudaExternalMemory_t extMem) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaExternalMemory_t); - static auto func_ptr = LoadSymbol("cudaDestroyExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem); -} - -extern __host__ cudaError_t CUDARTAPI cudaImportExternalSemaphore( - cudaExternalSemaphore_t *extSem_out, - const struct cudaExternalSemaphoreHandleDesc *semHandleDesc) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreHandleDesc *); - static auto func_ptr = LoadSymbol("cudaImportExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem_out, semHandleDesc); -} - -extern __host__ cudaError_t CUDARTAPI 
cudaSignalExternalSemaphoresAsync( - const cudaExternalSemaphore_t *extSemArray, - const struct cudaExternalSemaphoreSignalParams *paramsArray, - unsigned int numExtSems, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreSignalParams *, - unsigned int, cudaStream_t); - static auto func_ptr = - LoadSymbol("cudaSignalExternalSemaphoresAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaWaitExternalSemaphoresAsync( - const cudaExternalSemaphore_t *extSemArray, - const struct cudaExternalSemaphoreWaitParams *paramsArray, - unsigned int numExtSems, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreWaitParams *, - unsigned int, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaWaitExternalSemaphoresAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyExternalSemaphore(cudaExternalSemaphore_t extSem) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaExternalSemaphore_t); - static auto func_ptr = LoadSymbol("cudaDestroyExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem); -} - -extern __host__ cudaError_t CUDARTAPI -cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, void **args, - size_t sharedMem, cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaLaunchKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, gridDim, blockDim, args, sharedMem, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchCooperativeKernel( - const void 
*func, dim3 gridDim, dim3 blockDim, void **args, - size_t sharedMem, cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaLaunchCooperativeKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, gridDim, blockDim, args, sharedMem, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchCooperativeKernelMultiDevice( - struct cudaLaunchParams *launchParamsList, unsigned int numDevices, - unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaLaunchParams *, - unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cudaLaunchCooperativeKernelMultiDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(launchParamsList, numDevices, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFuncSetCacheConfig(const void *func, enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaFuncSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, cacheConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFuncSetSharedMemConfig(const void *func, enum cudaSharedMemConfig config) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, enum cudaSharedMemConfig); - static auto func_ptr = LoadSymbol("cudaFuncSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, config); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFuncGetAttributes(struct cudaFuncAttributes *attr, const void *func) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaFuncAttributes *, const void *); - static auto func_ptr = LoadSymbol("cudaFuncGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attr, func); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI 
-cudaFuncSetAttribute(const void *func, enum cudaFuncAttribute attr, int value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncAttribute, int); - static auto func_ptr = LoadSymbol("cudaFuncSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, attr, value); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaSetDoubleForDevice(double *d) { - using FuncPtr = cudaError_t(CUDARTAPI *)(double *); - static auto func_ptr = LoadSymbol("cudaSetDoubleForDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(d); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaSetDoubleForHost(double *d) { - using FuncPtr = cudaError_t(CUDARTAPI *)(double *); - static auto func_ptr = LoadSymbol("cudaSetDoubleForHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(d); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchHostFunc(cudaStream_t stream, - cudaHostFn_t fn, - void *userData) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaHostFn_t, void *); - static auto func_ptr = LoadSymbol("cudaLaunchHostFunc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, fn, userData); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *func, - int blockSize, - size_t dynamicSMemSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t); - static auto func_ptr = - LoadSymbol("cudaOccupancyMaxActiveBlocksPerMultiprocessor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, - const void *func, - int blockSize, - size_t dynamicSMemSize, - unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const void 
*, int, size_t, unsigned int); - static auto func_ptr = LoadSymbol( - "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMallocManaged( - void **devPtr, size_t size, unsigned int flags __dv(cudaMemAttachGlobal)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocManaged"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMalloc(void **devPtr, size_t size) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cudaMalloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocHost(void **ptr, size_t size) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cudaMallocHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocPitch(void **devPtr, - size_t *pitch, - size_t width, - size_t height) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t *, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaMallocPitch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, width, height); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocArray( - cudaArray_t *array, const struct cudaChannelFormatDesc *desc, size_t width, - size_t height __dv(0), unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, - const struct cudaChannelFormatDesc *, - size_t, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocArray"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(array, desc, width, height, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFree(void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaFree"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeHost(void *ptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaFreeHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeArray(cudaArray_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t); - static auto func_ptr = LoadSymbol("cudaFreeArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFreeMipmappedArray(cudaMipmappedArray_t mipmappedArray) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t); - static auto func_ptr = LoadSymbol("cudaFreeMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostAlloc(void **pHost, size_t size, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostAlloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHost, size, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostRegister(void *ptr, size_t size, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostRegister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostUnregister(void *ptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = 
LoadSymbol("cudaHostUnregister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaHostGetDevicePointer(void **pDevice, void *pHost, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, void *, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostGetDevicePointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pDevice, pHost, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostGetFlags(unsigned int *pFlags, - void *pHost) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *, void *); - static auto func_ptr = LoadSymbol("cudaHostGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFlags, pHost); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMalloc3D(struct cudaPitchedPtr *pitchedDevPtr, struct cudaExtent extent) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr *, struct cudaExtent); - static auto func_ptr = LoadSymbol("cudaMalloc3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, extent); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMalloc3DArray(cudaArray_t *array, const struct cudaChannelFormatDesc *desc, - struct cudaExtent extent, unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, - const struct cudaChannelFormatDesc *, - struct cudaExtent, unsigned int); - static auto func_ptr = LoadSymbol("cudaMalloc3DArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, desc, extent, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocMipmappedArray( - cudaMipmappedArray_t *mipmappedArray, - const struct cudaChannelFormatDesc *desc, struct cudaExtent extent, - unsigned int numLevels, unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMipmappedArray_t *, const struct cudaChannelFormatDesc *, - struct cudaExtent, unsigned int, unsigned int); - 
static auto func_ptr = LoadSymbol("cudaMallocMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray, desc, extent, numLevels, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetMipmappedArrayLevel( - cudaArray_t *levelArray, cudaMipmappedArray_const_t mipmappedArray, - unsigned int level) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t *, cudaMipmappedArray_const_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGetMipmappedArrayLevel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(levelArray, mipmappedArray, level); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpy3D(const struct cudaMemcpy3DParms *p) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaMemcpy3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpy3DPeer(const struct cudaMemcpy3DPeerParms *p) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *); - static auto func_ptr = LoadSymbol("cudaMemcpy3DPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy3DAsync( - const struct cudaMemcpy3DParms *p, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy3DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy3DPeerAsync( - const struct cudaMemcpy3DPeerParms *p, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *, - cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy3DPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, stream); -} 
- -extern __host__ cudaError_t CUDARTAPI cudaMemGetInfo(size_t *free, - size_t *total) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaMemGetInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(free, total); -} - -extern __host__ cudaError_t CUDARTAPI -cudaArrayGetInfo(struct cudaChannelFormatDesc *desc, struct cudaExtent *extent, - unsigned int *flags, cudaArray_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, - struct cudaExtent *, unsigned int *, - cudaArray_t); - static auto func_ptr = LoadSymbol("cudaArrayGetInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(desc, extent, flags, array); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy(void *dst, const void *src, - size_t count, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, - enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyPeer(void *dst, int dstDevice, - const void *src, - int srcDevice, - size_t count) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, int, const void *, int, size_t); - static auto func_ptr = LoadSymbol("cudaMemcpyPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dstDevice, src, srcDevice, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2D(void *dst, size_t dpitch, - const void *src, - size_t spitch, size_t width, - size_t height, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, spitch, width, height, kind); -} - -extern __host__ 
cudaError_t CUDARTAPI cudaMemcpy2DToArray( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, const void *, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArray( - void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, size_t, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DArrayToArray( - cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, - cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, - size_t height, enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, - cudaArray_const_t, size_t, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DArrayToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, - width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbol( - const void *symbol, const void *src, size_t count, size_t offset __dv(0), - enum cudaMemcpyKind kind __dv(cudaMemcpyHostToDevice)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, - size_t, enum cudaMemcpyKind); - 
static auto func_ptr = LoadSymbol("cudaMemcpyToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbol( - void *dst, const void *symbol, size_t count, size_t offset __dv(0), - enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToHost)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, - enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, symbol, count, offset, kind); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemcpyAsync(void *dst, const void *src, size_t count, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, count, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpyPeerAsync(void *dst, int dstDevice, const void *src, int srcDevice, - size_t count, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, const void *, int, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dstDevice, src, srcDevice, count, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy2DAsync( - void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, - size_t height, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, size_t, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(dst, dpitch, src, spitch, width, height, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArrayAsync( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, - const void *, size_t, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DToArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind, - stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArrayAsync( - void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, - size_t, size_t, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind, - stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbolAsync( - const void *symbol, const void *src, size_t count, size_t offset, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyToSymbolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, src, count, offset, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbolAsync( - void *dst, const void *symbol, size_t count, size_t offset, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI 
*)(void *, const void *, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, symbol, count, offset, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset(void *devPtr, int value, - size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t); - static auto func_ptr = LoadSymbol("cudaMemset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, value, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset2D(void *devPtr, size_t pitch, - int value, size_t width, - size_t height) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, int, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaMemset2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, value, width, height); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset3D( - struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, struct cudaExtent); - static auto func_ptr = LoadSymbol("cudaMemset3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, value, extent); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemsetAsync( - void *devPtr, int value, size_t count, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemsetAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, value, count, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemset2DAsync(void *devPtr, size_t pitch, int value, size_t width, - size_t height, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, int, size_t, size_t, - cudaStream_t); - static auto 
func_ptr = LoadSymbol("cudaMemset2DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, value, width, height, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemset3DAsync(struct cudaPitchedPtr pitchedDevPtr, int value, - struct cudaExtent extent, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, - struct cudaExtent, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemset3DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, value, extent, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSymbolAddress(void **devPtr, - const void *symbol) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, const void *); - static auto func_ptr = LoadSymbol("cudaGetSymbolAddress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSymbolSize(size_t *size, - const void *symbol) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, const void *); - static auto func_ptr = LoadSymbol("cudaGetSymbolSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(size, symbol); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPrefetchAsync(const void *devPtr, size_t count, int dstDevice, - cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, size_t, int, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemPrefetchAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, dstDevice, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemAdvise(const void *devPtr, size_t count, enum cudaMemoryAdvise advice, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, size_t, - enum cudaMemoryAdvise, int); - static auto func_ptr = LoadSymbol("cudaMemAdvise"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(devPtr, count, advice, device); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttribute( - void *data, size_t dataSize, enum cudaMemRangeAttribute attribute, - const void *devPtr, size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - void *, size_t, enum cudaMemRangeAttribute, const void *, size_t); - static auto func_ptr = LoadSymbol("cudaMemRangeGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSize, attribute, devPtr, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttributes( - void **data, size_t *dataSizes, enum cudaMemRangeAttribute *attributes, - size_t numAttributes, const void *devPtr, size_t count) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t *, enum cudaMemRangeAttribute *, - size_t, const void *, size_t); - static auto func_ptr = LoadSymbol("cudaMemRangeGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSizes, attributes, numAttributes, devPtr, count); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaMemcpyToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, - const void *src, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t, size_t, size_t, const void *, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaMemcpyFromArray(void *dst, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyFromArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, wOffset, hOffset, count, kind); -} - 
-extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaMemcpyArrayToArray( - cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, - cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, - enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, cudaArray_const_t, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyArrayToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, - count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaMemcpyToArrayAsync( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t count, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, const void *, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyToArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, count, kind, stream); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaMemcpyFromArrayAsync(void *dst, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t count, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, size_t, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyFromArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, wOffset, hOffset, count, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaPointerGetAttributes( - struct cudaPointerAttributes *attributes, const void *ptr) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPointerAttributes *, const void *); - static auto func_ptr = LoadSymbol("cudaPointerGetAttributes"); 
- if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attributes, ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceCanAccessPeer(int *canAccessPeer, int device, int peerDevice) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceCanAccessPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(canAccessPeer, device, peerDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceEnablePeerAccess(int peerDevice, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int, unsigned int); - static auto func_ptr = LoadSymbol("cudaDeviceEnablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerDevice, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceDisablePeerAccess(int peerDevice) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaDeviceDisablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphicsUnregisterResource(cudaGraphicsResource_t resource) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t); - static auto func_ptr = LoadSymbol("cudaGraphicsUnregisterResource"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceSetMapFlags( - cudaGraphicsResource_t resource, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphicsResourceSetMapFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsMapResources( - int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); - 
static auto func_ptr = LoadSymbol("cudaGraphicsMapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsUnmapResources( - int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphicsUnmapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceGetMappedPointer( - void **devPtr, size_t *size, cudaGraphicsResource_t resource) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t *, cudaGraphicsResource_t); - static auto func_ptr = - LoadSymbol("cudaGraphicsResourceGetMappedPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsSubResourceGetMappedArray( - cudaArray_t *array, cudaGraphicsResource_t resource, - unsigned int arrayIndex, unsigned int mipLevel) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t *, cudaGraphicsResource_t, unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cudaGraphicsSubResourceGetMappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, resource, arrayIndex, mipLevel); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphicsResourceGetMappedMipmappedArray( - cudaMipmappedArray_t *mipmappedArray, cudaGraphicsResource_t resource) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t *, cudaGraphicsResource_t); - static auto func_ptr = - LoadSymbol("cudaGraphicsResourceGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray, resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaBindTexture( - size_t *offset, const struct 
textureReference *texref, const void *devPtr, - const struct cudaChannelFormatDesc *desc, size_t size __dv(UINT_MAX)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct textureReference *, const void *, - const struct cudaChannelFormatDesc *, size_t); - static auto func_ptr = LoadSymbol("cudaBindTexture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref, devPtr, desc, size); -} - -extern __host__ cudaError_t CUDARTAPI -cudaBindTexture2D(size_t *offset, const struct textureReference *texref, - const void *devPtr, const struct cudaChannelFormatDesc *desc, - size_t width, size_t height, size_t pitch) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct textureReference *, const void *, - const struct cudaChannelFormatDesc *, size_t, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaBindTexture2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref, devPtr, desc, width, height, pitch); -} - -extern __host__ cudaError_t CUDARTAPI cudaBindTextureToArray( - const struct textureReference *texref, cudaArray_const_t array, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct textureReference *, cudaArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindTextureToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, array, desc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaBindTextureToMipmappedArray(const struct textureReference *texref, - cudaMipmappedArray_const_t mipmappedArray, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct textureReference *, cudaMipmappedArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindTextureToMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, mipmappedArray, 
desc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaUnbindTexture(const struct textureReference *texref) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct textureReference *); - static auto func_ptr = LoadSymbol("cudaUnbindTexture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureAlignmentOffset( - size_t *offset, const struct textureReference *texref) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(size_t *, const struct textureReference *); - static auto func_ptr = LoadSymbol("cudaGetTextureAlignmentOffset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureReference( - const struct textureReference **texref, const void *symbol) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct textureReference **, const void *); - static auto func_ptr = LoadSymbol("cudaGetTextureReference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaBindSurfaceToArray( - const struct surfaceReference *surfref, cudaArray_const_t array, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct surfaceReference *, cudaArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindSurfaceToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfref, array, desc); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSurfaceReference( - const struct surfaceReference **surfref, const void *symbol) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct surfaceReference **, const void *); - static auto func_ptr = LoadSymbol("cudaGetSurfaceReference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfref, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetChannelDesc( - 
struct cudaChannelFormatDesc *desc, cudaArray_const_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, - cudaArray_const_t); - static auto func_ptr = LoadSymbol("cudaGetChannelDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(desc, array); -} - -extern __host__ struct cudaChannelFormatDesc CUDARTAPI cudaCreateChannelDesc( - int x, int y, int z, int w, enum cudaChannelFormatKind f) { - using FuncPtr = struct cudaChannelFormatDesc(CUDARTAPI *)( - int, int, int, int, enum cudaChannelFormatKind); - static auto func_ptr = LoadSymbol("cudaCreateChannelDesc"); - if (!func_ptr) { - return cudaChannelFormatDesc{cudaChannelFormatKind(-1), 0, 0, 0}; - } - return func_ptr(x, y, z, w, f); -} - -extern __host__ cudaError_t CUDARTAPI cudaCreateTextureObject( - cudaTextureObject_t *pTexObject, const struct cudaResourceDesc *pResDesc, - const struct cudaTextureDesc *pTexDesc, - const struct cudaResourceViewDesc *pResViewDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaTextureObject_t *, const struct cudaResourceDesc *, - const struct cudaTextureDesc *, const struct cudaResourceViewDesc *); - static auto func_ptr = LoadSymbol("cudaCreateTextureObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexObject, pResDesc, pTexDesc, pResViewDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyTextureObject(cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaTextureObject_t); - static auto func_ptr = LoadSymbol("cudaDestroyTextureObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceDesc( - struct cudaResourceDesc *pResDesc, cudaTextureObject_t texObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaTextureObject_t); - static auto func_ptr = - LoadSymbol("cudaGetTextureObjectResourceDesc"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(pResDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectTextureDesc( - struct cudaTextureDesc *pTexDesc, cudaTextureObject_t texObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaTextureDesc *, cudaTextureObject_t); - static auto func_ptr = LoadSymbol("cudaGetTextureObjectTextureDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceViewDesc( - struct cudaResourceViewDesc *pResViewDesc, cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaResourceViewDesc *, - cudaTextureObject_t); - static auto func_ptr = - LoadSymbol("cudaGetTextureObjectResourceViewDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResViewDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaCreateSurfaceObject( - cudaSurfaceObject_t *pSurfObject, const struct cudaResourceDesc *pResDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t *, - const struct cudaResourceDesc *); - static auto func_ptr = LoadSymbol("cudaCreateSurfaceObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pSurfObject, pResDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroySurfaceObject(cudaSurfaceObject_t surfObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t); - static auto func_ptr = LoadSymbol("cudaDestroySurfaceObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSurfaceObjectResourceDesc( - struct cudaResourceDesc *pResDesc, cudaSurfaceObject_t surfObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaSurfaceObject_t); - static auto func_ptr = - LoadSymbol("cudaGetSurfaceObjectResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(pResDesc, surfObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaDriverGetVersion(int *driverVersion) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaDriverGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(driverVersion); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaRuntimeGetVersion(int *runtimeVersion) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaRuntimeGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(runtimeVersion); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphCreate(cudaGraph_t *pGraph, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraph, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddKernelNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddKernelNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeGetParams( - cudaGraphNode_t node, struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeSetParams( - 
cudaGraphNode_t node, const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaMemcpy3DParms *pCopyParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemcpyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pCopyParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeGetParams( - cudaGraphNode_t node, struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParams( - cudaGraphNode_t node, const struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemsetNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaMemsetParams *pMemsetParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - 
const cudaGraphNode_t *, size_t, - const struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemsetNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pMemsetParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeGetParams( - cudaGraphNode_t node, struct cudaMemsetParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphMemsetNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeSetParams( - cudaGraphNode_t node, const struct cudaMemsetParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddHostNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddHostNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeGetParams( - cudaGraphNode_t node, struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphHostNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, 
pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeSetParams( - cudaGraphNode_t node, const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphHostNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddChildGraphNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, - size_t numDependencies, cudaGraph_t childGraph) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphAddChildGraphNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - childGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphChildGraphNodeGetGraph(cudaGraphNode_t node, cudaGraph_t *pGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraph_t *); - static auto func_ptr = LoadSymbol("cudaGraphChildGraphNodeGetGraph"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pGraph); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddEmptyNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphAddEmptyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphClone(cudaGraph_t *pGraphClone, cudaGraph_t originalGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t *, cudaGraph_t); - static auto func_ptr = 
LoadSymbol("cudaGraphClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphClone, originalGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphNodeFindInClone(cudaGraphNode_t *pNode, cudaGraphNode_t originalNode, - cudaGraph_t clonedGraph) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraphNode_t, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphNodeFindInClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pNode, originalNode, clonedGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphNodeGetType(cudaGraphNode_t node, enum cudaGraphNodeType *pType) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, enum cudaGraphNodeType *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pType); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetNodes(cudaGraph_t graph, - cudaGraphNode_t *nodes, - size_t *numNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, nodes, numNodes); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetRootNodes( - cudaGraph_t graph, cudaGraphNode_t *pRootNodes, size_t *pNumRootNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetRootNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, pRootNodes, pNumRootNodes); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetEdges(cudaGraph_t graph, - cudaGraphNode_t *from, - cudaGraphNode_t *to, - size_t *numEdges) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, - cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetEdges"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numEdges); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependencies( - cudaGraphNode_t node, cudaGraphNode_t *pDependencies, - size_t *pNumDependencies) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pDependencies, pNumDependencies); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependentNodes( - cudaGraphNode_t node, cudaGraphNode_t *pDependentNodes, - size_t *pNumDependentNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetDependentNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pDependentNodes, pNumDependentNodes); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddDependencies(cudaGraph_t graph, const cudaGraphNode_t *from, - const cudaGraphNode_t *to, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, const cudaGraphNode_t *, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphAddDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphRemoveDependencies(cudaGraph_t graph, const cudaGraphNode_t *from, - const cudaGraphNode_t *to, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, const cudaGraphNode_t *, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphRemoveDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphDestroyNode(cudaGraphNode_t node) { - using FuncPtr = cudaError_t(CUDARTAPI 
*)(cudaGraphNode_t); - static auto func_ptr = LoadSymbol("cudaGraphDestroyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphInstantiate( - cudaGraphExec_t *pGraphExec, cudaGraph_t graph, cudaGraphNode_t *pErrorNode, - char *pLogBuffer, size_t bufferSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t *, cudaGraph_t, - cudaGraphNode_t *, char *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphInstantiate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphExec, graph, pErrorNode, pLogBuffer, bufferSize); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecKernelNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExecKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemcpyNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemcpyNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemsetNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaMemsetParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaMemsetParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); 
- return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecHostNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphExecHostNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecUpdate(cudaGraphExec_t hGraphExec, cudaGraph_t hGraph, - cudaGraphNode_t *hErrorNode_out, - enum cudaGraphExecUpdateResult *updateResult_out) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraph_t, cudaGraphNode_t *, - enum cudaGraphExecUpdateResult *); - static auto func_ptr = LoadSymbol("cudaGraphExecUpdate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hGraph, hErrorNode_out, updateResult_out); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphLaunch(cudaGraphExec_t graphExec, - cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphLaunch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graphExec, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecDestroy(cudaGraphExec_t graphExec) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t); - static auto func_ptr = LoadSymbol("cudaGraphExecDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graphExec); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphDestroy(cudaGraph_t graph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetExportTable( - const 
void **ppExportTable, const cudaUUID_t *pExportTableId) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void **, const cudaUUID_t *); - static auto func_ptr = LoadSymbol("cudaGetExportTable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ppExportTable, pExportTableId); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cuda_runtime_11_0.inc b/third_party/xla/third_party/tsl/tsl/cuda/cuda_runtime_11_0.inc deleted file mode 100644 index c9cd0a37697a4e..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cuda_runtime_11_0.inc +++ /dev/null @@ -1,2639 +0,0 @@ -// Auto-generated, do not edit. - -extern "C" { - -extern __host__ cudaError_t CUDARTAPI cudaDeviceReset(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaDeviceReset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceSynchronize(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaDeviceSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceSetLimit(enum cudaLimit limit, - size_t value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); - static auto func_ptr = LoadSymbol("cudaDeviceSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetLimit(size_t *pValue, enum cudaLimit limit) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); - static auto func_ptr = LoadSymbol("cudaDeviceGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pValue, limit); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetTexture1DLinearMaxWidth( - size_t *maxWidthInElements, const struct cudaChannelFormatDesc 
*fmtDesc, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct cudaChannelFormatDesc *, int); - static auto func_ptr = - LoadSymbol("cudaDeviceGetTexture1DLinearMaxWidth"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(maxWidthInElements, fmtDesc, device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetCacheConfig(enum cudaFuncCache *pCacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); - static auto func_ptr = LoadSymbol("cudaDeviceGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetStreamPriorityRange(int *leastPriority, int *greatestPriority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int *); - static auto func_ptr = - LoadSymbol("cudaDeviceGetStreamPriorityRange"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(leastPriority, greatestPriority); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetCacheConfig(enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaDeviceSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(cacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetSharedMemConfig(enum cudaSharedMemConfig *pConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig *); - static auto func_ptr = LoadSymbol("cudaDeviceGetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetSharedMemConfig(enum cudaSharedMemConfig config) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig); - static auto func_ptr = LoadSymbol("cudaDeviceSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(config); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceGetByPCIBusId(int *device, const char *pciBusId) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const char *); - static auto func_ptr = LoadSymbol("cudaDeviceGetByPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, pciBusId); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceGetPCIBusId(char *pciBusId, - int len, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(char *, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pciBusId, len, device); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcGetEventHandle(cudaIpcEventHandle_t *handle, cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcEventHandle_t *, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaIpcGetEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcOpenEventHandle(cudaEvent_t *event, cudaIpcEventHandle_t handle) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, cudaIpcEventHandle_t); - static auto func_ptr = LoadSymbol("cudaIpcOpenEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, handle); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcGetMemHandle(cudaIpcMemHandle_t *handle, void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcMemHandle_t *, void *); - static auto func_ptr = LoadSymbol("cudaIpcGetMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaIpcOpenMemHandle( - void **devPtr, cudaIpcMemHandle_t handle, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, cudaIpcMemHandle_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaIpcOpenMemHandle"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(devPtr, handle, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaIpcCloseMemHandle(void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaIpcCloseMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr); -} - -#if CUDA_VERSION >= 11030 - -extern __host__ cudaError_t CUDARTAPI cudaDeviceFlushGPUDirectRDMAWrites( - enum cudaFlushGPUDirectRDMAWritesTarget target, - enum cudaFlushGPUDirectRDMAWritesScope scope) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(enum cudaFlushGPUDirectRDMAWritesTarget, - enum cudaFlushGPUDirectRDMAWritesScope); - static auto func_ptr = - LoadSymbol("cudaDeviceFlushGPUDirectRDMAWrites"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(target, scope); -} - -#endif // CUDA_VERSION >= 11030 - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaThreadExit(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaThreadExit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSynchronize(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaThreadSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSetLimit(enum cudaLimit limit, size_t value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); - static auto func_ptr = LoadSymbol("cudaThreadSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadGetLimit(size_t *pValue, enum cudaLimit limit) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); - static auto func_ptr = LoadSymbol("cudaThreadGetLimit"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pValue, limit); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadGetCacheConfig(enum cudaFuncCache *pCacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); - static auto func_ptr = LoadSymbol("cudaThreadGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCacheConfig); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSetCacheConfig(enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaThreadSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(cacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetLastError(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaGetLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaPeekAtLastError(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaPeekAtLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ const char *CUDARTAPI -cudaGetErrorName(cudaError_t error) { - using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); - static auto func_ptr = LoadSymbol("cudaGetErrorName"); - if (!func_ptr) return "cudaGetErrorName symbol not found."; - return func_ptr(error); -} - -extern __host__ __cudart_builtin__ const char *CUDARTAPI -cudaGetErrorString(cudaError_t error) { - using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); - static auto func_ptr = LoadSymbol("cudaGetErrorString"); - if (!func_ptr) return "cudaGetErrorString symbol not found."; - return func_ptr(error); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDeviceCount(int *count) { - 
using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaGetDeviceCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDeviceProperties(struct cudaDeviceProp *prop, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaDeviceProp *, int); - static auto func_ptr = LoadSymbol("cudaGetDeviceProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(prop, device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetAttribute(int *value, enum cudaDeviceAttr attr, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceAttr, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attr, device); -} - -#if CUDA_VERSION >= 11020 - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceGetDefaultMemPool(cudaMemPool_t *memPool, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t *, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetDefaultMemPool"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, device); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetMemPool(int device, cudaMemPool_t memPool) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int, cudaMemPool_t); - static auto func_ptr = LoadSymbol("cudaDeviceSetMemPool"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, memPool); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceGetMemPool(cudaMemPool_t *memPool, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t *, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetMemPool"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, device); -} - -#endif // CUDA_VERSION >= 11020 - -extern __host__ cudaError_t CUDARTAPI 
cudaDeviceGetNvSciSyncAttributes( - void *nvSciSyncAttrList, int device, int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, int); - static auto func_ptr = - LoadSymbol("cudaDeviceGetNvSciSyncAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(nvSciSyncAttrList, device, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetP2PAttribute(int *value, enum cudaDeviceP2PAttr attr, - int srcDevice, int dstDevice) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceP2PAttr, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetP2PAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attr, srcDevice, dstDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaChooseDevice(int *device, const struct cudaDeviceProp *prop) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const struct cudaDeviceProp *); - static auto func_ptr = LoadSymbol("cudaChooseDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, prop); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDevice(int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaSetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDevice(int *device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaGetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetValidDevices(int *device_arr, - int len) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int); - static auto func_ptr = LoadSymbol("cudaSetValidDevices"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device_arr, len); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDeviceFlags(unsigned int flags) { - using 
FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int); - static auto func_ptr = LoadSymbol("cudaSetDeviceFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetDeviceFlags(unsigned int *flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *); - static auto func_ptr = LoadSymbol("cudaGetDeviceFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamCreate(cudaStream_t *pStream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *); - static auto func_ptr = LoadSymbol("cudaStreamCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCreateWithFlags(cudaStream_t *pStream, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamCreateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCreateWithPriority(cudaStream_t *pStream, unsigned int flags, - int priority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int, int); - static auto func_ptr = LoadSymbol("cudaStreamCreateWithPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream, flags, priority); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetPriority(cudaStream_t hStream, int *priority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, int *); - static auto func_ptr = LoadSymbol("cudaStreamGetPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, priority); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetFlags(cudaStream_t hStream, unsigned int *flags) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)(cudaStream_t, unsigned int *); - static auto func_ptr = LoadSymbol("cudaStreamGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaCtxResetPersistingL2Cache(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaCtxResetPersistingL2Cache"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCopyAttributes(cudaStream_t dst, cudaStream_t src) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamCopyAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetAttribute(cudaStream_t hStream, enum cudaStreamAttrID attr, - union cudaStreamAttrValue *value_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamAttrID, - union cudaStreamAttrValue *); - static auto func_ptr = LoadSymbol("cudaStreamGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, attr, value_out); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamSetAttribute(cudaStream_t hStream, enum cudaStreamAttrID attr, - const union cudaStreamAttrValue *value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamAttrID, - const union cudaStreamAttrValue *); - static auto func_ptr = LoadSymbol("cudaStreamSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, attr, value); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamDestroy(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamWaitEvent( - cudaStream_t stream, cudaEvent_t event, unsigned int flags __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, cudaEvent_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamWaitEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, event, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamAddCallback(cudaStream_t stream, cudaStreamCallback_t callback, - void *userData, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStreamCallback_t, - void *, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamAddCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, callback, userData, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamSynchronize(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamQuery(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamAttachMemAsync(cudaStream_t stream, void *devPtr, - size_t length __dv(0), unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamAttachMemAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, devPtr, length, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamBeginCapture(cudaStream_t stream, enum cudaStreamCaptureMode mode) { - using FuncPtr = - cudaError_t(CUDARTAPI 
*)(cudaStream_t, enum cudaStreamCaptureMode); - static auto func_ptr = LoadSymbol("cudaStreamBeginCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, mode); -} - -extern __host__ cudaError_t CUDARTAPI -cudaThreadExchangeStreamCaptureMode(enum cudaStreamCaptureMode *mode) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaStreamCaptureMode *); - static auto func_ptr = - LoadSymbol("cudaThreadExchangeStreamCaptureMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mode); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamEndCapture(cudaStream_t stream, cudaGraph_t *pGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaGraph_t *); - static auto func_ptr = LoadSymbol("cudaStreamEndCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pGraph); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamIsCapturing( - cudaStream_t stream, enum cudaStreamCaptureStatus *pCaptureStatus) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamCaptureStatus *); - static auto func_ptr = LoadSymbol("cudaStreamIsCapturing"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pCaptureStatus); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamGetCaptureInfo( - cudaStream_t stream, enum cudaStreamCaptureStatus *pCaptureStatus, - unsigned long long *pId) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaStream_t, enum cudaStreamCaptureStatus *, unsigned long long *); - static auto func_ptr = LoadSymbol("cudaStreamGetCaptureInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pCaptureStatus, pId); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamGetCaptureInfo_v2( - cudaStream_t stream, enum cudaStreamCaptureStatus *captureStatus_out, - unsigned long long *id_out __dv(0), cudaGraph_t *graph_out __dv(0), - const cudaGraphNode_t **dependencies_out __dv(0), - size_t *numDependencies_out 
__dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaStream_t, enum cudaStreamCaptureStatus *, unsigned long long *, - cudaGraph_t *, const cudaGraphNode_t **, size_t *); - static auto func_ptr = LoadSymbol("cudaStreamGetCaptureInfo_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, captureStatus_out, id_out, graph_out, - dependencies_out, numDependencies_out); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamUpdateCaptureDependencies( - cudaStream_t stream, cudaGraphNode_t *dependencies, size_t numDependencies, - unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaGraphNode_t *, - size_t, unsigned int); - static auto func_ptr = - LoadSymbol("cudaStreamUpdateCaptureDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, dependencies, numDependencies, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventCreate(cudaEvent_t *event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *); - static auto func_ptr = LoadSymbol("cudaEventCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventCreateWithFlags(cudaEvent_t *event, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaEventCreateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventRecord(cudaEvent_t event, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaEventRecord"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventRecordWithFlags(cudaEvent_t event, cudaStream_t stream __dv(0), - 
unsigned int flags __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaEvent_t, cudaStream_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaEventRecordWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, stream, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventQuery(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventSynchronize(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventDestroy(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventElapsedTime(float *ms, - cudaEvent_t start, - cudaEvent_t end) { - using FuncPtr = cudaError_t(CUDARTAPI *)(float *, cudaEvent_t, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventElapsedTime"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ms, start, end); -} - -extern __host__ cudaError_t CUDARTAPI cudaImportExternalMemory( - cudaExternalMemory_t *extMem_out, - const struct cudaExternalMemoryHandleDesc *memHandleDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaExternalMemory_t *, const struct cudaExternalMemoryHandleDesc *); - static auto func_ptr = LoadSymbol("cudaImportExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem_out, memHandleDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaExternalMemoryGetMappedBuffer( - void **devPtr, 
cudaExternalMemory_t extMem, - const struct cudaExternalMemoryBufferDesc *bufferDesc) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, cudaExternalMemory_t, - const struct cudaExternalMemoryBufferDesc *); - static auto func_ptr = - LoadSymbol("cudaExternalMemoryGetMappedBuffer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, extMem, bufferDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaExternalMemoryGetMappedMipmappedArray( - cudaMipmappedArray_t *mipmap, cudaExternalMemory_t extMem, - const struct cudaExternalMemoryMipmappedArrayDesc *mipmapDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMipmappedArray_t *, cudaExternalMemory_t, - const struct cudaExternalMemoryMipmappedArrayDesc *); - static auto func_ptr = - LoadSymbol("cudaExternalMemoryGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmap, extMem, mipmapDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyExternalMemory(cudaExternalMemory_t extMem) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaExternalMemory_t); - static auto func_ptr = LoadSymbol("cudaDestroyExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem); -} - -extern __host__ cudaError_t CUDARTAPI cudaImportExternalSemaphore( - cudaExternalSemaphore_t *extSem_out, - const struct cudaExternalSemaphoreHandleDesc *semHandleDesc) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreHandleDesc *); - static auto func_ptr = LoadSymbol("cudaImportExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem_out, semHandleDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaSignalExternalSemaphoresAsync( - const cudaExternalSemaphore_t *extSemArray, - const struct cudaExternalSemaphoreSignalParams *paramsArray, - unsigned int numExtSems, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI 
*)(const cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreSignalParams *, - unsigned int, cudaStream_t); - static auto func_ptr = - LoadSymbol("cudaSignalExternalSemaphoresAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaWaitExternalSemaphoresAsync( - const cudaExternalSemaphore_t *extSemArray, - const struct cudaExternalSemaphoreWaitParams *paramsArray, - unsigned int numExtSems, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreWaitParams *, - unsigned int, cudaStream_t); - static auto func_ptr = - LoadSymbol("cudaWaitExternalSemaphoresAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyExternalSemaphore(cudaExternalSemaphore_t extSem) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaExternalSemaphore_t); - static auto func_ptr = LoadSymbol("cudaDestroyExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem); -} - -extern __host__ cudaError_t CUDARTAPI -cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, void **args, - size_t sharedMem, cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaLaunchKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, gridDim, blockDim, args, sharedMem, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchCooperativeKernel( - const void *func, dim3 gridDim, dim3 blockDim, void **args, - size_t sharedMem, cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **, - size_t, cudaStream_t); - static auto func_ptr = 
LoadSymbol("cudaLaunchCooperativeKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, gridDim, blockDim, args, sharedMem, stream); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaLaunchCooperativeKernelMultiDevice( - struct cudaLaunchParams *launchParamsList, unsigned int numDevices, - unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaLaunchParams *, - unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cudaLaunchCooperativeKernelMultiDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(launchParamsList, numDevices, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFuncSetCacheConfig(const void *func, enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaFuncSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, cacheConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFuncSetSharedMemConfig(const void *func, enum cudaSharedMemConfig config) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, enum cudaSharedMemConfig); - static auto func_ptr = LoadSymbol("cudaFuncSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, config); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFuncGetAttributes(struct cudaFuncAttributes *attr, const void *func) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaFuncAttributes *, const void *); - static auto func_ptr = LoadSymbol("cudaFuncGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attr, func); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFuncSetAttribute(const void *func, enum cudaFuncAttribute attr, int value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncAttribute, int); - static auto func_ptr = 
LoadSymbol("cudaFuncSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, attr, value); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaSetDoubleForDevice(double *d) { - using FuncPtr = cudaError_t(CUDARTAPI *)(double *); - static auto func_ptr = LoadSymbol("cudaSetDoubleForDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(d); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaSetDoubleForHost(double *d) { - using FuncPtr = cudaError_t(CUDARTAPI *)(double *); - static auto func_ptr = LoadSymbol("cudaSetDoubleForHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(d); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchHostFunc(cudaStream_t stream, - cudaHostFn_t fn, - void *userData) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaHostFn_t, void *); - static auto func_ptr = LoadSymbol("cudaLaunchHostFunc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, fn, userData); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *func, - int blockSize, - size_t dynamicSMemSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t); - static auto func_ptr = - LoadSymbol("cudaOccupancyMaxActiveBlocksPerMultiprocessor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyAvailableDynamicSMemPerBlock(size_t *dynamicSmemSize, - const void *func, int numBlocks, - int blockSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, const void *, int, int); - static auto func_ptr = - LoadSymbol("cudaOccupancyAvailableDynamicSMemPerBlock"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dynamicSmemSize, func, numBlocks, blockSize); -} - -extern 
__host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, - const void *func, - int blockSize, - size_t dynamicSMemSize, - unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t, unsigned int); - static auto func_ptr = LoadSymbol( - "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMallocManaged(void **devPtr, size_t size, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocManaged"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMalloc(void **devPtr, size_t size) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cudaMalloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocHost(void **ptr, size_t size) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cudaMallocHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocPitch(void **devPtr, - size_t *pitch, - size_t width, - size_t height) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t *, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaMallocPitch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, width, height); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocArray( - cudaArray_t *array, const struct cudaChannelFormatDesc *desc, size_t width, - size_t height __dv(0), unsigned int 
flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, - const struct cudaChannelFormatDesc *, - size_t, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, desc, width, height, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFree(void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaFree"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeHost(void *ptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaFreeHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeArray(cudaArray_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t); - static auto func_ptr = LoadSymbol("cudaFreeArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFreeMipmappedArray(cudaMipmappedArray_t mipmappedArray) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t); - static auto func_ptr = LoadSymbol("cudaFreeMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostAlloc(void **pHost, size_t size, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostAlloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHost, size, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostRegister(void *ptr, size_t size, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostRegister"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(ptr, size, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostUnregister(void *ptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaHostUnregister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaHostGetDevicePointer(void **pDevice, void *pHost, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, void *, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostGetDevicePointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pDevice, pHost, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostGetFlags(unsigned int *pFlags, - void *pHost) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *, void *); - static auto func_ptr = LoadSymbol("cudaHostGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFlags, pHost); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMalloc3D(struct cudaPitchedPtr *pitchedDevPtr, struct cudaExtent extent) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr *, struct cudaExtent); - static auto func_ptr = LoadSymbol("cudaMalloc3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, extent); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMalloc3DArray(cudaArray_t *array, const struct cudaChannelFormatDesc *desc, - struct cudaExtent extent, unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, - const struct cudaChannelFormatDesc *, - struct cudaExtent, unsigned int); - static auto func_ptr = LoadSymbol("cudaMalloc3DArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, desc, extent, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocMipmappedArray( - cudaMipmappedArray_t *mipmappedArray, - const struct cudaChannelFormatDesc *desc, struct cudaExtent 
extent, - unsigned int numLevels, unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMipmappedArray_t *, const struct cudaChannelFormatDesc *, - struct cudaExtent, unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray, desc, extent, numLevels, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetMipmappedArrayLevel( - cudaArray_t *levelArray, cudaMipmappedArray_const_t mipmappedArray, - unsigned int level) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t *, cudaMipmappedArray_const_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGetMipmappedArrayLevel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(levelArray, mipmappedArray, level); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpy3D(const struct cudaMemcpy3DParms *p) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaMemcpy3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpy3DPeer(const struct cudaMemcpy3DPeerParms *p) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *); - static auto func_ptr = LoadSymbol("cudaMemcpy3DPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy3DAsync( - const struct cudaMemcpy3DParms *p, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy3DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy3DPeerAsync( - const struct cudaMemcpy3DPeerParms *p, cudaStream_t stream __dv(0)) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *, - cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy3DPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemGetInfo(size_t *free, - size_t *total) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaMemGetInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(free, total); -} - -extern __host__ cudaError_t CUDARTAPI -cudaArrayGetInfo(struct cudaChannelFormatDesc *desc, struct cudaExtent *extent, - unsigned int *flags, cudaArray_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, - struct cudaExtent *, unsigned int *, - cudaArray_t); - static auto func_ptr = LoadSymbol("cudaArrayGetInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(desc, extent, flags, array); -} - -extern __host__ cudaError_t CUDARTAPI cudaArrayGetPlane( - cudaArray_t *pPlaneArray, cudaArray_t hArray, unsigned int planeIdx) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t *, cudaArray_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaArrayGetPlane"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pPlaneArray, hArray, planeIdx); -} - -#if CUDA_VERSION >= 11010 - -extern __host__ cudaError_t CUDARTAPI cudaArrayGetSparseProperties( - struct cudaArraySparseProperties *sparseProperties, cudaArray_t array) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaArraySparseProperties *, cudaArray_t); - static auto func_ptr = LoadSymbol("cudaArrayGetSparseProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(sparseProperties, array); -} - -extern __host__ cudaError_t CUDARTAPI cudaMipmappedArrayGetSparseProperties( - struct cudaArraySparseProperties *sparseProperties, - cudaMipmappedArray_t mipmap) { - using FuncPtr = cudaError_t(CUDARTAPI 
*)(struct cudaArraySparseProperties *, - cudaMipmappedArray_t); - static auto func_ptr = - LoadSymbol("cudaMipmappedArrayGetSparseProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(sparseProperties, mipmap); -} - -#endif // CUDA_VERSION >= 11010 - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy(void *dst, const void *src, - size_t count, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, - enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyPeer(void *dst, int dstDevice, - const void *src, - int srcDevice, - size_t count) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, int, const void *, int, size_t); - static auto func_ptr = LoadSymbol("cudaMemcpyPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dstDevice, src, srcDevice, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2D(void *dst, size_t dpitch, - const void *src, - size_t spitch, size_t width, - size_t height, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, spitch, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArray( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, const void *, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, 
spitch, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArray( - void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, size_t, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DArrayToArray( - cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, - cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, - size_t height, enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, - cudaArray_const_t, size_t, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DArrayToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, - width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbol( - const void *symbol, const void *src, size_t count, size_t offset __dv(0), - enum cudaMemcpyKind kind __dv(cudaMemcpyHostToDevice)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, - size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbol( - void *dst, const void *symbol, size_t count, size_t offset __dv(0), - enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToHost)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, - enum cudaMemcpyKind); - static auto 
func_ptr = LoadSymbol("cudaMemcpyFromSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, symbol, count, offset, kind); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemcpyAsync(void *dst, const void *src, size_t count, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, count, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpyPeerAsync(void *dst, int dstDevice, const void *src, int srcDevice, - size_t count, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, const void *, int, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dstDevice, src, srcDevice, count, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy2DAsync( - void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, - size_t height, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, size_t, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, spitch, width, height, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArrayAsync( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, - const void *, size_t, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto 
func_ptr = LoadSymbol("cudaMemcpy2DToArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind, - stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArrayAsync( - void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, - size_t, size_t, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind, - stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbolAsync( - const void *symbol, const void *src, size_t count, size_t offset, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyToSymbolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, src, count, offset, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbolAsync( - void *dst, const void *symbol, size_t count, size_t offset, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, symbol, count, offset, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset(void *devPtr, int value, - size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t); - static auto func_ptr = LoadSymbol("cudaMemset"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, value, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset2D(void *devPtr, size_t pitch, - int value, size_t width, - size_t height) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, int, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaMemset2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, value, width, height); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset3D( - struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, struct cudaExtent); - static auto func_ptr = LoadSymbol("cudaMemset3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, value, extent); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemsetAsync( - void *devPtr, int value, size_t count, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemsetAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, value, count, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemset2DAsync(void *devPtr, size_t pitch, int value, size_t width, - size_t height, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, int, size_t, size_t, - cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemset2DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, value, width, height, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemset3DAsync(struct cudaPitchedPtr pitchedDevPtr, int value, - struct cudaExtent extent, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, - struct cudaExtent, cudaStream_t); - static auto func_ptr = 
LoadSymbol("cudaMemset3DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, value, extent, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSymbolAddress(void **devPtr, - const void *symbol) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, const void *); - static auto func_ptr = LoadSymbol("cudaGetSymbolAddress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSymbolSize(size_t *size, - const void *symbol) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, const void *); - static auto func_ptr = LoadSymbol("cudaGetSymbolSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(size, symbol); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPrefetchAsync(const void *devPtr, size_t count, int dstDevice, - cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, size_t, int, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemPrefetchAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, dstDevice, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemAdvise(const void *devPtr, size_t count, enum cudaMemoryAdvise advice, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, size_t, - enum cudaMemoryAdvise, int); - static auto func_ptr = LoadSymbol("cudaMemAdvise"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, advice, device); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttribute( - void *data, size_t dataSize, enum cudaMemRangeAttribute attribute, - const void *devPtr, size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - void *, size_t, enum cudaMemRangeAttribute, const void *, size_t); - static auto func_ptr = LoadSymbol("cudaMemRangeGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSize, 
attribute, devPtr, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttributes( - void **data, size_t *dataSizes, enum cudaMemRangeAttribute *attributes, - size_t numAttributes, const void *devPtr, size_t count) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t *, enum cudaMemRangeAttribute *, - size_t, const void *, size_t); - static auto func_ptr = LoadSymbol("cudaMemRangeGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSizes, attributes, numAttributes, devPtr, count); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaMemcpyToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, - const void *src, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t, size_t, size_t, const void *, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaMemcpyFromArray(void *dst, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyFromArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, wOffset, hOffset, count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaMemcpyArrayToArray( - cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, - cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, - enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, cudaArray_const_t, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyArrayToArray"); - 
if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, - count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaMemcpyToArrayAsync( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t count, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, const void *, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyToArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, count, kind, stream); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaMemcpyFromArrayAsync(void *dst, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t count, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, size_t, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyFromArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, wOffset, hOffset, count, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocAsync(void **devPtr, - size_t size, - cudaStream_t hStream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMallocAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, hStream); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeAsync(void *devPtr, - cudaStream_t hStream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaFreeAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, hStream); -} - -#if CUDA_VERSION >= 11020 - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolTrimTo(cudaMemPool_t memPool, - size_t 
minBytesToKeep) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t, size_t); - static auto func_ptr = LoadSymbol("cudaMemPoolTrimTo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, minBytesToKeep); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolSetAttribute( - cudaMemPool_t memPool, enum cudaMemPoolAttr attr, void *value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaMemPool_t, enum cudaMemPoolAttr, void *); - static auto func_ptr = LoadSymbol("cudaMemPoolSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, attr, value); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolGetAttribute( - cudaMemPool_t memPool, enum cudaMemPoolAttr attr, void *value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaMemPool_t, enum cudaMemPoolAttr, void *); - static auto func_ptr = LoadSymbol("cudaMemPoolGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, attr, value); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPoolSetAccess(cudaMemPool_t memPool, - const struct cudaMemAccessDesc *descList, size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMemPool_t, const struct cudaMemAccessDesc *, size_t); - static auto func_ptr = LoadSymbol("cudaMemPoolSetAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, descList, count); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPoolGetAccess(enum cudaMemAccessFlags *flags, cudaMemPool_t memPool, - struct cudaMemLocation *location) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - enum cudaMemAccessFlags *, cudaMemPool_t, struct cudaMemLocation *); - static auto func_ptr = LoadSymbol("cudaMemPoolGetAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags, memPool, location); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolCreate( - cudaMemPool_t *memPool, const struct cudaMemPoolProps *poolProps) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)(cudaMemPool_t *, - const struct cudaMemPoolProps *); - static auto func_ptr = LoadSymbol("cudaMemPoolCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, poolProps); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPoolDestroy(cudaMemPool_t memPool) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t); - static auto func_ptr = LoadSymbol("cudaMemPoolDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocFromPoolAsync( - void **ptr, size_t size, cudaMemPool_t memPool, cudaStream_t stream) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t, cudaMemPool_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMallocFromPoolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size, memPool, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolExportToShareableHandle( - void *shareableHandle, cudaMemPool_t memPool, - enum cudaMemAllocationHandleType handleType, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - void *, cudaMemPool_t, enum cudaMemAllocationHandleType, unsigned int); - static auto func_ptr = - LoadSymbol("cudaMemPoolExportToShareableHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(shareableHandle, memPool, handleType, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolImportFromShareableHandle( - cudaMemPool_t *memPool, void *shareableHandle, - enum cudaMemAllocationHandleType handleType, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMemPool_t *, void *, enum cudaMemAllocationHandleType, unsigned int); - static auto func_ptr = - LoadSymbol("cudaMemPoolImportFromShareableHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, shareableHandle, handleType, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolExportPointer( - 
struct cudaMemPoolPtrExportData *exportData, void *ptr) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaMemPoolPtrExportData *, void *); - static auto func_ptr = LoadSymbol("cudaMemPoolExportPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(exportData, ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPoolImportPointer(void **ptr, cudaMemPool_t memPool, - struct cudaMemPoolPtrExportData *exportData) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, cudaMemPool_t, - struct cudaMemPoolPtrExportData *); - static auto func_ptr = LoadSymbol("cudaMemPoolImportPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, memPool, exportData); -} - -#endif // CUDA_VERSION >= 11020 - -extern __host__ cudaError_t CUDARTAPI cudaPointerGetAttributes( - struct cudaPointerAttributes *attributes, const void *ptr) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPointerAttributes *, const void *); - static auto func_ptr = LoadSymbol("cudaPointerGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attributes, ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceCanAccessPeer(int *canAccessPeer, int device, int peerDevice) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceCanAccessPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(canAccessPeer, device, peerDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceEnablePeerAccess(int peerDevice, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int, unsigned int); - static auto func_ptr = LoadSymbol("cudaDeviceEnablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerDevice, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceDisablePeerAccess(int peerDevice) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = 
LoadSymbol("cudaDeviceDisablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphicsUnregisterResource(cudaGraphicsResource_t resource) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t); - static auto func_ptr = LoadSymbol("cudaGraphicsUnregisterResource"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceSetMapFlags( - cudaGraphicsResource_t resource, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphicsResourceSetMapFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsMapResources( - int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphicsMapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsUnmapResources( - int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphicsUnmapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceGetMappedPointer( - void **devPtr, size_t *size, cudaGraphicsResource_t resource) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t *, cudaGraphicsResource_t); - static auto func_ptr = - LoadSymbol("cudaGraphicsResourceGetMappedPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); 
- return func_ptr(devPtr, size, resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsSubResourceGetMappedArray( - cudaArray_t *array, cudaGraphicsResource_t resource, - unsigned int arrayIndex, unsigned int mipLevel) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t *, cudaGraphicsResource_t, unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cudaGraphicsSubResourceGetMappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, resource, arrayIndex, mipLevel); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphicsResourceGetMappedMipmappedArray( - cudaMipmappedArray_t *mipmappedArray, cudaGraphicsResource_t resource) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t *, cudaGraphicsResource_t); - static auto func_ptr = - LoadSymbol("cudaGraphicsResourceGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray, resource); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaBindTexture( - size_t *offset, const struct textureReference *texref, const void *devPtr, - const struct cudaChannelFormatDesc *desc, size_t size __dv(UINT_MAX)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct textureReference *, const void *, - const struct cudaChannelFormatDesc *, size_t); - static auto func_ptr = LoadSymbol("cudaBindTexture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref, devPtr, desc, size); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaBindTexture2D(size_t *offset, const struct textureReference *texref, - const void *devPtr, const struct cudaChannelFormatDesc *desc, - size_t width, size_t height, size_t pitch) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct textureReference *, const void *, - const struct cudaChannelFormatDesc *, size_t, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaBindTexture2D"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref, devPtr, desc, width, height, pitch); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaBindTextureToArray( - const struct textureReference *texref, cudaArray_const_t array, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct textureReference *, cudaArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindTextureToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, array, desc); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaBindTextureToMipmappedArray(const struct textureReference *texref, - cudaMipmappedArray_const_t mipmappedArray, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct textureReference *, cudaMipmappedArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindTextureToMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, mipmappedArray, desc); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaUnbindTexture(const struct textureReference *texref) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct textureReference *); - static auto func_ptr = LoadSymbol("cudaUnbindTexture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaGetTextureAlignmentOffset(size_t *offset, - const struct textureReference *texref) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(size_t *, const struct textureReference *); - static auto func_ptr = LoadSymbol("cudaGetTextureAlignmentOffset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaGetTextureReference( - const struct textureReference 
**texref, const void *symbol) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct textureReference **, const void *); - static auto func_ptr = LoadSymbol("cudaGetTextureReference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, symbol); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaBindSurfaceToArray( - const struct surfaceReference *surfref, cudaArray_const_t array, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct surfaceReference *, cudaArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindSurfaceToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfref, array, desc); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaGetSurfaceReference( - const struct surfaceReference **surfref, const void *symbol) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct surfaceReference **, const void *); - static auto func_ptr = LoadSymbol("cudaGetSurfaceReference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfref, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetChannelDesc( - struct cudaChannelFormatDesc *desc, cudaArray_const_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, - cudaArray_const_t); - static auto func_ptr = LoadSymbol("cudaGetChannelDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(desc, array); -} - -extern __host__ struct cudaChannelFormatDesc CUDARTAPI cudaCreateChannelDesc( - int x, int y, int z, int w, enum cudaChannelFormatKind f) { - using FuncPtr = struct cudaChannelFormatDesc(CUDARTAPI *)( - int, int, int, int, enum cudaChannelFormatKind); - static auto func_ptr = LoadSymbol("cudaCreateChannelDesc"); - if (!func_ptr) { - return cudaChannelFormatDesc{cudaChannelFormatKind(-1), 0, 0, 0}; - } - return func_ptr(x, y, z, w, f); -} - -extern __host__ 
cudaError_t CUDARTAPI cudaCreateTextureObject( - cudaTextureObject_t *pTexObject, const struct cudaResourceDesc *pResDesc, - const struct cudaTextureDesc *pTexDesc, - const struct cudaResourceViewDesc *pResViewDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaTextureObject_t *, const struct cudaResourceDesc *, - const struct cudaTextureDesc *, const struct cudaResourceViewDesc *); - static auto func_ptr = LoadSymbol("cudaCreateTextureObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexObject, pResDesc, pTexDesc, pResViewDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyTextureObject(cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaTextureObject_t); - static auto func_ptr = LoadSymbol("cudaDestroyTextureObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceDesc( - struct cudaResourceDesc *pResDesc, cudaTextureObject_t texObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaTextureObject_t); - static auto func_ptr = - LoadSymbol("cudaGetTextureObjectResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectTextureDesc( - struct cudaTextureDesc *pTexDesc, cudaTextureObject_t texObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaTextureDesc *, cudaTextureObject_t); - static auto func_ptr = LoadSymbol("cudaGetTextureObjectTextureDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceViewDesc( - struct cudaResourceViewDesc *pResViewDesc, cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaResourceViewDesc *, - cudaTextureObject_t); - static auto func_ptr = - 
LoadSymbol("cudaGetTextureObjectResourceViewDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResViewDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaCreateSurfaceObject( - cudaSurfaceObject_t *pSurfObject, const struct cudaResourceDesc *pResDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t *, - const struct cudaResourceDesc *); - static auto func_ptr = LoadSymbol("cudaCreateSurfaceObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pSurfObject, pResDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroySurfaceObject(cudaSurfaceObject_t surfObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t); - static auto func_ptr = LoadSymbol("cudaDestroySurfaceObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSurfaceObjectResourceDesc( - struct cudaResourceDesc *pResDesc, cudaSurfaceObject_t surfObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaSurfaceObject_t); - static auto func_ptr = - LoadSymbol("cudaGetSurfaceObjectResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, surfObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaDriverGetVersion(int *driverVersion) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaDriverGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(driverVersion); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaRuntimeGetVersion(int *runtimeVersion) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaRuntimeGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(runtimeVersion); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphCreate(cudaGraph_t *pGraph, - unsigned int flags) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)(cudaGraph_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraph, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddKernelNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddKernelNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeGetParams( - cudaGraphNode_t node, struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeSetParams( - cudaGraphNode_t node, const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphKernelNodeCopyAttributes(cudaGraphNode_t hSrc, cudaGraphNode_t hDst) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t); - static auto func_ptr = - LoadSymbol("cudaGraphKernelNodeCopyAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hSrc, hDst); -} - -extern __host__ cudaError_t CUDARTAPI 
cudaGraphKernelNodeGetAttribute( - cudaGraphNode_t hNode, enum cudaKernelNodeAttrID attr, - union cudaKernelNodeAttrValue *value_out) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, enum cudaKernelNodeAttrID, - union cudaKernelNodeAttrValue *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, attr, value_out); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeSetAttribute( - cudaGraphNode_t hNode, enum cudaKernelNodeAttrID attr, - const union cudaKernelNodeAttrValue *value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, enum cudaKernelNodeAttrID, - const union cudaKernelNodeAttrValue *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, attr, value); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaMemcpy3DParms *pCopyParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemcpyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pCopyParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNodeToSymbol( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const void *symbol, const void *src, size_t count, size_t offset, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, - const void *, const void *, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = 
LoadSymbol("cudaGraphAddMemcpyNodeToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, symbol, - src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNodeFromSymbol( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, void *dst, - const void *symbol, size_t count, size_t offset, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, void *, - const void *, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphAddMemcpyNodeFromSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, dst, - symbol, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNode1D( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, void *dst, - const void *src, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, void *, - const void *, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaGraphAddMemcpyNode1D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, dst, src, - count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeGetParams( - cudaGraphNode_t node, struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParams( - cudaGraphNode_t 
node, const struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParamsToSymbol( - cudaGraphNode_t node, const void *symbol, const void *src, size_t count, - size_t offset, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, const void *, const void *, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphMemcpyNodeSetParamsToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, symbol, src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParamsFromSymbol( - cudaGraphNode_t node, void *dst, const void *symbol, size_t count, - size_t offset, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, void *, const void *, size_t, - size_t, enum cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphMemcpyNodeSetParamsFromSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, dst, symbol, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphMemcpyNodeSetParams1D(cudaGraphNode_t node, void *dst, const void *src, - size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, void *, const void *, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeSetParams1D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, dst, src, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemsetNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct 
cudaMemsetParams *pMemsetParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemsetNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pMemsetParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeGetParams( - cudaGraphNode_t node, struct cudaMemsetParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphMemsetNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeSetParams( - cudaGraphNode_t node, const struct cudaMemsetParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddHostNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddHostNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeGetParams( - cudaGraphNode_t node, struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaHostNodeParams *); - static auto func_ptr = 
LoadSymbol("cudaGraphHostNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeSetParams( - cudaGraphNode_t node, const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphHostNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddChildGraphNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, - size_t numDependencies, cudaGraph_t childGraph) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphAddChildGraphNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - childGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphChildGraphNodeGetGraph(cudaGraphNode_t node, cudaGraph_t *pGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraph_t *); - static auto func_ptr = LoadSymbol("cudaGraphChildGraphNodeGetGraph"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pGraph); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddEmptyNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphAddEmptyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddEventRecordNode(cudaGraphNode_t *pGraphNode, 
cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, - size_t numDependencies, cudaEvent_t event) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaGraphAddEventRecordNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphEventRecordNodeGetEvent(cudaGraphNode_t node, cudaEvent_t *event_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaEvent_t *); - static auto func_ptr = - LoadSymbol("cudaGraphEventRecordNodeGetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, event_out); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphEventRecordNodeSetEvent(cudaGraphNode_t node, cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaEvent_t); - static auto func_ptr = - LoadSymbol("cudaGraphEventRecordNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddEventWaitNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, - size_t numDependencies, cudaEvent_t event) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaGraphAddEventWaitNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphEventWaitNodeGetEvent(cudaGraphNode_t node, cudaEvent_t *event_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaEvent_t *); - static auto func_ptr = LoadSymbol("cudaGraphEventWaitNodeGetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(node, event_out); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphEventWaitNodeSetEvent(cudaGraphNode_t node, cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaGraphEventWaitNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, event); -} - -#if CUDA_VERSION >= 11020 - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddExternalSemaphoresSignalNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaExternalSemaphoreSignalNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, - const struct cudaExternalSemaphoreSignalNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphAddExternalSemaphoresSignalNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExternalSemaphoresSignalNodeGetParams( - cudaGraphNode_t hNode, - struct cudaExternalSemaphoreSignalNodeParams *params_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, struct cudaExternalSemaphoreSignalNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExternalSemaphoresSignalNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, params_out); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExternalSemaphoresSignalNodeSetParams( - cudaGraphNode_t hNode, - const struct cudaExternalSemaphoreSignalNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, const struct cudaExternalSemaphoreSignalNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExternalSemaphoresSignalNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, 
nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddExternalSemaphoresWaitNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaExternalSemaphoreWaitNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, - const struct cudaExternalSemaphoreWaitNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphAddExternalSemaphoresWaitNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExternalSemaphoresWaitNodeGetParams( - cudaGraphNode_t hNode, - struct cudaExternalSemaphoreWaitNodeParams *params_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, struct cudaExternalSemaphoreWaitNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExternalSemaphoresWaitNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, params_out); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExternalSemaphoresWaitNodeSetParams( - cudaGraphNode_t hNode, - const struct cudaExternalSemaphoreWaitNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, const struct cudaExternalSemaphoreWaitNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExternalSemaphoresWaitNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -#endif // CUDA_VERSION >= 11020 - -extern __host__ cudaError_t CUDARTAPI -cudaGraphClone(cudaGraph_t *pGraphClone, cudaGraph_t originalGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t *, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphClone, originalGraph); -} - -extern __host__ cudaError_t 
CUDARTAPI -cudaGraphNodeFindInClone(cudaGraphNode_t *pNode, cudaGraphNode_t originalNode, - cudaGraph_t clonedGraph) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraphNode_t, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphNodeFindInClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pNode, originalNode, clonedGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphNodeGetType(cudaGraphNode_t node, enum cudaGraphNodeType *pType) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, enum cudaGraphNodeType *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pType); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetNodes(cudaGraph_t graph, - cudaGraphNode_t *nodes, - size_t *numNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, nodes, numNodes); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetRootNodes( - cudaGraph_t graph, cudaGraphNode_t *pRootNodes, size_t *pNumRootNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetRootNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, pRootNodes, pNumRootNodes); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetEdges(cudaGraph_t graph, - cudaGraphNode_t *from, - cudaGraphNode_t *to, - size_t *numEdges) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, - cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetEdges"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numEdges); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependencies( - 
cudaGraphNode_t node, cudaGraphNode_t *pDependencies, - size_t *pNumDependencies) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pDependencies, pNumDependencies); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependentNodes( - cudaGraphNode_t node, cudaGraphNode_t *pDependentNodes, - size_t *pNumDependentNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetDependentNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pDependentNodes, pNumDependentNodes); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddDependencies(cudaGraph_t graph, const cudaGraphNode_t *from, - const cudaGraphNode_t *to, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, const cudaGraphNode_t *, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphAddDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphRemoveDependencies(cudaGraph_t graph, const cudaGraphNode_t *from, - const cudaGraphNode_t *to, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, const cudaGraphNode_t *, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphRemoveDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphDestroyNode(cudaGraphNode_t node) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t); - static auto func_ptr = LoadSymbol("cudaGraphDestroyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(node); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphInstantiate( - cudaGraphExec_t *pGraphExec, cudaGraph_t graph, cudaGraphNode_t *pErrorNode, - char *pLogBuffer, size_t bufferSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t *, cudaGraph_t, - cudaGraphNode_t *, char *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphInstantiate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphExec, graph, pErrorNode, pLogBuffer, bufferSize); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecKernelNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExecKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemcpyNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemcpyNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemcpyNodeSetParamsToSymbol( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const void *symbol, - const void *src, size_t count, size_t offset, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const void *, const void *, size_t, - size_t, enum cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemcpyNodeSetParamsToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, symbol, 
src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecMemcpyNodeSetParamsFromSymbol(cudaGraphExec_t hGraphExec, - cudaGraphNode_t node, void *dst, - const void *symbol, size_t count, - size_t offset, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - void *, const void *, size_t, size_t, - enum cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemcpyNodeSetParamsFromSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, dst, symbol, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemcpyNodeSetParams1D( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, void *dst, - const void *src, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, void *, - const void *, size_t, enum cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemcpyNodeSetParams1D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, dst, src, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemsetNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaMemsetParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaMemsetParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecHostNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphExecHostNodeSetParams"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecChildGraphNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, cudaGraph_t childGraph) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, cudaGraph_t); - static auto func_ptr = - LoadSymbol("cudaGraphExecChildGraphNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, childGraph); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecEventRecordNodeSetEvent( - cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, cudaEvent_t event) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, cudaEvent_t); - static auto func_ptr = - LoadSymbol("cudaGraphExecEventRecordNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, event); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecEventWaitNodeSetEvent( - cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, cudaEvent_t event) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, cudaEvent_t); - static auto func_ptr = - LoadSymbol("cudaGraphExecEventWaitNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecExternalSemaphoresSignalNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, - const struct cudaExternalSemaphoreSignalNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphExec_t, cudaGraphNode_t, - const struct cudaExternalSemaphoreSignalNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExecExternalSemaphoresSignalNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecExternalSemaphoresWaitNodeSetParams( - 
cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, - const struct cudaExternalSemaphoreWaitNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphExec_t, cudaGraphNode_t, - const struct cudaExternalSemaphoreWaitNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExecExternalSemaphoresWaitNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecUpdate(cudaGraphExec_t hGraphExec, cudaGraph_t hGraph, - cudaGraphNode_t *hErrorNode_out, - enum cudaGraphExecUpdateResult *updateResult_out) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraph_t, cudaGraphNode_t *, - enum cudaGraphExecUpdateResult *); - static auto func_ptr = LoadSymbol("cudaGraphExecUpdate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hGraph, hErrorNode_out, updateResult_out); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphUpload(cudaGraphExec_t graphExec, - cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphUpload"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graphExec, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphLaunch(cudaGraphExec_t graphExec, - cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphLaunch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graphExec, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecDestroy(cudaGraphExec_t graphExec) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t); - static auto func_ptr = LoadSymbol("cudaGraphExecDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graphExec); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphDestroy(cudaGraph_t graph) { 
- using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphDebugDotPrint( - cudaGraph_t graph, const char *path, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, const char *, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphDebugDotPrint"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, path, flags); -} - -#if CUDA_VERSION >= 11030 - -extern __host__ cudaError_t CUDARTAPI cudaUserObjectCreate( - cudaUserObject_t *object_out, void *ptr, cudaHostFn_t destroy, - unsigned int initialRefcount, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaUserObject_t *, void *, cudaHostFn_t, unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cudaUserObjectCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(object_out, ptr, destroy, initialRefcount, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaUserObjectRetain(cudaUserObject_t object, unsigned int count __dv(1)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaUserObject_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaUserObjectRetain"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(object, count); -} - -extern __host__ cudaError_t CUDARTAPI -cudaUserObjectRelease(cudaUserObject_t object, unsigned int count __dv(1)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaUserObject_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaUserObjectRelease"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(object, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphRetainUserObject( - cudaGraph_t graph, cudaUserObject_t object, unsigned int count __dv(1), - unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, 
cudaUserObject_t, - unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphRetainUserObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, object, count, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphReleaseUserObject( - cudaGraph_t graph, cudaUserObject_t object, unsigned int count __dv(1)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaUserObject_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphReleaseUserObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, object, count); -} - -#endif // CUDA_VERSION >= 11030 - -extern __host__ cudaError_t CUDARTAPI cudaGetDriverEntryPoint( - const char *symbol, void **funcPtr, unsigned long long flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const char *, void **, unsigned long long); - static auto func_ptr = LoadSymbol("cudaGetDriverEntryPoint"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, funcPtr, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetExportTable( - const void **ppExportTable, const cudaUUID_t *pExportTableId) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void **, const cudaUUID_t *); - static auto func_ptr = LoadSymbol("cudaGetExportTable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ppExportTable, pExportTableId); -} - -#if CUDA_VERSION >= 11020 - -extern __host__ cudaError_t CUDARTAPI_CDECL -cudaGetFuncBySymbol(cudaFunction_t *functionPtr, const void *symbolPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaFunction_t *, const void *); - static auto func_ptr = LoadSymbol("cudaGetFuncBySymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(functionPtr, symbolPtr); -} - -#endif // CUDA_VERSION >= 11020 - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cuda_runtime_11_2.inc b/third_party/xla/third_party/tsl/tsl/cuda/cuda_runtime_11_2.inc deleted file mode 100644 
index 5c0ba7fe6a4e39..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cuda_runtime_11_2.inc +++ /dev/null @@ -1,2259 +0,0 @@ -// Auto-generated, do not edit. - -extern "C" { -extern __host__ cudaError_t CUDARTAPI cudaDeviceReset(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaDeviceReset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceSynchronize(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaDeviceSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceSetLimit(enum cudaLimit limit, - size_t value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); - static auto func_ptr = LoadSymbol("cudaDeviceSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetLimit(size_t *pValue, enum cudaLimit limit) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); - static auto func_ptr = LoadSymbol("cudaDeviceGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pValue, limit); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetCacheConfig(enum cudaFuncCache *pCacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); - static auto func_ptr = LoadSymbol("cudaDeviceGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetStreamPriorityRange(int *leastPriority, int *greatestPriority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int *); - static auto func_ptr = - LoadSymbol("cudaDeviceGetStreamPriorityRange"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(leastPriority, greatestPriority); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetCacheConfig(enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaDeviceSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(cacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetSharedMemConfig(enum cudaSharedMemConfig *pConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig *); - static auto func_ptr = LoadSymbol("cudaDeviceGetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetSharedMemConfig(enum cudaSharedMemConfig config) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig); - static auto func_ptr = LoadSymbol("cudaDeviceSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceGetByPCIBusId(int *device, const char *pciBusId) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const char *); - static auto func_ptr = LoadSymbol("cudaDeviceGetByPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, pciBusId); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceGetPCIBusId(char *pciBusId, - int len, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(char *, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pciBusId, len, device); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcGetEventHandle(cudaIpcEventHandle_t *handle, cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcEventHandle_t *, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaIpcGetEventHandle"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcOpenEventHandle(cudaEvent_t *event, cudaIpcEventHandle_t handle) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, cudaIpcEventHandle_t); - static auto func_ptr = LoadSymbol("cudaIpcOpenEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, handle); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcGetMemHandle(cudaIpcMemHandle_t *handle, void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcMemHandle_t *, void *); - static auto func_ptr = LoadSymbol("cudaIpcGetMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaIpcOpenMemHandle( - void **devPtr, cudaIpcMemHandle_t handle, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, cudaIpcMemHandle_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaIpcOpenMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, handle, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaIpcCloseMemHandle(void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaIpcCloseMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaThreadExit(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaThreadExit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSynchronize(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaThreadSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSetLimit(enum 
cudaLimit limit, size_t value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); - static auto func_ptr = LoadSymbol("cudaThreadSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadGetLimit(size_t *pValue, enum cudaLimit limit) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); - static auto func_ptr = LoadSymbol("cudaThreadGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pValue, limit); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadGetCacheConfig(enum cudaFuncCache *pCacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); - static auto func_ptr = LoadSymbol("cudaThreadGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCacheConfig); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSetCacheConfig(enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaThreadSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(cacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetLastError(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaGetLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaPeekAtLastError(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaPeekAtLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ const char *CUDARTAPI -cudaGetErrorName(cudaError_t error) { - using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); - static auto func_ptr = LoadSymbol("cudaGetErrorName"); - 
if (!func_ptr) return "cudaGetErrorName symbol not found."; - return func_ptr(error); -} - -extern __host__ __cudart_builtin__ const char *CUDARTAPI -cudaGetErrorString(cudaError_t error) { - using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); - static auto func_ptr = LoadSymbol("cudaGetErrorString"); - if (!func_ptr) return "cudaGetErrorString symbol not found."; - return func_ptr(error); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDeviceCount(int *count) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaGetDeviceCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDeviceProperties(struct cudaDeviceProp *prop, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaDeviceProp *, int); - static auto func_ptr = LoadSymbol("cudaGetDeviceProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(prop, device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetAttribute(int *value, enum cudaDeviceAttr attr, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceAttr, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attr, device); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceGetDefaultMemPool(cudaMemPool_t *memPool, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t *, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetDefaultMemPool"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, device); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetMemPool(int device, cudaMemPool_t memPool) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int, cudaMemPool_t); - static auto func_ptr = LoadSymbol("cudaDeviceSetMemPool"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(device, memPool); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceGetMemPool(cudaMemPool_t *memPool, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t *, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetMemPool"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, device); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceGetNvSciSyncAttributes( - void *nvSciSyncAttrList, int device, int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, int); - static auto func_ptr = - LoadSymbol("cudaDeviceGetNvSciSyncAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(nvSciSyncAttrList, device, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetP2PAttribute(int *value, enum cudaDeviceP2PAttr attr, - int srcDevice, int dstDevice) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceP2PAttr, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetP2PAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attr, srcDevice, dstDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaChooseDevice(int *device, const struct cudaDeviceProp *prop) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const struct cudaDeviceProp *); - static auto func_ptr = LoadSymbol("cudaChooseDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, prop); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDevice(int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaSetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDevice(int *device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaGetDevice"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetValidDevices(int *device_arr, - int len) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int); - static auto func_ptr = LoadSymbol("cudaSetValidDevices"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device_arr, len); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDeviceFlags(unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int); - static auto func_ptr = LoadSymbol("cudaSetDeviceFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetDeviceFlags(unsigned int *flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *); - static auto func_ptr = LoadSymbol("cudaGetDeviceFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamCreate(cudaStream_t *pStream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *); - static auto func_ptr = LoadSymbol("cudaStreamCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCreateWithFlags(cudaStream_t *pStream, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamCreateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCreateWithPriority(cudaStream_t *pStream, unsigned int flags, - int priority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int, int); - static auto func_ptr = LoadSymbol("cudaStreamCreateWithPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream, flags, priority); -} - -extern __host__ __cudart_builtin__ cudaError_t 
CUDARTAPI -cudaStreamGetPriority(cudaStream_t hStream, int *priority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, int *); - static auto func_ptr = LoadSymbol("cudaStreamGetPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, priority); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetFlags(cudaStream_t hStream, unsigned int *flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, unsigned int *); - static auto func_ptr = LoadSymbol("cudaStreamGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaCtxResetPersistingL2Cache(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaCtxResetPersistingL2Cache"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCopyAttributes(cudaStream_t dst, cudaStream_t src) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamCopyAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetAttribute(cudaStream_t hStream, enum cudaStreamAttrID attr, - union cudaStreamAttrValue *value_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamAttrID, - union cudaStreamAttrValue *); - static auto func_ptr = LoadSymbol("cudaStreamGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, attr, value_out); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamSetAttribute(cudaStream_t hStream, enum cudaStreamAttrID attr, - const union cudaStreamAttrValue *value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamAttrID, - const union cudaStreamAttrValue *); - 
static auto func_ptr = LoadSymbol("cudaStreamSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, attr, value); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamDestroy(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamWaitEvent( - cudaStream_t stream, cudaEvent_t event, unsigned int flags __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, cudaEvent_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamWaitEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, event, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamAddCallback(cudaStream_t stream, cudaStreamCallback_t callback, - void *userData, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStreamCallback_t, - void *, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamAddCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, callback, userData, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamSynchronize(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamQuery(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamAttachMemAsync(cudaStream_t stream, void *devPtr, - size_t length __dv(0), unsigned int 
flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamAttachMemAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, devPtr, length, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamBeginCapture(cudaStream_t stream, enum cudaStreamCaptureMode mode) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamCaptureMode); - static auto func_ptr = LoadSymbol("cudaStreamBeginCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, mode); -} - -extern __host__ cudaError_t CUDARTAPI -cudaThreadExchangeStreamCaptureMode(enum cudaStreamCaptureMode *mode) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaStreamCaptureMode *); - static auto func_ptr = - LoadSymbol("cudaThreadExchangeStreamCaptureMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mode); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamEndCapture(cudaStream_t stream, cudaGraph_t *pGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaGraph_t *); - static auto func_ptr = LoadSymbol("cudaStreamEndCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pGraph); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamIsCapturing( - cudaStream_t stream, enum cudaStreamCaptureStatus *pCaptureStatus) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamCaptureStatus *); - static auto func_ptr = LoadSymbol("cudaStreamIsCapturing"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pCaptureStatus); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamGetCaptureInfo( - cudaStream_t stream, enum cudaStreamCaptureStatus *pCaptureStatus, - unsigned long long *pId) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaStream_t, enum cudaStreamCaptureStatus *, unsigned long long *); - static auto func_ptr = 
LoadSymbol("cudaStreamGetCaptureInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pCaptureStatus, pId); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamGetCaptureInfo_v2( - cudaStream_t stream, enum cudaStreamCaptureStatus *captureStatus_out, - unsigned long long *id_out __dv(0), cudaGraph_t *graph_out __dv(0), - const cudaGraphNode_t **dependencies_out __dv(0), - size_t *numDependencies_out __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaStream_t, enum cudaStreamCaptureStatus *, unsigned long long *, - cudaGraph_t *, const cudaGraphNode_t **, size_t *); - static auto func_ptr = LoadSymbol("cudaStreamGetCaptureInfo_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, captureStatus_out, id_out, graph_out, - dependencies_out, numDependencies_out); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamUpdateCaptureDependencies( - cudaStream_t stream, cudaGraphNode_t *dependencies, size_t numDependencies, - unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaGraphNode_t *, - size_t, unsigned int); - static auto func_ptr = - LoadSymbol("cudaStreamUpdateCaptureDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, dependencies, numDependencies, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventCreate(cudaEvent_t *event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *); - static auto func_ptr = LoadSymbol("cudaEventCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventCreateWithFlags(cudaEvent_t *event, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaEventCreateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, flags); -} - -extern __host__ __cudart_builtin__ 
cudaError_t CUDARTAPI -cudaEventRecord(cudaEvent_t event, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaEventRecord"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventQuery(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventSynchronize(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventDestroy(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventElapsedTime(float *ms, - cudaEvent_t start, - cudaEvent_t end) { - using FuncPtr = cudaError_t(CUDARTAPI *)(float *, cudaEvent_t, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventElapsedTime"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ms, start, end); -} - -extern __host__ cudaError_t CUDARTAPI cudaImportExternalMemory( - cudaExternalMemory_t *extMem_out, - const struct cudaExternalMemoryHandleDesc *memHandleDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaExternalMemory_t *, const struct cudaExternalMemoryHandleDesc *); - static auto func_ptr = LoadSymbol("cudaImportExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem_out, memHandleDesc); -} - -extern __host__ cudaError_t CUDARTAPI 
cudaExternalMemoryGetMappedBuffer( - void **devPtr, cudaExternalMemory_t extMem, - const struct cudaExternalMemoryBufferDesc *bufferDesc) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, cudaExternalMemory_t, - const struct cudaExternalMemoryBufferDesc *); - static auto func_ptr = - LoadSymbol("cudaExternalMemoryGetMappedBuffer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, extMem, bufferDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaExternalMemoryGetMappedMipmappedArray( - cudaMipmappedArray_t *mipmap, cudaExternalMemory_t extMem, - const struct cudaExternalMemoryMipmappedArrayDesc *mipmapDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMipmappedArray_t *, cudaExternalMemory_t, - const struct cudaExternalMemoryMipmappedArrayDesc *); - static auto func_ptr = - LoadSymbol("cudaExternalMemoryGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmap, extMem, mipmapDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyExternalMemory(cudaExternalMemory_t extMem) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaExternalMemory_t); - static auto func_ptr = LoadSymbol("cudaDestroyExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem); -} - -extern __host__ cudaError_t CUDARTAPI cudaImportExternalSemaphore( - cudaExternalSemaphore_t *extSem_out, - const struct cudaExternalSemaphoreHandleDesc *semHandleDesc) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreHandleDesc *); - static auto func_ptr = LoadSymbol("cudaImportExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem_out, semHandleDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaSignalExternalSemaphoresAsync( - const cudaExternalSemaphore_t *extSemArray, - const struct cudaExternalSemaphoreSignalParams *paramsArray, - unsigned int numExtSems, cudaStream_t stream __dv(0)) 
{ - using FuncPtr = - cudaError_t(CUDARTAPI *)(const cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreSignalParams *, - unsigned int, cudaStream_t); - static auto func_ptr = LoadSymbol( - "__CUDART_API_PTSZ(cudaSignalExternalSemaphoresAsync_v2)"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaWaitExternalSemaphoresAsync( - const cudaExternalSemaphore_t *extSemArray, - const struct cudaExternalSemaphoreWaitParams *paramsArray, - unsigned int numExtSems, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreWaitParams *, - unsigned int, cudaStream_t); - static auto func_ptr = LoadSymbol( - "__CUDART_API_PTSZ(cudaWaitExternalSemaphoresAsync_v2)"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyExternalSemaphore(cudaExternalSemaphore_t extSem) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaExternalSemaphore_t); - static auto func_ptr = LoadSymbol("cudaDestroyExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem); -} - -extern __host__ cudaError_t CUDARTAPI -cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, void **args, - size_t sharedMem, cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaLaunchKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, gridDim, blockDim, args, sharedMem, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchCooperativeKernel( - const void *func, dim3 gridDim, dim3 blockDim, void **args, - size_t sharedMem, cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, 
dim3, void **, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaLaunchCooperativeKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, gridDim, blockDim, args, sharedMem, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchCooperativeKernelMultiDevice( - struct cudaLaunchParams *launchParamsList, unsigned int numDevices, - unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaLaunchParams *, - unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cudaLaunchCooperativeKernelMultiDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(launchParamsList, numDevices, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFuncSetCacheConfig(const void *func, enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaFuncSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, cacheConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFuncSetSharedMemConfig(const void *func, enum cudaSharedMemConfig config) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, enum cudaSharedMemConfig); - static auto func_ptr = LoadSymbol("cudaFuncSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, config); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFuncGetAttributes(struct cudaFuncAttributes *attr, const void *func) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaFuncAttributes *, const void *); - static auto func_ptr = LoadSymbol("cudaFuncGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attr, func); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFuncSetAttribute(const void *func, enum cudaFuncAttribute attr, int value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, enum 
cudaFuncAttribute, int); - static auto func_ptr = LoadSymbol("cudaFuncSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, attr, value); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaSetDoubleForDevice(double *d) { - using FuncPtr = cudaError_t(CUDARTAPI *)(double *); - static auto func_ptr = LoadSymbol("cudaSetDoubleForDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(d); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaSetDoubleForHost(double *d) { - using FuncPtr = cudaError_t(CUDARTAPI *)(double *); - static auto func_ptr = LoadSymbol("cudaSetDoubleForHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(d); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchHostFunc(cudaStream_t stream, - cudaHostFn_t fn, - void *userData) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaHostFn_t, void *); - static auto func_ptr = LoadSymbol("cudaLaunchHostFunc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, fn, userData); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *func, - int blockSize, - size_t dynamicSMemSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t); - static auto func_ptr = - LoadSymbol("cudaOccupancyMaxActiveBlocksPerMultiprocessor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyAvailableDynamicSMemPerBlock(size_t *dynamicSmemSize, - const void *func, int numBlocks, - int blockSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, const void *, int, int); - static auto func_ptr = - LoadSymbol("cudaOccupancyAvailableDynamicSMemPerBlock"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dynamicSmemSize, 
func, numBlocks, blockSize); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, - const void *func, - int blockSize, - size_t dynamicSMemSize, - unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t, unsigned int); - static auto func_ptr = LoadSymbol( - "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMallocManaged(void **devPtr, size_t size, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocManaged"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMalloc(void **devPtr, size_t size) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cudaMalloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocHost(void **ptr, size_t size) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cudaMallocHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocPitch(void **devPtr, - size_t *pitch, - size_t width, - size_t height) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t *, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaMallocPitch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, width, height); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocArray( - cudaArray_t *array, const struct cudaChannelFormatDesc *desc, size_t 
width, - size_t height __dv(0), unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, - const struct cudaChannelFormatDesc *, - size_t, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, desc, width, height, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFree(void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaFree"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeHost(void *ptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaFreeHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeArray(cudaArray_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t); - static auto func_ptr = LoadSymbol("cudaFreeArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFreeMipmappedArray(cudaMipmappedArray_t mipmappedArray) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t); - static auto func_ptr = LoadSymbol("cudaFreeMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostAlloc(void **pHost, size_t size, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostAlloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHost, size, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostRegister(void *ptr, size_t size, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, unsigned int); - static auto func_ptr = 
LoadSymbol("cudaHostRegister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostUnregister(void *ptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaHostUnregister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaHostGetDevicePointer(void **pDevice, void *pHost, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, void *, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostGetDevicePointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pDevice, pHost, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostGetFlags(unsigned int *pFlags, - void *pHost) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *, void *); - static auto func_ptr = LoadSymbol("cudaHostGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFlags, pHost); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMalloc3D(struct cudaPitchedPtr *pitchedDevPtr, struct cudaExtent extent) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr *, struct cudaExtent); - static auto func_ptr = LoadSymbol("cudaMalloc3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, extent); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMalloc3DArray(cudaArray_t *array, const struct cudaChannelFormatDesc *desc, - struct cudaExtent extent, unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, - const struct cudaChannelFormatDesc *, - struct cudaExtent, unsigned int); - static auto func_ptr = LoadSymbol("cudaMalloc3DArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, desc, extent, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocMipmappedArray( - cudaMipmappedArray_t *mipmappedArray, - const struct 
cudaChannelFormatDesc *desc, struct cudaExtent extent, - unsigned int numLevels, unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMipmappedArray_t *, const struct cudaChannelFormatDesc *, - struct cudaExtent, unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray, desc, extent, numLevels, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetMipmappedArrayLevel( - cudaArray_t *levelArray, cudaMipmappedArray_const_t mipmappedArray, - unsigned int level) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t *, cudaMipmappedArray_const_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGetMipmappedArrayLevel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(levelArray, mipmappedArray, level); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpy3D(const struct cudaMemcpy3DParms *p) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaMemcpy3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpy3DPeer(const struct cudaMemcpy3DPeerParms *p) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *); - static auto func_ptr = LoadSymbol("cudaMemcpy3DPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy3DAsync( - const struct cudaMemcpy3DParms *p, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy3DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy3DPeerAsync( - const struct cudaMemcpy3DPeerParms *p, 
cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *, - cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy3DPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemGetInfo(size_t *free, - size_t *total) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaMemGetInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(free, total); -} - -extern __host__ cudaError_t CUDARTAPI -cudaArrayGetInfo(struct cudaChannelFormatDesc *desc, struct cudaExtent *extent, - unsigned int *flags, cudaArray_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, - struct cudaExtent *, unsigned int *, - cudaArray_t); - static auto func_ptr = LoadSymbol("cudaArrayGetInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(desc, extent, flags, array); -} - -extern __host__ cudaError_t CUDARTAPI cudaArrayGetPlane( - cudaArray_t *pPlaneArray, cudaArray_t hArray, unsigned int planeIdx) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t *, cudaArray_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaArrayGetPlane"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pPlaneArray, hArray, planeIdx); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy(void *dst, const void *src, - size_t count, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, - enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyPeer(void *dst, int dstDevice, - const void *src, - int srcDevice, - size_t count) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, int, const void *, int, size_t); - 
static auto func_ptr = LoadSymbol("cudaMemcpyPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dstDevice, src, srcDevice, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2D(void *dst, size_t dpitch, - const void *src, - size_t spitch, size_t width, - size_t height, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, spitch, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArray( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, const void *, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArray( - void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, size_t, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DArrayToArray( - cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, - cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, - size_t height, enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, - cudaArray_const_t, size_t, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DArrayToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, - width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbol( - const void *symbol, const void *src, size_t count, size_t offset __dv(0), - enum cudaMemcpyKind kind __dv(cudaMemcpyHostToDevice)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, - size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbol( - void *dst, const void *symbol, size_t count, size_t offset __dv(0), - enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToHost)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, - enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, symbol, count, offset, kind); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemcpyAsync(void *dst, const void *src, size_t count, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, count, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpyPeerAsync(void *dst, int dstDevice, const void *src, int srcDevice, - size_t count, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, const void *, int, - size_t, cudaStream_t); 
- static auto func_ptr = LoadSymbol("cudaMemcpyPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dstDevice, src, srcDevice, count, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy2DAsync( - void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, - size_t height, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, size_t, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, spitch, width, height, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArrayAsync( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, - const void *, size_t, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DToArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind, - stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArrayAsync( - void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, - size_t, size_t, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind, - stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbolAsync( - const void 
*symbol, const void *src, size_t count, size_t offset, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyToSymbolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, src, count, offset, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbolAsync( - void *dst, const void *symbol, size_t count, size_t offset, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, symbol, count, offset, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset(void *devPtr, int value, - size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t); - static auto func_ptr = LoadSymbol("cudaMemset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, value, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset2D(void *devPtr, size_t pitch, - int value, size_t width, - size_t height) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, int, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaMemset2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, value, width, height); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset3D( - struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, struct cudaExtent); - static auto func_ptr = LoadSymbol("cudaMemset3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, value, extent); -} - -extern __host__ 
__cudart_builtin__ cudaError_t CUDARTAPI cudaMemsetAsync( - void *devPtr, int value, size_t count, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemsetAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, value, count, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemset2DAsync(void *devPtr, size_t pitch, int value, size_t width, - size_t height, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, int, size_t, size_t, - cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemset2DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, value, width, height, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemset3DAsync(struct cudaPitchedPtr pitchedDevPtr, int value, - struct cudaExtent extent, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, - struct cudaExtent, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemset3DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, value, extent, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSymbolAddress(void **devPtr, - const void *symbol) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, const void *); - static auto func_ptr = LoadSymbol("cudaGetSymbolAddress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSymbolSize(size_t *size, - const void *symbol) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, const void *); - static auto func_ptr = LoadSymbol("cudaGetSymbolSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(size, symbol); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPrefetchAsync(const void *devPtr, size_t 
count, int dstDevice, - cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, size_t, int, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemPrefetchAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, dstDevice, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemAdvise(const void *devPtr, size_t count, enum cudaMemoryAdvise advice, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, size_t, - enum cudaMemoryAdvise, int); - static auto func_ptr = LoadSymbol("cudaMemAdvise"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, advice, device); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttribute( - void *data, size_t dataSize, enum cudaMemRangeAttribute attribute, - const void *devPtr, size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - void *, size_t, enum cudaMemRangeAttribute, const void *, size_t); - static auto func_ptr = LoadSymbol("cudaMemRangeGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSize, attribute, devPtr, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttributes( - void **data, size_t *dataSizes, enum cudaMemRangeAttribute *attributes, - size_t numAttributes, const void *devPtr, size_t count) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t *, enum cudaMemRangeAttribute *, - size_t, const void *, size_t); - static auto func_ptr = LoadSymbol("cudaMemRangeGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSizes, attributes, numAttributes, devPtr, count); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaMemcpyToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, - const void *src, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t, size_t, size_t, const void *, size_t, enum cudaMemcpyKind); - 
static auto func_ptr = LoadSymbol("cudaMemcpyToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaMemcpyFromArray(void *dst, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyFromArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, wOffset, hOffset, count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaMemcpyArrayToArray( - cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, - cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, - enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, cudaArray_const_t, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyArrayToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, - count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaMemcpyToArrayAsync( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t count, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, const void *, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyToArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, count, kind, stream); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaMemcpyFromArrayAsync(void *dst, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t count, enum 
cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, size_t, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyFromArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, wOffset, hOffset, count, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocAsync(void **devPtr, - size_t size, - cudaStream_t hStream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMallocAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, hStream); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeAsync(void *devPtr, - cudaStream_t hStream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaFreeAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, hStream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolTrimTo(cudaMemPool_t memPool, - size_t minBytesToKeep) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t, size_t); - static auto func_ptr = LoadSymbol("cudaMemPoolTrimTo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, minBytesToKeep); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolSetAttribute( - cudaMemPool_t memPool, enum cudaMemPoolAttr attr, void *value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaMemPool_t, enum cudaMemPoolAttr, void *); - static auto func_ptr = LoadSymbol("cudaMemPoolSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, attr, value); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolGetAttribute( - cudaMemPool_t memPool, enum cudaMemPoolAttr attr, void *value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaMemPool_t, enum cudaMemPoolAttr, void *); - static auto func_ptr = 
LoadSymbol("cudaMemPoolGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, attr, value); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPoolSetAccess(cudaMemPool_t memPool, - const struct cudaMemAccessDesc *descList, size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMemPool_t, const struct cudaMemAccessDesc *, size_t); - static auto func_ptr = LoadSymbol("cudaMemPoolSetAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, descList, count); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPoolGetAccess(enum cudaMemAccessFlags *flags, cudaMemPool_t memPool, - struct cudaMemLocation *location) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - enum cudaMemAccessFlags *, cudaMemPool_t, struct cudaMemLocation *); - static auto func_ptr = LoadSymbol("cudaMemPoolGetAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags, memPool, location); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolCreate( - cudaMemPool_t *memPool, const struct cudaMemPoolProps *poolProps) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t *, - const struct cudaMemPoolProps *); - static auto func_ptr = LoadSymbol("cudaMemPoolCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, poolProps); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPoolDestroy(cudaMemPool_t memPool) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t); - static auto func_ptr = LoadSymbol("cudaMemPoolDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocFromPoolAsync( - void **ptr, size_t size, cudaMemPool_t memPool, cudaStream_t stream) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t, cudaMemPool_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMallocFromPoolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(ptr, size, memPool, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolExportToShareableHandle( - void *shareableHandle, cudaMemPool_t memPool, - enum cudaMemAllocationHandleType handleType, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - void *, cudaMemPool_t, enum cudaMemAllocationHandleType, unsigned int); - static auto func_ptr = - LoadSymbol("cudaMemPoolExportToShareableHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(shareableHandle, memPool, handleType, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolImportFromShareableHandle( - cudaMemPool_t *memPool, void *shareableHandle, - enum cudaMemAllocationHandleType handleType, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMemPool_t *, void *, enum cudaMemAllocationHandleType, unsigned int); - static auto func_ptr = - LoadSymbol("cudaMemPoolImportFromShareableHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, shareableHandle, handleType, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolExportPointer( - struct cudaMemPoolPtrExportData *exportData, void *ptr) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaMemPoolPtrExportData *, void *); - static auto func_ptr = LoadSymbol("cudaMemPoolExportPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(exportData, ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPoolImportPointer(void **ptr, cudaMemPool_t memPool, - struct cudaMemPoolPtrExportData *exportData) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, cudaMemPool_t, - struct cudaMemPoolPtrExportData *); - static auto func_ptr = LoadSymbol("cudaMemPoolImportPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, memPool, exportData); -} - -extern __host__ cudaError_t CUDARTAPI cudaPointerGetAttributes( - struct cudaPointerAttributes *attributes, const void *ptr) { - using FuncPtr = - 
cudaError_t(CUDARTAPI *)(struct cudaPointerAttributes *, const void *); - static auto func_ptr = LoadSymbol("cudaPointerGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attributes, ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceCanAccessPeer(int *canAccessPeer, int device, int peerDevice) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceCanAccessPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(canAccessPeer, device, peerDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceEnablePeerAccess(int peerDevice, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int, unsigned int); - static auto func_ptr = LoadSymbol("cudaDeviceEnablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerDevice, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceDisablePeerAccess(int peerDevice) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaDeviceDisablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphicsUnregisterResource(cudaGraphicsResource_t resource) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t); - static auto func_ptr = LoadSymbol("cudaGraphicsUnregisterResource"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceSetMapFlags( - cudaGraphicsResource_t resource, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphicsResourceSetMapFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsMapResources( - int count, cudaGraphicsResource_t 
*resources, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphicsMapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsUnmapResources( - int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphicsUnmapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceGetMappedPointer( - void **devPtr, size_t *size, cudaGraphicsResource_t resource) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t *, cudaGraphicsResource_t); - static auto func_ptr = - LoadSymbol("cudaGraphicsResourceGetMappedPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsSubResourceGetMappedArray( - cudaArray_t *array, cudaGraphicsResource_t resource, - unsigned int arrayIndex, unsigned int mipLevel) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t *, cudaGraphicsResource_t, unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cudaGraphicsSubResourceGetMappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, resource, arrayIndex, mipLevel); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphicsResourceGetMappedMipmappedArray( - cudaMipmappedArray_t *mipmappedArray, cudaGraphicsResource_t resource) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t *, cudaGraphicsResource_t); - static auto func_ptr = - LoadSymbol("cudaGraphicsResourceGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(mipmappedArray, resource); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaBindTexture( - size_t *offset, const struct textureReference *texref, const void *devPtr, - const struct cudaChannelFormatDesc *desc, size_t size __dv(UINT_MAX)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct textureReference *, const void *, - const struct cudaChannelFormatDesc *, size_t); - static auto func_ptr = LoadSymbol("cudaBindTexture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref, devPtr, desc, size); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaBindTexture2D(size_t *offset, const struct textureReference *texref, - const void *devPtr, const struct cudaChannelFormatDesc *desc, - size_t width, size_t height, size_t pitch) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct textureReference *, const void *, - const struct cudaChannelFormatDesc *, size_t, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaBindTexture2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref, devPtr, desc, width, height, pitch); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaBindTextureToArray( - const struct textureReference *texref, cudaArray_const_t array, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct textureReference *, cudaArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindTextureToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, array, desc); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaBindTextureToMipmappedArray(const struct textureReference *texref, - cudaMipmappedArray_const_t mipmappedArray, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct textureReference *, 
cudaMipmappedArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindTextureToMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, mipmappedArray, desc); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaUnbindTexture(const struct textureReference *texref) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct textureReference *); - static auto func_ptr = LoadSymbol("cudaUnbindTexture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaGetTextureAlignmentOffset(size_t *offset, - const struct textureReference *texref) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(size_t *, const struct textureReference *); - static auto func_ptr = LoadSymbol("cudaGetTextureAlignmentOffset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaGetTextureReference( - const struct textureReference **texref, const void *symbol) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct textureReference **, const void *); - static auto func_ptr = LoadSymbol("cudaGetTextureReference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, symbol); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaBindSurfaceToArray( - const struct surfaceReference *surfref, cudaArray_const_t array, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct surfaceReference *, cudaArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindSurfaceToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfref, array, desc); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaGetSurfaceReference( - const struct surfaceReference **surfref, const void 
*symbol) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct surfaceReference **, const void *); - static auto func_ptr = LoadSymbol("cudaGetSurfaceReference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfref, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetChannelDesc( - struct cudaChannelFormatDesc *desc, cudaArray_const_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, - cudaArray_const_t); - static auto func_ptr = LoadSymbol("cudaGetChannelDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(desc, array); -} - -extern __host__ struct cudaChannelFormatDesc CUDARTAPI cudaCreateChannelDesc( - int x, int y, int z, int w, enum cudaChannelFormatKind f) { - using FuncPtr = struct cudaChannelFormatDesc(CUDARTAPI *)( - int, int, int, int, enum cudaChannelFormatKind); - static auto func_ptr = LoadSymbol("cudaCreateChannelDesc"); - return func_ptr(x, y, z, w, f); -} - -extern __host__ cudaError_t CUDARTAPI cudaCreateTextureObject( - cudaTextureObject_t *pTexObject, const struct cudaResourceDesc *pResDesc, - const struct cudaTextureDesc *pTexDesc, - const struct cudaResourceViewDesc *pResViewDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaTextureObject_t *, const struct cudaResourceDesc *, - const struct cudaTextureDesc *, const struct cudaResourceViewDesc *); - static auto func_ptr = LoadSymbol("cudaCreateTextureObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexObject, pResDesc, pTexDesc, pResViewDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyTextureObject(cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaTextureObject_t); - static auto func_ptr = LoadSymbol("cudaDestroyTextureObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceDesc( - struct cudaResourceDesc *pResDesc, 
cudaTextureObject_t texObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaTextureObject_t); - static auto func_ptr = - LoadSymbol("cudaGetTextureObjectResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectTextureDesc( - struct cudaTextureDesc *pTexDesc, cudaTextureObject_t texObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaTextureDesc *, cudaTextureObject_t); - static auto func_ptr = LoadSymbol("cudaGetTextureObjectTextureDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceViewDesc( - struct cudaResourceViewDesc *pResViewDesc, cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaResourceViewDesc *, - cudaTextureObject_t); - static auto func_ptr = - LoadSymbol("cudaGetTextureObjectResourceViewDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResViewDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaCreateSurfaceObject( - cudaSurfaceObject_t *pSurfObject, const struct cudaResourceDesc *pResDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t *, - const struct cudaResourceDesc *); - static auto func_ptr = LoadSymbol("cudaCreateSurfaceObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pSurfObject, pResDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroySurfaceObject(cudaSurfaceObject_t surfObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t); - static auto func_ptr = LoadSymbol("cudaDestroySurfaceObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSurfaceObjectResourceDesc( - struct cudaResourceDesc *pResDesc, cudaSurfaceObject_t surfObject) { - 
using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaSurfaceObject_t); - static auto func_ptr = - LoadSymbol("cudaGetSurfaceObjectResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, surfObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaDriverGetVersion(int *driverVersion) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaDriverGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(driverVersion); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaRuntimeGetVersion(int *runtimeVersion) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaRuntimeGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(runtimeVersion); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphCreate(cudaGraph_t *pGraph, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraph, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddKernelNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddKernelNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeGetParams( - cudaGraphNode_t node, struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaKernelNodeParams *); - static auto 
func_ptr = LoadSymbol("cudaGraphKernelNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeSetParams( - cudaGraphNode_t node, const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphKernelNodeCopyAttributes(cudaGraphNode_t hSrc, cudaGraphNode_t hDst) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t); - static auto func_ptr = - LoadSymbol("cudaGraphKernelNodeCopyAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hSrc, hDst); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeGetAttribute( - cudaGraphNode_t hNode, enum cudaKernelNodeAttrID attr, - union cudaKernelNodeAttrValue *value_out) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, enum cudaKernelNodeAttrID, - union cudaKernelNodeAttrValue *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, attr, value_out); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeSetAttribute( - cudaGraphNode_t hNode, enum cudaKernelNodeAttrID attr, - const union cudaKernelNodeAttrValue *value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, enum cudaKernelNodeAttrID, - const union cudaKernelNodeAttrValue *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, attr, value); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t 
*pDependencies, size_t numDependencies, - const struct cudaMemcpy3DParms *pCopyParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemcpyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pCopyParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeGetParams( - cudaGraphNode_t node, struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParams( - cudaGraphNode_t node, const struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemsetNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaMemsetParams *pMemsetParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemsetNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pMemsetParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeGetParams( - cudaGraphNode_t node, struct cudaMemsetParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, 
struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphMemsetNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeSetParams( - cudaGraphNode_t node, const struct cudaMemsetParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddHostNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddHostNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeGetParams( - cudaGraphNode_t node, struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphHostNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeSetParams( - cudaGraphNode_t node, const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphHostNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI 
-cudaGraphAddChildGraphNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, - size_t numDependencies, cudaGraph_t childGraph) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphAddChildGraphNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - childGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphChildGraphNodeGetGraph(cudaGraphNode_t node, cudaGraph_t *pGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraph_t *); - static auto func_ptr = LoadSymbol("cudaGraphChildGraphNodeGetGraph"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pGraph); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddEmptyNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphAddEmptyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphClone(cudaGraph_t *pGraphClone, cudaGraph_t originalGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t *, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphClone, originalGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphNodeFindInClone(cudaGraphNode_t *pNode, cudaGraphNode_t originalNode, - cudaGraph_t clonedGraph) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraphNode_t, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphNodeFindInClone"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(pNode, originalNode, clonedGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphNodeGetType(cudaGraphNode_t node, enum cudaGraphNodeType *pType) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, enum cudaGraphNodeType *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pType); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetNodes(cudaGraph_t graph, - cudaGraphNode_t *nodes, - size_t *numNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, nodes, numNodes); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetRootNodes( - cudaGraph_t graph, cudaGraphNode_t *pRootNodes, size_t *pNumRootNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetRootNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, pRootNodes, pNumRootNodes); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetEdges(cudaGraph_t graph, - cudaGraphNode_t *from, - cudaGraphNode_t *to, - size_t *numEdges) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, - cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetEdges"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numEdges); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependencies( - cudaGraphNode_t node, cudaGraphNode_t *pDependencies, - size_t *pNumDependencies) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(node, pDependencies, pNumDependencies); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependentNodes( - cudaGraphNode_t node, cudaGraphNode_t *pDependentNodes, - size_t *pNumDependentNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetDependentNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pDependentNodes, pNumDependentNodes); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddDependencies(cudaGraph_t graph, const cudaGraphNode_t *from, - const cudaGraphNode_t *to, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, const cudaGraphNode_t *, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphAddDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphRemoveDependencies(cudaGraph_t graph, const cudaGraphNode_t *from, - const cudaGraphNode_t *to, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, const cudaGraphNode_t *, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphRemoveDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphDestroyNode(cudaGraphNode_t node) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t); - static auto func_ptr = LoadSymbol("cudaGraphDestroyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphInstantiate( - cudaGraphExec_t *pGraphExec, cudaGraph_t graph, cudaGraphNode_t *pErrorNode, - char *pLogBuffer, size_t bufferSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t *, cudaGraph_t, - cudaGraphNode_t *, char *, size_t); - 
static auto func_ptr = LoadSymbol("cudaGraphInstantiate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphExec, graph, pErrorNode, pLogBuffer, bufferSize); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecKernelNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExecKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemcpyNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemcpyNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemsetNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaMemsetParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaMemsetParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecHostNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphExecHostNodeSetParams"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecUpdate(cudaGraphExec_t hGraphExec, cudaGraph_t hGraph, - cudaGraphNode_t *hErrorNode_out, - enum cudaGraphExecUpdateResult *updateResult_out) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraph_t, cudaGraphNode_t *, - enum cudaGraphExecUpdateResult *); - static auto func_ptr = LoadSymbol("cudaGraphExecUpdate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hGraph, hErrorNode_out, updateResult_out); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphLaunch(cudaGraphExec_t graphExec, - cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphLaunch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graphExec, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecDestroy(cudaGraphExec_t graphExec) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t); - static auto func_ptr = LoadSymbol("cudaGraphExecDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graphExec); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphDestroy(cudaGraph_t graph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetExportTable( - const void **ppExportTable, const cudaUUID_t *pExportTableId) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void **, const cudaUUID_t *); - static auto func_ptr = LoadSymbol("cudaGetExportTable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ppExportTable, pExportTableId); -} - -extern __host__ cudaError_t CUDARTAPI_CDECL -cudaGetFuncBySymbol(cudaFunction_t *functionPtr, const void *symbolPtr) { - using 
FuncPtr = cudaError_t(CUDARTAPI *)(cudaFunction_t *, const void *); - static auto func_ptr = LoadSymbol("_CDECL cudaGetFuncBySymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(functionPtr, symbolPtr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddEventRecordNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, - size_t numDependencies, cudaEvent_t event) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaGraphAddEventRecordNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddEventWaitNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, - size_t numDependencies, cudaEvent_t event) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaGraphAddEventWaitNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, event); -} - -#if CUDA_VERSION >= 11030 - -extern __host__ cudaError_t CUDARTAPI cudaUserObjectCreate( - cudaUserObject_t *object_out, void *ptr, cudaHostFn_t destroy, - unsigned int initialRefcount, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaUserObject_t *, void *, cudaHostFn_t, unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cudaUserObjectCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(object_out, ptr, destroy, initialRefcount, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaUserObjectRetain(cudaUserObject_t object, unsigned int count __dv(1)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaUserObject_t, unsigned int); - static 
auto func_ptr = LoadSymbol("cudaUserObjectRetain"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(object, count); -} - -extern __host__ cudaError_t CUDARTAPI -cudaUserObjectRelease(cudaUserObject_t object, unsigned int count __dv(1)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaUserObject_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaUserObjectRelease"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(object, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphRetainUserObject( - cudaGraph_t graph, cudaUserObject_t object, unsigned int count __dv(1), - unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaUserObject_t, - unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphRetainUserObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, object, count, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphReleaseUserObject( - cudaGraph_t graph, cudaUserObject_t object, unsigned int count __dv(1)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaUserObject_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphReleaseUserObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, object, count); -} - -#endif // CUDA_VERSION >= 11030 - -extern __host__ cudaError_t CUDARTAPI cudaGetDriverEntryPoint( - const char *symbol, void **funcPtr, unsigned long long flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const char *, void **, unsigned long long); - static auto func_ptr = LoadSymbol("cudaGetDriverEntryPoint"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, funcPtr, flags); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cuda_runtime_11_8.inc b/third_party/xla/third_party/tsl/tsl/cuda/cuda_runtime_11_8.inc deleted file mode 100644 index 8000ce1f926a12..00000000000000 --- 
a/third_party/xla/third_party/tsl/tsl/cuda/cuda_runtime_11_8.inc +++ /dev/null @@ -1,2771 +0,0 @@ -// Auto-generated, do not edit. - -extern "C" { -extern __host__ cudaError_t CUDARTAPI cudaDeviceReset(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaDeviceReset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceSynchronize(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaDeviceSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceSetLimit(enum cudaLimit limit, - size_t value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); - static auto func_ptr = LoadSymbol("cudaDeviceSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetLimit(size_t *pValue, enum cudaLimit limit) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); - static auto func_ptr = LoadSymbol("cudaDeviceGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pValue, limit); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetTexture1DLinearMaxWidth( - size_t *maxWidthInElements, const struct cudaChannelFormatDesc *fmtDesc, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct cudaChannelFormatDesc *, int); - static auto func_ptr = - LoadSymbol("cudaDeviceGetTexture1DLinearMaxWidth"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(maxWidthInElements, fmtDesc, device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetCacheConfig(enum cudaFuncCache *pCacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); - static auto func_ptr = 
LoadSymbol("cudaDeviceGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetStreamPriorityRange(int *leastPriority, int *greatestPriority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int *); - static auto func_ptr = - LoadSymbol("cudaDeviceGetStreamPriorityRange"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(leastPriority, greatestPriority); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetCacheConfig(enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaDeviceSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(cacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetSharedMemConfig(enum cudaSharedMemConfig *pConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig *); - static auto func_ptr = LoadSymbol("cudaDeviceGetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetSharedMemConfig(enum cudaSharedMemConfig config) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig); - static auto func_ptr = LoadSymbol("cudaDeviceSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceGetByPCIBusId(int *device, const char *pciBusId) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const char *); - static auto func_ptr = LoadSymbol("cudaDeviceGetByPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, pciBusId); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceGetPCIBusId(char *pciBusId, - int len, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(char *, int, int); - static auto 
func_ptr = LoadSymbol("cudaDeviceGetPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pciBusId, len, device); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcGetEventHandle(cudaIpcEventHandle_t *handle, cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcEventHandle_t *, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaIpcGetEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcOpenEventHandle(cudaEvent_t *event, cudaIpcEventHandle_t handle) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, cudaIpcEventHandle_t); - static auto func_ptr = LoadSymbol("cudaIpcOpenEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, handle); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcGetMemHandle(cudaIpcMemHandle_t *handle, void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcMemHandle_t *, void *); - static auto func_ptr = LoadSymbol("cudaIpcGetMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaIpcOpenMemHandle( - void **devPtr, cudaIpcMemHandle_t handle, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, cudaIpcMemHandle_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaIpcOpenMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, handle, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaIpcCloseMemHandle(void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaIpcCloseMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceFlushGPUDirectRDMAWrites( - enum cudaFlushGPUDirectRDMAWritesTarget target, - enum cudaFlushGPUDirectRDMAWritesScope scope) { - using 
FuncPtr = - cudaError_t(CUDARTAPI *)(enum cudaFlushGPUDirectRDMAWritesTarget, - enum cudaFlushGPUDirectRDMAWritesScope); - static auto func_ptr = - LoadSymbol("cudaDeviceFlushGPUDirectRDMAWrites"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(target, scope); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaThreadExit(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaThreadExit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSynchronize(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaThreadSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSetLimit(enum cudaLimit limit, size_t value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); - static auto func_ptr = LoadSymbol("cudaThreadSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadGetLimit(size_t *pValue, enum cudaLimit limit) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); - static auto func_ptr = LoadSymbol("cudaThreadGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pValue, limit); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadGetCacheConfig(enum cudaFuncCache *pCacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); - static auto func_ptr = LoadSymbol("cudaThreadGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCacheConfig); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSetCacheConfig(enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); 
- static auto func_ptr = LoadSymbol("cudaThreadSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(cacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetLastError(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaGetLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaPeekAtLastError(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaPeekAtLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ const char *CUDARTAPI -cudaGetErrorName(cudaError_t error) { - using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); - static auto func_ptr = LoadSymbol("cudaGetErrorName"); - if (!func_ptr) return "cudaGetErrorName symbol not found."; - return func_ptr(error); -} - -extern __host__ __cudart_builtin__ const char *CUDARTAPI -cudaGetErrorString(cudaError_t error) { - using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); - static auto func_ptr = LoadSymbol("cudaGetErrorString"); - if (!func_ptr) return "cudaGetErrorString symbol not found."; - return func_ptr(error); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDeviceCount(int *count) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaGetDeviceCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDeviceProperties(struct cudaDeviceProp *prop, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaDeviceProp *, int); - static auto func_ptr = LoadSymbol("cudaGetDeviceProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(prop, device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI 
-cudaDeviceGetAttribute(int *value, enum cudaDeviceAttr attr, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceAttr, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attr, device); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceGetDefaultMemPool(cudaMemPool_t *memPool, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t *, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetDefaultMemPool"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, device); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetMemPool(int device, cudaMemPool_t memPool) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int, cudaMemPool_t); - static auto func_ptr = LoadSymbol("cudaDeviceSetMemPool"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, memPool); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceGetMemPool(cudaMemPool_t *memPool, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t *, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetMemPool"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, device); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceGetNvSciSyncAttributes( - void *nvSciSyncAttrList, int device, int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, int); - static auto func_ptr = - LoadSymbol("cudaDeviceGetNvSciSyncAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(nvSciSyncAttrList, device, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetP2PAttribute(int *value, enum cudaDeviceP2PAttr attr, - int srcDevice, int dstDevice) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceP2PAttr, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetP2PAttribute"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(value, attr, srcDevice, dstDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaChooseDevice(int *device, const struct cudaDeviceProp *prop) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const struct cudaDeviceProp *); - static auto func_ptr = LoadSymbol("cudaChooseDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, prop); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDevice(int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaSetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDevice(int *device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaGetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetValidDevices(int *device_arr, - int len) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int); - static auto func_ptr = LoadSymbol("cudaSetValidDevices"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device_arr, len); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDeviceFlags(unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int); - static auto func_ptr = LoadSymbol("cudaSetDeviceFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetDeviceFlags(unsigned int *flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *); - static auto func_ptr = LoadSymbol("cudaGetDeviceFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamCreate(cudaStream_t *pStream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *); - static auto func_ptr = 
LoadSymbol("cudaStreamCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCreateWithFlags(cudaStream_t *pStream, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamCreateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCreateWithPriority(cudaStream_t *pStream, unsigned int flags, - int priority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int, int); - static auto func_ptr = LoadSymbol("cudaStreamCreateWithPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream, flags, priority); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetPriority(cudaStream_t hStream, int *priority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, int *); - static auto func_ptr = LoadSymbol("cudaStreamGetPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, priority); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetFlags(cudaStream_t hStream, unsigned int *flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, unsigned int *); - static auto func_ptr = LoadSymbol("cudaStreamGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaCtxResetPersistingL2Cache(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaCtxResetPersistingL2Cache"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCopyAttributes(cudaStream_t dst, cudaStream_t src) { - using FuncPtr = cudaError_t(CUDARTAPI 
*)(cudaStream_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamCopyAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetAttribute(cudaStream_t hStream, cudaStreamAttrID attr, - cudaStreamAttrValue *value_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStreamAttrID, - cudaStreamAttrValue *); - static auto func_ptr = LoadSymbol("cudaStreamGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, attr, value_out); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamSetAttribute(cudaStream_t hStream, cudaStreamAttrID attr, - const cudaStreamAttrValue *value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStreamAttrID, - const cudaStreamAttrValue *); - static auto func_ptr = LoadSymbol("cudaStreamSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, attr, value); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamDestroy(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamWaitEvent( - cudaStream_t stream, cudaEvent_t event, unsigned int flags __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, cudaEvent_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamWaitEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, event, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamAddCallback(cudaStream_t stream, cudaStreamCallback_t callback, - void *userData, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStreamCallback_t, - void *, unsigned int); - static 
auto func_ptr = LoadSymbol("cudaStreamAddCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, callback, userData, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamSynchronize(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamQuery(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamAttachMemAsync(cudaStream_t stream, void *devPtr, - size_t length __dv(0), unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamAttachMemAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, devPtr, length, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamBeginCapture(cudaStream_t stream, enum cudaStreamCaptureMode mode) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamCaptureMode); - static auto func_ptr = LoadSymbol("cudaStreamBeginCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, mode); -} - -extern __host__ cudaError_t CUDARTAPI -cudaThreadExchangeStreamCaptureMode(enum cudaStreamCaptureMode *mode) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaStreamCaptureMode *); - static auto func_ptr = - LoadSymbol("cudaThreadExchangeStreamCaptureMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mode); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamEndCapture(cudaStream_t stream, cudaGraph_t *pGraph) { - using FuncPtr = cudaError_t(CUDARTAPI 
*)(cudaStream_t, cudaGraph_t *); - static auto func_ptr = LoadSymbol("cudaStreamEndCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pGraph); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamIsCapturing( - cudaStream_t stream, enum cudaStreamCaptureStatus *pCaptureStatus) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamCaptureStatus *); - static auto func_ptr = LoadSymbol("cudaStreamIsCapturing"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pCaptureStatus); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamGetCaptureInfo( - cudaStream_t stream, enum cudaStreamCaptureStatus *pCaptureStatus, - unsigned long long *pId) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaStream_t, enum cudaStreamCaptureStatus *, unsigned long long *); - static auto func_ptr = LoadSymbol("cudaStreamGetCaptureInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pCaptureStatus, pId); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamGetCaptureInfo_v2( - cudaStream_t stream, enum cudaStreamCaptureStatus *captureStatus_out, - unsigned long long *id_out __dv(0), cudaGraph_t *graph_out __dv(0), - const cudaGraphNode_t **dependencies_out __dv(0), - size_t *numDependencies_out __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaStream_t, enum cudaStreamCaptureStatus *, unsigned long long *, - cudaGraph_t *, const cudaGraphNode_t **, size_t *); - static auto func_ptr = LoadSymbol("cudaStreamGetCaptureInfo_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, captureStatus_out, id_out, graph_out, - dependencies_out, numDependencies_out); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamUpdateCaptureDependencies( - cudaStream_t stream, cudaGraphNode_t *dependencies, size_t numDependencies, - unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaGraphNode_t *, - size_t, unsigned 
int); - static auto func_ptr = - LoadSymbol("cudaStreamUpdateCaptureDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, dependencies, numDependencies, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventCreate(cudaEvent_t *event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *); - static auto func_ptr = LoadSymbol("cudaEventCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventCreateWithFlags(cudaEvent_t *event, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaEventCreateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventRecord(cudaEvent_t event, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaEventRecord"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventRecordWithFlags(cudaEvent_t event, cudaStream_t stream __dv(0), - unsigned int flags __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaEvent_t, cudaStream_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaEventRecordWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, stream, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventQuery(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventSynchronize(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static 
auto func_ptr = LoadSymbol("cudaEventSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventDestroy(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventElapsedTime(float *ms, - cudaEvent_t start, - cudaEvent_t end) { - using FuncPtr = cudaError_t(CUDARTAPI *)(float *, cudaEvent_t, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventElapsedTime"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ms, start, end); -} - -extern __host__ cudaError_t CUDARTAPI cudaImportExternalMemory( - cudaExternalMemory_t *extMem_out, - const struct cudaExternalMemoryHandleDesc *memHandleDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaExternalMemory_t *, const struct cudaExternalMemoryHandleDesc *); - static auto func_ptr = LoadSymbol("cudaImportExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem_out, memHandleDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaExternalMemoryGetMappedBuffer( - void **devPtr, cudaExternalMemory_t extMem, - const struct cudaExternalMemoryBufferDesc *bufferDesc) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, cudaExternalMemory_t, - const struct cudaExternalMemoryBufferDesc *); - static auto func_ptr = - LoadSymbol("cudaExternalMemoryGetMappedBuffer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, extMem, bufferDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaExternalMemoryGetMappedMipmappedArray( - cudaMipmappedArray_t *mipmap, cudaExternalMemory_t extMem, - const struct cudaExternalMemoryMipmappedArrayDesc *mipmapDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMipmappedArray_t *, cudaExternalMemory_t, - 
const struct cudaExternalMemoryMipmappedArrayDesc *); - static auto func_ptr = - LoadSymbol("cudaExternalMemoryGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmap, extMem, mipmapDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyExternalMemory(cudaExternalMemory_t extMem) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaExternalMemory_t); - static auto func_ptr = LoadSymbol("cudaDestroyExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem); -} - -extern __host__ cudaError_t CUDARTAPI cudaImportExternalSemaphore( - cudaExternalSemaphore_t *extSem_out, - const struct cudaExternalSemaphoreHandleDesc *semHandleDesc) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreHandleDesc *); - static auto func_ptr = LoadSymbol("cudaImportExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem_out, semHandleDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaSignalExternalSemaphoresAsync( - const cudaExternalSemaphore_t *extSemArray, - const struct cudaExternalSemaphoreSignalParams *paramsArray, - unsigned int numExtSems, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreSignalParams *, - unsigned int, cudaStream_t); - static auto func_ptr = LoadSymbol( - "__CUDART_API_PTSZ(cudaSignalExternalSemaphoresAsync_v2)"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaWaitExternalSemaphoresAsync( - const cudaExternalSemaphore_t *extSemArray, - const struct cudaExternalSemaphoreWaitParams *paramsArray, - unsigned int numExtSems, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreWaitParams 
*, - unsigned int, cudaStream_t); - static auto func_ptr = LoadSymbol( - "__CUDART_API_PTSZ(cudaWaitExternalSemaphoresAsync_v2)"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyExternalSemaphore(cudaExternalSemaphore_t extSem) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaExternalSemaphore_t); - static auto func_ptr = LoadSymbol("cudaDestroyExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem); -} - -extern __host__ cudaError_t CUDARTAPI -cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, void **args, - size_t sharedMem, cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaLaunchKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, gridDim, blockDim, args, sharedMem, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchKernelExC( - const cudaLaunchConfig_t *config, const void *func, void **args) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const cudaLaunchConfig_t *, - const void *, void **); - static auto func_ptr = LoadSymbol("cudaLaunchKernelExC"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config, func, args); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchCooperativeKernel( - const void *func, dim3 gridDim, dim3 blockDim, void **args, - size_t sharedMem, cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaLaunchCooperativeKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, gridDim, blockDim, args, sharedMem, stream); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaLaunchCooperativeKernelMultiDevice( - struct cudaLaunchParams 
*launchParamsList, unsigned int numDevices, - unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaLaunchParams *, - unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cudaLaunchCooperativeKernelMultiDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(launchParamsList, numDevices, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFuncSetCacheConfig(const void *func, enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaFuncSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, cacheConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFuncSetSharedMemConfig(const void *func, enum cudaSharedMemConfig config) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, enum cudaSharedMemConfig); - static auto func_ptr = LoadSymbol("cudaFuncSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, config); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFuncGetAttributes(struct cudaFuncAttributes *attr, const void *func) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaFuncAttributes *, const void *); - static auto func_ptr = LoadSymbol("cudaFuncGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attr, func); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFuncSetAttribute(const void *func, enum cudaFuncAttribute attr, int value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncAttribute, int); - static auto func_ptr = LoadSymbol("cudaFuncSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, attr, value); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaSetDoubleForDevice(double *d) { - using FuncPtr = cudaError_t(CUDARTAPI *)(double *); - static auto 
func_ptr = LoadSymbol("cudaSetDoubleForDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(d); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaSetDoubleForHost(double *d) { - using FuncPtr = cudaError_t(CUDARTAPI *)(double *); - static auto func_ptr = LoadSymbol("cudaSetDoubleForHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(d); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchHostFunc(cudaStream_t stream, - cudaHostFn_t fn, - void *userData) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaHostFn_t, void *); - static auto func_ptr = LoadSymbol("cudaLaunchHostFunc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, fn, userData); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *func, - int blockSize, - size_t dynamicSMemSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t); - static auto func_ptr = - LoadSymbol("cudaOccupancyMaxActiveBlocksPerMultiprocessor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyAvailableDynamicSMemPerBlock(size_t *dynamicSmemSize, - const void *func, int numBlocks, - int blockSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, const void *, int, int); - static auto func_ptr = - LoadSymbol("cudaOccupancyAvailableDynamicSMemPerBlock"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dynamicSmemSize, func, numBlocks, blockSize); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, - const void *func, - int blockSize, - size_t dynamicSMemSize, - unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t, unsigned 
int); - static auto func_ptr = LoadSymbol( - "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxPotentialClusterSize(int *clusterSize, const void *func, - const cudaLaunchConfig_t *launchConfig) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const void *, const cudaLaunchConfig_t *); - static auto func_ptr = - LoadSymbol("cudaOccupancyMaxPotentialClusterSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(clusterSize, func, launchConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveClusters(int *numClusters, const void *func, - const cudaLaunchConfig_t *launchConfig) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const void *, const cudaLaunchConfig_t *); - static auto func_ptr = LoadSymbol("cudaOccupancyMaxActiveClusters"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numClusters, func, launchConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMallocManaged(void **devPtr, size_t size, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocManaged"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMalloc(void **devPtr, size_t size) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cudaMalloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocHost(void **ptr, size_t size) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cudaMallocHost"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(ptr, size); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocPitch(void **devPtr, - size_t *pitch, - size_t width, - size_t height) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t *, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaMallocPitch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, width, height); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocArray( - cudaArray_t *array, const struct cudaChannelFormatDesc *desc, size_t width, - size_t height __dv(0), unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, - const struct cudaChannelFormatDesc *, - size_t, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, desc, width, height, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFree(void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaFree"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeHost(void *ptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaFreeHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeArray(cudaArray_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t); - static auto func_ptr = LoadSymbol("cudaFreeArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFreeMipmappedArray(cudaMipmappedArray_t mipmappedArray) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t); - static auto func_ptr = LoadSymbol("cudaFreeMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(mipmappedArray); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostAlloc(void **pHost, size_t size, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostAlloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHost, size, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostRegister(void *ptr, size_t size, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostRegister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostUnregister(void *ptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaHostUnregister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaHostGetDevicePointer(void **pDevice, void *pHost, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, void *, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostGetDevicePointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pDevice, pHost, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostGetFlags(unsigned int *pFlags, - void *pHost) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *, void *); - static auto func_ptr = LoadSymbol("cudaHostGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFlags, pHost); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMalloc3D(struct cudaPitchedPtr *pitchedDevPtr, struct cudaExtent extent) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr *, struct cudaExtent); - static auto func_ptr = LoadSymbol("cudaMalloc3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, extent); -} - -extern __host__ cudaError_t 
CUDARTAPI -cudaMalloc3DArray(cudaArray_t *array, const struct cudaChannelFormatDesc *desc, - struct cudaExtent extent, unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, - const struct cudaChannelFormatDesc *, - struct cudaExtent, unsigned int); - static auto func_ptr = LoadSymbol("cudaMalloc3DArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, desc, extent, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocMipmappedArray( - cudaMipmappedArray_t *mipmappedArray, - const struct cudaChannelFormatDesc *desc, struct cudaExtent extent, - unsigned int numLevels, unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMipmappedArray_t *, const struct cudaChannelFormatDesc *, - struct cudaExtent, unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray, desc, extent, numLevels, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetMipmappedArrayLevel( - cudaArray_t *levelArray, cudaMipmappedArray_const_t mipmappedArray, - unsigned int level) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t *, cudaMipmappedArray_const_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGetMipmappedArrayLevel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(levelArray, mipmappedArray, level); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpy3D(const struct cudaMemcpy3DParms *p) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaMemcpy3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpy3DPeer(const struct cudaMemcpy3DPeerParms *p) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *); - static auto func_ptr = LoadSymbol("cudaMemcpy3DPeer"); - 
if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy3DAsync( - const struct cudaMemcpy3DParms *p, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy3DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy3DPeerAsync( - const struct cudaMemcpy3DPeerParms *p, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *, - cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy3DPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemGetInfo(size_t *free, - size_t *total) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaMemGetInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(free, total); -} - -extern __host__ cudaError_t CUDARTAPI -cudaArrayGetInfo(struct cudaChannelFormatDesc *desc, struct cudaExtent *extent, - unsigned int *flags, cudaArray_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, - struct cudaExtent *, unsigned int *, - cudaArray_t); - static auto func_ptr = LoadSymbol("cudaArrayGetInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(desc, extent, flags, array); -} - -extern __host__ cudaError_t CUDARTAPI cudaArrayGetPlane( - cudaArray_t *pPlaneArray, cudaArray_t hArray, unsigned int planeIdx) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t *, cudaArray_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaArrayGetPlane"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pPlaneArray, hArray, planeIdx); -} - -extern __host__ cudaError_t 
CUDARTAPI cudaArrayGetMemoryRequirements( - struct cudaArrayMemoryRequirements *memoryRequirements, cudaArray_t array, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaArrayMemoryRequirements *, - cudaArray_t, int); - static auto func_ptr = LoadSymbol("cudaArrayGetMemoryRequirements"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memoryRequirements, array, device); -} - -extern __host__ cudaError_t CUDARTAPI cudaMipmappedArrayGetMemoryRequirements( - struct cudaArrayMemoryRequirements *memoryRequirements, - cudaMipmappedArray_t mipmap, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaArrayMemoryRequirements *, - cudaMipmappedArray_t, int); - static auto func_ptr = - LoadSymbol("cudaMipmappedArrayGetMemoryRequirements"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memoryRequirements, mipmap, device); -} - -extern __host__ cudaError_t CUDARTAPI cudaArrayGetSparseProperties( - struct cudaArraySparseProperties *sparseProperties, cudaArray_t array) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaArraySparseProperties *, cudaArray_t); - static auto func_ptr = LoadSymbol("cudaArrayGetSparseProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(sparseProperties, array); -} - -extern __host__ cudaError_t CUDARTAPI cudaMipmappedArrayGetSparseProperties( - struct cudaArraySparseProperties *sparseProperties, - cudaMipmappedArray_t mipmap) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaArraySparseProperties *, - cudaMipmappedArray_t); - static auto func_ptr = - LoadSymbol("cudaMipmappedArrayGetSparseProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(sparseProperties, mipmap); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy(void *dst, const void *src, - size_t count, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, - enum cudaMemcpyKind); - static auto 
func_ptr = LoadSymbol("cudaMemcpy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyPeer(void *dst, int dstDevice, - const void *src, - int srcDevice, - size_t count) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, int, const void *, int, size_t); - static auto func_ptr = LoadSymbol("cudaMemcpyPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dstDevice, src, srcDevice, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2D(void *dst, size_t dpitch, - const void *src, - size_t spitch, size_t width, - size_t height, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, spitch, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArray( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, const void *, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArray( - void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, size_t, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, 
dpitch, src, wOffset, hOffset, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DArrayToArray( - cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, - cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, - size_t height, enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, - cudaArray_const_t, size_t, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DArrayToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, - width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbol( - const void *symbol, const void *src, size_t count, size_t offset __dv(0), - enum cudaMemcpyKind kind __dv(cudaMemcpyHostToDevice)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, - size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbol( - void *dst, const void *symbol, size_t count, size_t offset __dv(0), - enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToHost)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, - enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, symbol, count, offset, kind); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemcpyAsync(void *dst, const void *src, size_t count, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyAsync"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(dst, src, count, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpyPeerAsync(void *dst, int dstDevice, const void *src, int srcDevice, - size_t count, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, const void *, int, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dstDevice, src, srcDevice, count, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy2DAsync( - void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, - size_t height, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, size_t, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, spitch, width, height, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArrayAsync( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, - const void *, size_t, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DToArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind, - stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArrayAsync( - void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, - 
size_t, size_t, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind, - stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbolAsync( - const void *symbol, const void *src, size_t count, size_t offset, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyToSymbolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, src, count, offset, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbolAsync( - void *dst, const void *symbol, size_t count, size_t offset, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, symbol, count, offset, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset(void *devPtr, int value, - size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t); - static auto func_ptr = LoadSymbol("cudaMemset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, value, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset2D(void *devPtr, size_t pitch, - int value, size_t width, - size_t height) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, int, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaMemset2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, value, width, height); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset3D( - 
struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, struct cudaExtent); - static auto func_ptr = LoadSymbol("cudaMemset3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, value, extent); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemsetAsync( - void *devPtr, int value, size_t count, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemsetAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, value, count, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemset2DAsync(void *devPtr, size_t pitch, int value, size_t width, - size_t height, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, int, size_t, size_t, - cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemset2DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, value, width, height, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemset3DAsync(struct cudaPitchedPtr pitchedDevPtr, int value, - struct cudaExtent extent, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, - struct cudaExtent, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemset3DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, value, extent, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSymbolAddress(void **devPtr, - const void *symbol) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, const void *); - static auto func_ptr = LoadSymbol("cudaGetSymbolAddress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, symbol); -} - -extern __host__ cudaError_t CUDARTAPI 
cudaGetSymbolSize(size_t *size, - const void *symbol) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, const void *); - static auto func_ptr = LoadSymbol("cudaGetSymbolSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(size, symbol); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPrefetchAsync(const void *devPtr, size_t count, int dstDevice, - cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, size_t, int, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemPrefetchAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, dstDevice, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemAdvise(const void *devPtr, size_t count, enum cudaMemoryAdvise advice, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, size_t, - enum cudaMemoryAdvise, int); - static auto func_ptr = LoadSymbol("cudaMemAdvise"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, advice, device); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttribute( - void *data, size_t dataSize, enum cudaMemRangeAttribute attribute, - const void *devPtr, size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - void *, size_t, enum cudaMemRangeAttribute, const void *, size_t); - static auto func_ptr = LoadSymbol("cudaMemRangeGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSize, attribute, devPtr, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttributes( - void **data, size_t *dataSizes, enum cudaMemRangeAttribute *attributes, - size_t numAttributes, const void *devPtr, size_t count) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t *, enum cudaMemRangeAttribute *, - size_t, const void *, size_t); - static auto func_ptr = LoadSymbol("cudaMemRangeGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, 
dataSizes, attributes, numAttributes, devPtr, count); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaMemcpyToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, - const void *src, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t, size_t, size_t, const void *, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaMemcpyFromArray(void *dst, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyFromArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, wOffset, hOffset, count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaMemcpyArrayToArray( - cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, - cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, - enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, cudaArray_const_t, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyArrayToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, - count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaMemcpyToArrayAsync( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t count, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, const void *, - size_t, enum cudaMemcpyKind, 
cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyToArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, count, kind, stream); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaMemcpyFromArrayAsync(void *dst, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t count, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, size_t, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyFromArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, wOffset, hOffset, count, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocAsync(void **devPtr, - size_t size, - cudaStream_t hStream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMallocAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, hStream); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeAsync(void *devPtr, - cudaStream_t hStream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaFreeAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, hStream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolTrimTo(cudaMemPool_t memPool, - size_t minBytesToKeep) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t, size_t); - static auto func_ptr = LoadSymbol("cudaMemPoolTrimTo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, minBytesToKeep); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolSetAttribute( - cudaMemPool_t memPool, enum cudaMemPoolAttr attr, void *value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaMemPool_t, enum cudaMemPoolAttr, void *); - static auto func_ptr = 
LoadSymbol("cudaMemPoolSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, attr, value); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolGetAttribute( - cudaMemPool_t memPool, enum cudaMemPoolAttr attr, void *value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaMemPool_t, enum cudaMemPoolAttr, void *); - static auto func_ptr = LoadSymbol("cudaMemPoolGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, attr, value); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPoolSetAccess(cudaMemPool_t memPool, - const struct cudaMemAccessDesc *descList, size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMemPool_t, const struct cudaMemAccessDesc *, size_t); - static auto func_ptr = LoadSymbol("cudaMemPoolSetAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, descList, count); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPoolGetAccess(enum cudaMemAccessFlags *flags, cudaMemPool_t memPool, - struct cudaMemLocation *location) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - enum cudaMemAccessFlags *, cudaMemPool_t, struct cudaMemLocation *); - static auto func_ptr = LoadSymbol("cudaMemPoolGetAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags, memPool, location); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolCreate( - cudaMemPool_t *memPool, const struct cudaMemPoolProps *poolProps) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t *, - const struct cudaMemPoolProps *); - static auto func_ptr = LoadSymbol("cudaMemPoolCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, poolProps); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPoolDestroy(cudaMemPool_t memPool) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t); - static auto func_ptr = LoadSymbol("cudaMemPoolDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(memPool); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocFromPoolAsync( - void **ptr, size_t size, cudaMemPool_t memPool, cudaStream_t stream) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t, cudaMemPool_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMallocFromPoolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size, memPool, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolExportToShareableHandle( - void *shareableHandle, cudaMemPool_t memPool, - enum cudaMemAllocationHandleType handleType, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - void *, cudaMemPool_t, enum cudaMemAllocationHandleType, unsigned int); - static auto func_ptr = - LoadSymbol("cudaMemPoolExportToShareableHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(shareableHandle, memPool, handleType, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolImportFromShareableHandle( - cudaMemPool_t *memPool, void *shareableHandle, - enum cudaMemAllocationHandleType handleType, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMemPool_t *, void *, enum cudaMemAllocationHandleType, unsigned int); - static auto func_ptr = - LoadSymbol("cudaMemPoolImportFromShareableHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, shareableHandle, handleType, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolExportPointer( - struct cudaMemPoolPtrExportData *exportData, void *ptr) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaMemPoolPtrExportData *, void *); - static auto func_ptr = LoadSymbol("cudaMemPoolExportPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(exportData, ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPoolImportPointer(void **ptr, cudaMemPool_t memPool, - struct cudaMemPoolPtrExportData *exportData) { - using FuncPtr = cudaError_t(CUDARTAPI 
*)(void **, cudaMemPool_t, - struct cudaMemPoolPtrExportData *); - static auto func_ptr = LoadSymbol("cudaMemPoolImportPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, memPool, exportData); -} - -extern __host__ cudaError_t CUDARTAPI cudaPointerGetAttributes( - struct cudaPointerAttributes *attributes, const void *ptr) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPointerAttributes *, const void *); - static auto func_ptr = LoadSymbol("cudaPointerGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attributes, ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceCanAccessPeer(int *canAccessPeer, int device, int peerDevice) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceCanAccessPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(canAccessPeer, device, peerDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceEnablePeerAccess(int peerDevice, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int, unsigned int); - static auto func_ptr = LoadSymbol("cudaDeviceEnablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerDevice, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceDisablePeerAccess(int peerDevice) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaDeviceDisablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphicsUnregisterResource(cudaGraphicsResource_t resource) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t); - static auto func_ptr = LoadSymbol("cudaGraphicsUnregisterResource"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceSetMapFlags( - cudaGraphicsResource_t 
resource, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphicsResourceSetMapFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsMapResources( - int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphicsMapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsUnmapResources( - int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphicsUnmapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceGetMappedPointer( - void **devPtr, size_t *size, cudaGraphicsResource_t resource) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t *, cudaGraphicsResource_t); - static auto func_ptr = - LoadSymbol("cudaGraphicsResourceGetMappedPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsSubResourceGetMappedArray( - cudaArray_t *array, cudaGraphicsResource_t resource, - unsigned int arrayIndex, unsigned int mipLevel) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t *, cudaGraphicsResource_t, unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cudaGraphicsSubResourceGetMappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, resource, arrayIndex, mipLevel); -} - -extern 
__host__ cudaError_t CUDARTAPI -cudaGraphicsResourceGetMappedMipmappedArray( - cudaMipmappedArray_t *mipmappedArray, cudaGraphicsResource_t resource) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t *, cudaGraphicsResource_t); - static auto func_ptr = - LoadSymbol("cudaGraphicsResourceGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray, resource); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaBindTexture( - size_t *offset, const struct textureReference *texref, const void *devPtr, - const struct cudaChannelFormatDesc *desc, size_t size __dv(UINT_MAX)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct textureReference *, const void *, - const struct cudaChannelFormatDesc *, size_t); - static auto func_ptr = LoadSymbol("cudaBindTexture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref, devPtr, desc, size); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaBindTexture2D(size_t *offset, const struct textureReference *texref, - const void *devPtr, const struct cudaChannelFormatDesc *desc, - size_t width, size_t height, size_t pitch) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct textureReference *, const void *, - const struct cudaChannelFormatDesc *, size_t, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaBindTexture2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref, devPtr, desc, width, height, pitch); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaBindTextureToArray( - const struct textureReference *texref, cudaArray_const_t array, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct textureReference *, cudaArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindTextureToArray"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(texref, array, desc); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaBindTextureToMipmappedArray(const struct textureReference *texref, - cudaMipmappedArray_const_t mipmappedArray, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct textureReference *, cudaMipmappedArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindTextureToMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, mipmappedArray, desc); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaUnbindTexture(const struct textureReference *texref) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct textureReference *); - static auto func_ptr = LoadSymbol("cudaUnbindTexture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaGetTextureAlignmentOffset(size_t *offset, - const struct textureReference *texref) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(size_t *, const struct textureReference *); - static auto func_ptr = LoadSymbol("cudaGetTextureAlignmentOffset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaGetTextureReference( - const struct textureReference **texref, const void *symbol) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct textureReference **, const void *); - static auto func_ptr = LoadSymbol("cudaGetTextureReference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, symbol); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaBindSurfaceToArray( - const struct surfaceReference *surfref, cudaArray_const_t array, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct 
surfaceReference *, cudaArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindSurfaceToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfref, array, desc); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaGetSurfaceReference( - const struct surfaceReference **surfref, const void *symbol) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct surfaceReference **, const void *); - static auto func_ptr = LoadSymbol("cudaGetSurfaceReference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfref, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetChannelDesc( - struct cudaChannelFormatDesc *desc, cudaArray_const_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, - cudaArray_const_t); - static auto func_ptr = LoadSymbol("cudaGetChannelDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(desc, array); -} - -extern __host__ struct cudaChannelFormatDesc CUDARTAPI cudaCreateChannelDesc( - int x, int y, int z, int w, enum cudaChannelFormatKind f) { - using FuncPtr = struct cudaChannelFormatDesc(CUDARTAPI *)( - int, int, int, int, enum cudaChannelFormatKind); - static auto func_ptr = LoadSymbol("cudaCreateChannelDesc"); - return func_ptr(x, y, z, w, f); -} - -extern __host__ cudaError_t CUDARTAPI cudaCreateTextureObject( - cudaTextureObject_t *pTexObject, const struct cudaResourceDesc *pResDesc, - const struct cudaTextureDesc *pTexDesc, - const struct cudaResourceViewDesc *pResViewDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaTextureObject_t *, const struct cudaResourceDesc *, - const struct cudaTextureDesc *, const struct cudaResourceViewDesc *); - static auto func_ptr = LoadSymbol("cudaCreateTextureObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexObject, pResDesc, pTexDesc, pResViewDesc); -} - -extern __host__ cudaError_t CUDARTAPI 
cudaCreateTextureObject_v2( - cudaTextureObject_t *pTexObject, const struct cudaResourceDesc *pResDesc, - const struct cudaTextureDesc_v2 *pTexDesc, - const struct cudaResourceViewDesc *pResViewDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaTextureObject_t *, const struct cudaResourceDesc *, - const struct cudaTextureDesc_v2 *, const struct cudaResourceViewDesc *); - static auto func_ptr = LoadSymbol("cudaCreateTextureObject_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexObject, pResDesc, pTexDesc, pResViewDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyTextureObject(cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaTextureObject_t); - static auto func_ptr = LoadSymbol("cudaDestroyTextureObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceDesc( - struct cudaResourceDesc *pResDesc, cudaTextureObject_t texObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaTextureObject_t); - static auto func_ptr = - LoadSymbol("cudaGetTextureObjectResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectTextureDesc( - struct cudaTextureDesc *pTexDesc, cudaTextureObject_t texObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaTextureDesc *, cudaTextureObject_t); - static auto func_ptr = LoadSymbol("cudaGetTextureObjectTextureDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectTextureDesc_v2( - struct cudaTextureDesc_v2 *pTexDesc, cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaTextureDesc_v2 *, - cudaTextureObject_t); - static auto func_ptr = - 
LoadSymbol("cudaGetTextureObjectTextureDesc_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceViewDesc( - struct cudaResourceViewDesc *pResViewDesc, cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaResourceViewDesc *, - cudaTextureObject_t); - static auto func_ptr = - LoadSymbol("cudaGetTextureObjectResourceViewDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResViewDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaCreateSurfaceObject( - cudaSurfaceObject_t *pSurfObject, const struct cudaResourceDesc *pResDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t *, - const struct cudaResourceDesc *); - static auto func_ptr = LoadSymbol("cudaCreateSurfaceObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pSurfObject, pResDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroySurfaceObject(cudaSurfaceObject_t surfObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t); - static auto func_ptr = LoadSymbol("cudaDestroySurfaceObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSurfaceObjectResourceDesc( - struct cudaResourceDesc *pResDesc, cudaSurfaceObject_t surfObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaSurfaceObject_t); - static auto func_ptr = - LoadSymbol("cudaGetSurfaceObjectResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, surfObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaDriverGetVersion(int *driverVersion) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaDriverGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(driverVersion); -} - -extern 
__host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaRuntimeGetVersion(int *runtimeVersion) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaRuntimeGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(runtimeVersion); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphCreate(cudaGraph_t *pGraph, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraph, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddKernelNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddKernelNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeGetParams( - cudaGraphNode_t node, struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeSetParams( - cudaGraphNode_t node, const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - 
-extern __host__ cudaError_t CUDARTAPI -cudaGraphKernelNodeCopyAttributes(cudaGraphNode_t hSrc, cudaGraphNode_t hDst) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t); - static auto func_ptr = - LoadSymbol("cudaGraphKernelNodeCopyAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hSrc, hDst); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeGetAttribute( - cudaGraphNode_t hNode, cudaKernelNodeAttrID attr, - cudaKernelNodeAttrValue *value_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, cudaKernelNodeAttrID, cudaKernelNodeAttrValue *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, attr, value_out); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeSetAttribute( - cudaGraphNode_t hNode, cudaKernelNodeAttrID attr, - const cudaKernelNodeAttrValue *value) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, cudaKernelNodeAttrID, const cudaKernelNodeAttrValue *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, attr, value); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaMemcpy3DParms *pCopyParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemcpyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pCopyParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNodeToSymbol( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, 
size_t numDependencies, - const void *symbol, const void *src, size_t count, size_t offset, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, - const void *, const void *, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaGraphAddMemcpyNodeToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, symbol, - src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNodeFromSymbol( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, void *dst, - const void *symbol, size_t count, size_t offset, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, void *, - const void *, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphAddMemcpyNodeFromSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, dst, - symbol, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNode1D( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, void *dst, - const void *src, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, void *, - const void *, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaGraphAddMemcpyNode1D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, dst, src, - count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeGetParams( - cudaGraphNode_t node, struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr 
= - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParams( - cudaGraphNode_t node, const struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParamsToSymbol( - cudaGraphNode_t node, const void *symbol, const void *src, size_t count, - size_t offset, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, const void *, const void *, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphMemcpyNodeSetParamsToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, symbol, src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParamsFromSymbol( - cudaGraphNode_t node, void *dst, const void *symbol, size_t count, - size_t offset, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, void *, const void *, size_t, - size_t, enum cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphMemcpyNodeSetParamsFromSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, dst, symbol, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphMemcpyNodeSetParams1D(cudaGraphNode_t node, void *dst, const void *src, - size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, void *, const void *, size_t, enum cudaMemcpyKind); - static auto func_ptr = 
LoadSymbol("cudaGraphMemcpyNodeSetParams1D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, dst, src, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemsetNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaMemsetParams *pMemsetParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemsetNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pMemsetParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeGetParams( - cudaGraphNode_t node, struct cudaMemsetParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphMemsetNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeSetParams( - cudaGraphNode_t node, const struct cudaMemsetParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddHostNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddHostNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeGetParams( - cudaGraphNode_t node, struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphHostNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeSetParams( - cudaGraphNode_t node, const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphHostNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddChildGraphNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, - size_t numDependencies, cudaGraph_t childGraph) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphAddChildGraphNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - childGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphChildGraphNodeGetGraph(cudaGraphNode_t node, cudaGraph_t *pGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraph_t *); - static auto func_ptr = LoadSymbol("cudaGraphChildGraphNodeGetGraph"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pGraph); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddEmptyNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t 
*, cudaGraph_t, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphAddEmptyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddEventRecordNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, - size_t numDependencies, cudaEvent_t event) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaGraphAddEventRecordNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphEventRecordNodeGetEvent(cudaGraphNode_t node, cudaEvent_t *event_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaEvent_t *); - static auto func_ptr = - LoadSymbol("cudaGraphEventRecordNodeGetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, event_out); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphEventRecordNodeSetEvent(cudaGraphNode_t node, cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaEvent_t); - static auto func_ptr = - LoadSymbol("cudaGraphEventRecordNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddEventWaitNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, - size_t numDependencies, cudaEvent_t event) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaGraphAddEventWaitNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, 
event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphEventWaitNodeGetEvent(cudaGraphNode_t node, cudaEvent_t *event_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaEvent_t *); - static auto func_ptr = LoadSymbol("cudaGraphEventWaitNodeGetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, event_out); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphEventWaitNodeSetEvent(cudaGraphNode_t node, cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaGraphEventWaitNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, event); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddExternalSemaphoresSignalNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaExternalSemaphoreSignalNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, - const struct cudaExternalSemaphoreSignalNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphAddExternalSemaphoresSignalNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExternalSemaphoresSignalNodeGetParams( - cudaGraphNode_t hNode, - struct cudaExternalSemaphoreSignalNodeParams *params_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, struct cudaExternalSemaphoreSignalNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExternalSemaphoresSignalNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, params_out); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExternalSemaphoresSignalNodeSetParams( - cudaGraphNode_t hNode, - const struct 
cudaExternalSemaphoreSignalNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, const struct cudaExternalSemaphoreSignalNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExternalSemaphoresSignalNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddExternalSemaphoresWaitNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaExternalSemaphoreWaitNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, - const struct cudaExternalSemaphoreWaitNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphAddExternalSemaphoresWaitNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExternalSemaphoresWaitNodeGetParams( - cudaGraphNode_t hNode, - struct cudaExternalSemaphoreWaitNodeParams *params_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, struct cudaExternalSemaphoreWaitNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExternalSemaphoresWaitNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, params_out); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExternalSemaphoresWaitNodeSetParams( - cudaGraphNode_t hNode, - const struct cudaExternalSemaphoreWaitNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, const struct cudaExternalSemaphoreWaitNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExternalSemaphoresWaitNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemAllocNode( - 
cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - struct cudaMemAllocNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - struct cudaMemAllocNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemAllocNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemAllocNodeGetParams( - cudaGraphNode_t node, struct cudaMemAllocNodeParams *params_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - struct cudaMemAllocNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphMemAllocNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, params_out); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemFreeNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, void *dptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, void *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemFreeNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, dptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphMemFreeNodeGetParams(cudaGraphNode_t node, void *dptr_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, void *); - static auto func_ptr = LoadSymbol("cudaGraphMemFreeNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, dptr_out); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceGraphMemTrim(int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaDeviceGraphMemTrim"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(device); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceGetGraphMemAttribute( - int device, enum cudaGraphMemAttributeType attr, void *value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, enum cudaGraphMemAttributeType, void *); - static auto func_ptr = LoadSymbol("cudaDeviceGetGraphMemAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, attr, value); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceSetGraphMemAttribute( - int device, enum cudaGraphMemAttributeType attr, void *value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, enum cudaGraphMemAttributeType, void *); - static auto func_ptr = LoadSymbol("cudaDeviceSetGraphMemAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, attr, value); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphClone(cudaGraph_t *pGraphClone, cudaGraph_t originalGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t *, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphClone, originalGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphNodeFindInClone(cudaGraphNode_t *pNode, cudaGraphNode_t originalNode, - cudaGraph_t clonedGraph) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraphNode_t, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphNodeFindInClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pNode, originalNode, clonedGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphNodeGetType(cudaGraphNode_t node, enum cudaGraphNodeType *pType) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, enum cudaGraphNodeType *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pType); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetNodes(cudaGraph_t 
graph, - cudaGraphNode_t *nodes, - size_t *numNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, nodes, numNodes); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetRootNodes( - cudaGraph_t graph, cudaGraphNode_t *pRootNodes, size_t *pNumRootNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetRootNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, pRootNodes, pNumRootNodes); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetEdges(cudaGraph_t graph, - cudaGraphNode_t *from, - cudaGraphNode_t *to, - size_t *numEdges) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, - cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetEdges"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numEdges); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependencies( - cudaGraphNode_t node, cudaGraphNode_t *pDependencies, - size_t *pNumDependencies) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pDependencies, pNumDependencies); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependentNodes( - cudaGraphNode_t node, cudaGraphNode_t *pDependentNodes, - size_t *pNumDependentNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetDependentNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pDependentNodes, pNumDependentNodes); -} - -extern __host__ 
cudaError_t CUDARTAPI -cudaGraphAddDependencies(cudaGraph_t graph, const cudaGraphNode_t *from, - const cudaGraphNode_t *to, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, const cudaGraphNode_t *, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphAddDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphRemoveDependencies(cudaGraph_t graph, const cudaGraphNode_t *from, - const cudaGraphNode_t *to, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, const cudaGraphNode_t *, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphRemoveDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphDestroyNode(cudaGraphNode_t node) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t); - static auto func_ptr = LoadSymbol("cudaGraphDestroyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphInstantiate( - cudaGraphExec_t *pGraphExec, cudaGraph_t graph, cudaGraphNode_t *pErrorNode, - char *pLogBuffer, size_t bufferSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t *, cudaGraph_t, - cudaGraphNode_t *, char *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphInstantiate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphExec, graph, pErrorNode, pLogBuffer, bufferSize); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphInstantiateWithFlags( - cudaGraphExec_t *pGraphExec, cudaGraph_t graph, unsigned long long flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t *, cudaGraph_t, - unsigned long long); - static auto func_ptr = LoadSymbol("cudaGraphInstantiateWithFlags"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphExec, graph, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecKernelNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExecKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemcpyNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemcpyNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemcpyNodeSetParamsToSymbol( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const void *symbol, - const void *src, size_t count, size_t offset, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const void *, const void *, size_t, - size_t, enum cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemcpyNodeSetParamsToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, symbol, src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecMemcpyNodeSetParamsFromSymbol(cudaGraphExec_t hGraphExec, - cudaGraphNode_t node, void *dst, - const void *symbol, size_t count, - size_t offset, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - void *, const void *, size_t, size_t, - enum cudaMemcpyKind); - 
static auto func_ptr = - LoadSymbol("cudaGraphExecMemcpyNodeSetParamsFromSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, dst, symbol, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemcpyNodeSetParams1D( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, void *dst, - const void *src, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, void *, - const void *, size_t, enum cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemcpyNodeSetParams1D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, dst, src, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemsetNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaMemsetParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaMemsetParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecHostNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphExecHostNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecChildGraphNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, cudaGraph_t childGraph) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, cudaGraph_t); - static auto func_ptr = - LoadSymbol("cudaGraphExecChildGraphNodeSetParams"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, childGraph); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecEventRecordNodeSetEvent( - cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, cudaEvent_t event) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, cudaEvent_t); - static auto func_ptr = - LoadSymbol("cudaGraphExecEventRecordNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, event); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecEventWaitNodeSetEvent( - cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, cudaEvent_t event) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, cudaEvent_t); - static auto func_ptr = - LoadSymbol("cudaGraphExecEventWaitNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecExternalSemaphoresSignalNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, - const struct cudaExternalSemaphoreSignalNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphExec_t, cudaGraphNode_t, - const struct cudaExternalSemaphoreSignalNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExecExternalSemaphoresSignalNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecExternalSemaphoresWaitNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, - const struct cudaExternalSemaphoreWaitNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphExec_t, cudaGraphNode_t, - const struct cudaExternalSemaphoreWaitNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExecExternalSemaphoresWaitNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(hGraphExec, hNode, nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphNodeSetEnabled( - cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, unsigned int isEnabled) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphNodeSetEnabled"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, isEnabled); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphNodeGetEnabled(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, - unsigned int *isEnabled) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - unsigned int *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetEnabled"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, isEnabled); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecUpdate(cudaGraphExec_t hGraphExec, cudaGraph_t hGraph, - cudaGraphNode_t *hErrorNode_out, - enum cudaGraphExecUpdateResult *updateResult_out) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraph_t, cudaGraphNode_t *, - enum cudaGraphExecUpdateResult *); - static auto func_ptr = LoadSymbol("cudaGraphExecUpdate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hGraph, hErrorNode_out, updateResult_out); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphUpload(cudaGraphExec_t graphExec, - cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphUpload"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graphExec, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphLaunch(cudaGraphExec_t graphExec, - cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphLaunch"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(graphExec, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecDestroy(cudaGraphExec_t graphExec) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t); - static auto func_ptr = LoadSymbol("cudaGraphExecDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graphExec); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphDestroy(cudaGraph_t graph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphDebugDotPrint( - cudaGraph_t graph, const char *path, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, const char *, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphDebugDotPrint"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, path, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaUserObjectCreate( - cudaUserObject_t *object_out, void *ptr, cudaHostFn_t destroy, - unsigned int initialRefcount, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaUserObject_t *, void *, cudaHostFn_t, unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cudaUserObjectCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(object_out, ptr, destroy, initialRefcount, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaUserObjectRetain(cudaUserObject_t object, unsigned int count __dv(1)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaUserObject_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaUserObjectRetain"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(object, count); -} - -extern __host__ cudaError_t CUDARTAPI -cudaUserObjectRelease(cudaUserObject_t object, unsigned int count __dv(1)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaUserObject_t, unsigned int); - 
static auto func_ptr = LoadSymbol("cudaUserObjectRelease"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(object, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphRetainUserObject( - cudaGraph_t graph, cudaUserObject_t object, unsigned int count __dv(1), - unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaUserObject_t, - unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphRetainUserObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, object, count, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphReleaseUserObject( - cudaGraph_t graph, cudaUserObject_t object, unsigned int count __dv(1)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaUserObject_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphReleaseUserObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, object, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetDriverEntryPoint( - const char *symbol, void **funcPtr, unsigned long long flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const char *, void **, unsigned long long); - static auto func_ptr = LoadSymbol("cudaGetDriverEntryPoint"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, funcPtr, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetExportTable( - const void **ppExportTable, const cudaUUID_t *pExportTableId) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void **, const cudaUUID_t *); - static auto func_ptr = LoadSymbol("cudaGetExportTable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ppExportTable, pExportTableId); -} - -extern __host__ cudaError_t CUDARTAPI_CDECL -cudaGetFuncBySymbol(cudaFunction_t *functionPtr, const void *symbolPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaFunction_t *, const void *); - static auto func_ptr = LoadSymbol("_CDECL 
cudaGetFuncBySymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(functionPtr, symbolPtr); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cuda_runtime_12_0.inc b/third_party/xla/third_party/tsl/tsl/cuda/cuda_runtime_12_0.inc deleted file mode 100644 index b488634d7d2c21..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cuda_runtime_12_0.inc +++ /dev/null @@ -1,2676 +0,0 @@ -// Auto-generated, do not edit. - -extern "C" { -extern __host__ cudaError_t CUDARTAPI cudaDeviceReset(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaDeviceReset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceSynchronize(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaDeviceSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceSetLimit(enum cudaLimit limit, - size_t value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); - static auto func_ptr = LoadSymbol("cudaDeviceSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetLimit(size_t *pValue, enum cudaLimit limit) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); - static auto func_ptr = LoadSymbol("cudaDeviceGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pValue, limit); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetTexture1DLinearMaxWidth( - size_t *maxWidthInElements, const struct cudaChannelFormatDesc *fmtDesc, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct cudaChannelFormatDesc *, int); - static auto func_ptr = - 
LoadSymbol("cudaDeviceGetTexture1DLinearMaxWidth"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(maxWidthInElements, fmtDesc, device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetCacheConfig(enum cudaFuncCache *pCacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); - static auto func_ptr = LoadSymbol("cudaDeviceGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetStreamPriorityRange(int *leastPriority, int *greatestPriority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int *); - static auto func_ptr = - LoadSymbol("cudaDeviceGetStreamPriorityRange"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(leastPriority, greatestPriority); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetCacheConfig(enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaDeviceSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(cacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetSharedMemConfig(enum cudaSharedMemConfig *pConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig *); - static auto func_ptr = LoadSymbol("cudaDeviceGetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetSharedMemConfig(enum cudaSharedMemConfig config) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig); - static auto func_ptr = LoadSymbol("cudaDeviceSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceGetByPCIBusId(int *device, const char *pciBusId) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)(int *, const char *); - static auto func_ptr = LoadSymbol("cudaDeviceGetByPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, pciBusId); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceGetPCIBusId(char *pciBusId, - int len, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(char *, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pciBusId, len, device); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcGetEventHandle(cudaIpcEventHandle_t *handle, cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcEventHandle_t *, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaIpcGetEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcOpenEventHandle(cudaEvent_t *event, cudaIpcEventHandle_t handle) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, cudaIpcEventHandle_t); - static auto func_ptr = LoadSymbol("cudaIpcOpenEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, handle); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcGetMemHandle(cudaIpcMemHandle_t *handle, void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcMemHandle_t *, void *); - static auto func_ptr = LoadSymbol("cudaIpcGetMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaIpcOpenMemHandle( - void **devPtr, cudaIpcMemHandle_t handle, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, cudaIpcMemHandle_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaIpcOpenMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, handle, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaIpcCloseMemHandle(void *devPtr) { 
- using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaIpcCloseMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceFlushGPUDirectRDMAWrites( - enum cudaFlushGPUDirectRDMAWritesTarget target, - enum cudaFlushGPUDirectRDMAWritesScope scope) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(enum cudaFlushGPUDirectRDMAWritesTarget, - enum cudaFlushGPUDirectRDMAWritesScope); - static auto func_ptr = - LoadSymbol("cudaDeviceFlushGPUDirectRDMAWrites"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(target, scope); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaThreadExit(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaThreadExit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSynchronize(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaThreadSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSetLimit(enum cudaLimit limit, size_t value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); - static auto func_ptr = LoadSymbol("cudaThreadSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadGetLimit(size_t *pValue, enum cudaLimit limit) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); - static auto func_ptr = LoadSymbol("cudaThreadGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pValue, limit); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadGetCacheConfig(enum cudaFuncCache *pCacheConfig) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); - static auto func_ptr = LoadSymbol("cudaThreadGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCacheConfig); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSetCacheConfig(enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaThreadSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(cacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetLastError(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaGetLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaPeekAtLastError(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaPeekAtLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ const char *CUDARTAPI -cudaGetErrorName(cudaError_t error) { - using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); - static auto func_ptr = LoadSymbol("cudaGetErrorName"); - if (!func_ptr) return "cudaGetErrorName symbol not found."; - return func_ptr(error); -} - -extern __host__ __cudart_builtin__ const char *CUDARTAPI -cudaGetErrorString(cudaError_t error) { - using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); - static auto func_ptr = LoadSymbol("cudaGetErrorString"); - if (!func_ptr) return "cudaGetErrorString symbol not found."; - return func_ptr(error); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDeviceCount(int *count) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaGetDeviceCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count); -} - -extern __host__ 
__cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDeviceProperties(struct cudaDeviceProp *prop, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaDeviceProp *, int); - static auto func_ptr = LoadSymbol("cudaGetDeviceProperties_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(prop, device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetAttribute(int *value, enum cudaDeviceAttr attr, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceAttr, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attr, device); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceGetDefaultMemPool(cudaMemPool_t *memPool, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t *, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetDefaultMemPool"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, device); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetMemPool(int device, cudaMemPool_t memPool) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int, cudaMemPool_t); - static auto func_ptr = LoadSymbol("cudaDeviceSetMemPool"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, memPool); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceGetMemPool(cudaMemPool_t *memPool, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t *, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetMemPool"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, device); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceGetNvSciSyncAttributes( - void *nvSciSyncAttrList, int device, int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, int); - static auto func_ptr = - LoadSymbol("cudaDeviceGetNvSciSyncAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(nvSciSyncAttrList, device, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetP2PAttribute(int *value, enum cudaDeviceP2PAttr attr, - int srcDevice, int dstDevice) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceP2PAttr, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetP2PAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attr, srcDevice, dstDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaChooseDevice(int *device, const struct cudaDeviceProp *prop) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const struct cudaDeviceProp *); - static auto func_ptr = LoadSymbol("cudaChooseDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, prop); -} - -extern __host__ cudaError_t CUDARTAPI cudaInitDevice(int device, - unsigned int deviceFlags, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int, unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cudaInitDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, deviceFlags, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDevice(int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaSetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDevice(int *device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaGetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetValidDevices(int *device_arr, - int len) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int); - static auto func_ptr = LoadSymbol("cudaSetValidDevices"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device_arr, len); -} - -extern __host__ 
cudaError_t CUDARTAPI cudaSetDeviceFlags(unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int); - static auto func_ptr = LoadSymbol("cudaSetDeviceFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetDeviceFlags(unsigned int *flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *); - static auto func_ptr = LoadSymbol("cudaGetDeviceFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamCreate(cudaStream_t *pStream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *); - static auto func_ptr = LoadSymbol("cudaStreamCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCreateWithFlags(cudaStream_t *pStream, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamCreateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCreateWithPriority(cudaStream_t *pStream, unsigned int flags, - int priority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int, int); - static auto func_ptr = LoadSymbol("cudaStreamCreateWithPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream, flags, priority); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetPriority(cudaStream_t hStream, int *priority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, int *); - static auto func_ptr = LoadSymbol("cudaStreamGetPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, priority); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI 
-cudaStreamGetFlags(cudaStream_t hStream, unsigned int *flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, unsigned int *); - static auto func_ptr = LoadSymbol("cudaStreamGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetId(cudaStream_t hStream, unsigned long long *streamId) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, unsigned long long *); - static auto func_ptr = LoadSymbol("cudaStreamGetId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, streamId); -} - -extern __host__ cudaError_t CUDARTAPI cudaCtxResetPersistingL2Cache(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaCtxResetPersistingL2Cache"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCopyAttributes(cudaStream_t dst, cudaStream_t src) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamCopyAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetAttribute(cudaStream_t hStream, cudaStreamAttrID attr, - cudaStreamAttrValue *value_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStreamAttrID, - cudaStreamAttrValue *); - static auto func_ptr = LoadSymbol("cudaStreamGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, attr, value_out); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamSetAttribute(cudaStream_t hStream, cudaStreamAttrID attr, - const cudaStreamAttrValue *value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStreamAttrID, - const cudaStreamAttrValue *); - static auto func_ptr = 
LoadSymbol("cudaStreamSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, attr, value); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamDestroy(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamWaitEvent( - cudaStream_t stream, cudaEvent_t event, unsigned int flags __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, cudaEvent_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamWaitEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, event, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamAddCallback(cudaStream_t stream, cudaStreamCallback_t callback, - void *userData, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStreamCallback_t, - void *, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamAddCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, callback, userData, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamSynchronize(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamQuery(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamAttachMemAsync(cudaStream_t stream, void *devPtr, - size_t length __dv(0), unsigned int flags) { - using FuncPtr = 
- cudaError_t(CUDARTAPI *)(cudaStream_t, void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamAttachMemAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, devPtr, length, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamBeginCapture(cudaStream_t stream, enum cudaStreamCaptureMode mode) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamCaptureMode); - static auto func_ptr = LoadSymbol("cudaStreamBeginCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, mode); -} - -extern __host__ cudaError_t CUDARTAPI -cudaThreadExchangeStreamCaptureMode(enum cudaStreamCaptureMode *mode) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaStreamCaptureMode *); - static auto func_ptr = - LoadSymbol("cudaThreadExchangeStreamCaptureMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mode); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamEndCapture(cudaStream_t stream, cudaGraph_t *pGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaGraph_t *); - static auto func_ptr = LoadSymbol("cudaStreamEndCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pGraph); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamIsCapturing( - cudaStream_t stream, enum cudaStreamCaptureStatus *pCaptureStatus) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamCaptureStatus *); - static auto func_ptr = LoadSymbol("cudaStreamIsCapturing"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pCaptureStatus); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamGetCaptureInfo( - cudaStream_t stream, enum cudaStreamCaptureStatus *captureStatus_out, - unsigned long long *id_out __dv(0), cudaGraph_t *graph_out __dv(0), - const cudaGraphNode_t **dependencies_out __dv(0), - size_t *numDependencies_out __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI 
*)( - cudaStream_t, enum cudaStreamCaptureStatus *, unsigned long long *, - cudaGraph_t *, const cudaGraphNode_t **, size_t *); - static auto func_ptr = - LoadSymbol("cudaStreamGetCaptureInfo_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, captureStatus_out, id_out, graph_out, - dependencies_out, numDependencies_out); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamUpdateCaptureDependencies( - cudaStream_t stream, cudaGraphNode_t *dependencies, size_t numDependencies, - unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaGraphNode_t *, - size_t, unsigned int); - static auto func_ptr = - LoadSymbol("cudaStreamUpdateCaptureDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, dependencies, numDependencies, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventCreate(cudaEvent_t *event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *); - static auto func_ptr = LoadSymbol("cudaEventCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventCreateWithFlags(cudaEvent_t *event, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaEventCreateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventRecord(cudaEvent_t event, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaEventRecord"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventRecordWithFlags(cudaEvent_t event, cudaStream_t stream __dv(0), - unsigned int flags __dv(0)) { - using FuncPtr = - 
cudaError_t(CUDARTAPI *)(cudaEvent_t, cudaStream_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaEventRecordWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, stream, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventQuery(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventSynchronize(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventDestroy(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventElapsedTime(float *ms, - cudaEvent_t start, - cudaEvent_t end) { - using FuncPtr = cudaError_t(CUDARTAPI *)(float *, cudaEvent_t, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventElapsedTime"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ms, start, end); -} - -extern __host__ cudaError_t CUDARTAPI cudaImportExternalMemory( - cudaExternalMemory_t *extMem_out, - const struct cudaExternalMemoryHandleDesc *memHandleDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaExternalMemory_t *, const struct cudaExternalMemoryHandleDesc *); - static auto func_ptr = LoadSymbol("cudaImportExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem_out, memHandleDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaExternalMemoryGetMappedBuffer( - void **devPtr, cudaExternalMemory_t extMem, - const struct 
cudaExternalMemoryBufferDesc *bufferDesc) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, cudaExternalMemory_t, - const struct cudaExternalMemoryBufferDesc *); - static auto func_ptr = - LoadSymbol("cudaExternalMemoryGetMappedBuffer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, extMem, bufferDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaExternalMemoryGetMappedMipmappedArray( - cudaMipmappedArray_t *mipmap, cudaExternalMemory_t extMem, - const struct cudaExternalMemoryMipmappedArrayDesc *mipmapDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMipmappedArray_t *, cudaExternalMemory_t, - const struct cudaExternalMemoryMipmappedArrayDesc *); - static auto func_ptr = - LoadSymbol("cudaExternalMemoryGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmap, extMem, mipmapDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyExternalMemory(cudaExternalMemory_t extMem) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaExternalMemory_t); - static auto func_ptr = LoadSymbol("cudaDestroyExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem); -} - -extern __host__ cudaError_t CUDARTAPI cudaImportExternalSemaphore( - cudaExternalSemaphore_t *extSem_out, - const struct cudaExternalSemaphoreHandleDesc *semHandleDesc) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreHandleDesc *); - static auto func_ptr = LoadSymbol("cudaImportExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem_out, semHandleDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaSignalExternalSemaphoresAsync( - const cudaExternalSemaphore_t *extSemArray, - const struct cudaExternalSemaphoreSignalParams *paramsArray, - unsigned int numExtSems, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const cudaExternalSemaphore_t *, - const struct 
cudaExternalSemaphoreSignalParams *, - unsigned int, cudaStream_t); - static auto func_ptr = LoadSymbol( - "cudaSignalExternalSemaphoresAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaWaitExternalSemaphoresAsync( - const cudaExternalSemaphore_t *extSemArray, - const struct cudaExternalSemaphoreWaitParams *paramsArray, - unsigned int numExtSems, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreWaitParams *, - unsigned int, cudaStream_t); - static auto func_ptr = LoadSymbol( - "cudaWaitExternalSemaphoresAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyExternalSemaphore(cudaExternalSemaphore_t extSem) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaExternalSemaphore_t); - static auto func_ptr = LoadSymbol("cudaDestroyExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem); -} - -extern __host__ cudaError_t CUDARTAPI -cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, void **args, - size_t sharedMem, cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaLaunchKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, gridDim, blockDim, args, sharedMem, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchKernelExC( - const cudaLaunchConfig_t *config, const void *func, void **args) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const cudaLaunchConfig_t *, - const void *, void **); - static auto func_ptr = LoadSymbol("cudaLaunchKernelExC"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config, func, 
args); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchCooperativeKernel( - const void *func, dim3 gridDim, dim3 blockDim, void **args, - size_t sharedMem, cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaLaunchCooperativeKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, gridDim, blockDim, args, sharedMem, stream); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaLaunchCooperativeKernelMultiDevice( - struct cudaLaunchParams *launchParamsList, unsigned int numDevices, - unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaLaunchParams *, - unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cudaLaunchCooperativeKernelMultiDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(launchParamsList, numDevices, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFuncSetCacheConfig(const void *func, enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaFuncSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, cacheConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFuncSetSharedMemConfig(const void *func, enum cudaSharedMemConfig config) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, enum cudaSharedMemConfig); - static auto func_ptr = LoadSymbol("cudaFuncSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, config); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFuncGetAttributes(struct cudaFuncAttributes *attr, const void *func) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaFuncAttributes *, const void *); - static auto func_ptr = LoadSymbol("cudaFuncGetAttributes"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(attr, func); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFuncSetAttribute(const void *func, enum cudaFuncAttribute attr, int value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncAttribute, int); - static auto func_ptr = LoadSymbol("cudaFuncSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, attr, value); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaSetDoubleForDevice(double *d) { - using FuncPtr = cudaError_t(CUDARTAPI *)(double *); - static auto func_ptr = LoadSymbol("cudaSetDoubleForDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(d); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaSetDoubleForHost(double *d) { - using FuncPtr = cudaError_t(CUDARTAPI *)(double *); - static auto func_ptr = LoadSymbol("cudaSetDoubleForHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(d); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchHostFunc(cudaStream_t stream, - cudaHostFn_t fn, - void *userData) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaHostFn_t, void *); - static auto func_ptr = LoadSymbol("cudaLaunchHostFunc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, fn, userData); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *func, - int blockSize, - size_t dynamicSMemSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t); - static auto func_ptr = - LoadSymbol("cudaOccupancyMaxActiveBlocksPerMultiprocessor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyAvailableDynamicSMemPerBlock(size_t *dynamicSmemSize, - const void *func, int numBlocks, 
- int blockSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, const void *, int, int); - static auto func_ptr = - LoadSymbol("cudaOccupancyAvailableDynamicSMemPerBlock"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dynamicSmemSize, func, numBlocks, blockSize); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, - const void *func, - int blockSize, - size_t dynamicSMemSize, - unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t, unsigned int); - static auto func_ptr = LoadSymbol( - "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxPotentialClusterSize(int *clusterSize, const void *func, - const cudaLaunchConfig_t *launchConfig) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const void *, const cudaLaunchConfig_t *); - static auto func_ptr = - LoadSymbol("cudaOccupancyMaxPotentialClusterSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(clusterSize, func, launchConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveClusters(int *numClusters, const void *func, - const cudaLaunchConfig_t *launchConfig) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const void *, const cudaLaunchConfig_t *); - static auto func_ptr = LoadSymbol("cudaOccupancyMaxActiveClusters"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numClusters, func, launchConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMallocManaged(void **devPtr, size_t size, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocManaged"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMalloc(void **devPtr, size_t size) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cudaMalloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocHost(void **ptr, size_t size) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cudaMallocHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocPitch(void **devPtr, - size_t *pitch, - size_t width, - size_t height) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t *, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaMallocPitch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, width, height); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocArray( - cudaArray_t *array, const struct cudaChannelFormatDesc *desc, size_t width, - size_t height __dv(0), unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, - const struct cudaChannelFormatDesc *, - size_t, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, desc, width, height, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFree(void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaFree"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeHost(void *ptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaFreeHost"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(ptr); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeArray(cudaArray_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t); - static auto func_ptr = LoadSymbol("cudaFreeArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFreeMipmappedArray(cudaMipmappedArray_t mipmappedArray) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t); - static auto func_ptr = LoadSymbol("cudaFreeMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostAlloc(void **pHost, size_t size, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostAlloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHost, size, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostRegister(void *ptr, size_t size, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostRegister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostUnregister(void *ptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaHostUnregister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaHostGetDevicePointer(void **pDevice, void *pHost, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, void *, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostGetDevicePointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pDevice, pHost, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostGetFlags(unsigned int *pFlags, - void 
*pHost) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *, void *); - static auto func_ptr = LoadSymbol("cudaHostGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFlags, pHost); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMalloc3D(struct cudaPitchedPtr *pitchedDevPtr, struct cudaExtent extent) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr *, struct cudaExtent); - static auto func_ptr = LoadSymbol("cudaMalloc3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, extent); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMalloc3DArray(cudaArray_t *array, const struct cudaChannelFormatDesc *desc, - struct cudaExtent extent, unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, - const struct cudaChannelFormatDesc *, - struct cudaExtent, unsigned int); - static auto func_ptr = LoadSymbol("cudaMalloc3DArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, desc, extent, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocMipmappedArray( - cudaMipmappedArray_t *mipmappedArray, - const struct cudaChannelFormatDesc *desc, struct cudaExtent extent, - unsigned int numLevels, unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMipmappedArray_t *, const struct cudaChannelFormatDesc *, - struct cudaExtent, unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray, desc, extent, numLevels, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetMipmappedArrayLevel( - cudaArray_t *levelArray, cudaMipmappedArray_const_t mipmappedArray, - unsigned int level) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t *, cudaMipmappedArray_const_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGetMipmappedArrayLevel"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(levelArray, mipmappedArray, level); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpy3D(const struct cudaMemcpy3DParms *p) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaMemcpy3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpy3DPeer(const struct cudaMemcpy3DPeerParms *p) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *); - static auto func_ptr = LoadSymbol("cudaMemcpy3DPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy3DAsync( - const struct cudaMemcpy3DParms *p, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy3DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy3DPeerAsync( - const struct cudaMemcpy3DPeerParms *p, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *, - cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy3DPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemGetInfo(size_t *free, - size_t *total) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaMemGetInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(free, total); -} - -extern __host__ cudaError_t CUDARTAPI -cudaArrayGetInfo(struct cudaChannelFormatDesc *desc, struct cudaExtent *extent, - unsigned int *flags, cudaArray_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, - 
struct cudaExtent *, unsigned int *, - cudaArray_t); - static auto func_ptr = LoadSymbol("cudaArrayGetInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(desc, extent, flags, array); -} - -extern __host__ cudaError_t CUDARTAPI cudaArrayGetPlane( - cudaArray_t *pPlaneArray, cudaArray_t hArray, unsigned int planeIdx) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t *, cudaArray_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaArrayGetPlane"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pPlaneArray, hArray, planeIdx); -} - -extern __host__ cudaError_t CUDARTAPI cudaArrayGetMemoryRequirements( - struct cudaArrayMemoryRequirements *memoryRequirements, cudaArray_t array, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaArrayMemoryRequirements *, - cudaArray_t, int); - static auto func_ptr = LoadSymbol("cudaArrayGetMemoryRequirements"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memoryRequirements, array, device); -} - -extern __host__ cudaError_t CUDARTAPI cudaMipmappedArrayGetMemoryRequirements( - struct cudaArrayMemoryRequirements *memoryRequirements, - cudaMipmappedArray_t mipmap, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaArrayMemoryRequirements *, - cudaMipmappedArray_t, int); - static auto func_ptr = - LoadSymbol("cudaMipmappedArrayGetMemoryRequirements"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memoryRequirements, mipmap, device); -} - -extern __host__ cudaError_t CUDARTAPI cudaArrayGetSparseProperties( - struct cudaArraySparseProperties *sparseProperties, cudaArray_t array) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaArraySparseProperties *, cudaArray_t); - static auto func_ptr = LoadSymbol("cudaArrayGetSparseProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(sparseProperties, array); -} - -extern __host__ cudaError_t CUDARTAPI 
cudaMipmappedArrayGetSparseProperties( - struct cudaArraySparseProperties *sparseProperties, - cudaMipmappedArray_t mipmap) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaArraySparseProperties *, - cudaMipmappedArray_t); - static auto func_ptr = - LoadSymbol("cudaMipmappedArrayGetSparseProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(sparseProperties, mipmap); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy(void *dst, const void *src, - size_t count, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, - enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyPeer(void *dst, int dstDevice, - const void *src, - int srcDevice, - size_t count) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, int, const void *, int, size_t); - static auto func_ptr = LoadSymbol("cudaMemcpyPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dstDevice, src, srcDevice, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2D(void *dst, size_t dpitch, - const void *src, - size_t spitch, size_t width, - size_t height, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, spitch, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArray( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, const void *, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = 
LoadSymbol("cudaMemcpy2DToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArray( - void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, size_t, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DArrayToArray( - cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, - cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, - size_t height, enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, - cudaArray_const_t, size_t, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DArrayToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, - width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbol( - const void *symbol, const void *src, size_t count, size_t offset __dv(0), - enum cudaMemcpyKind kind __dv(cudaMemcpyHostToDevice)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, - size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbol( - void *dst, const void *symbol, size_t count, size_t offset __dv(0), - enum cudaMemcpyKind kind 
__dv(cudaMemcpyDeviceToHost)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, - enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, symbol, count, offset, kind); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemcpyAsync(void *dst, const void *src, size_t count, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, count, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpyPeerAsync(void *dst, int dstDevice, const void *src, int srcDevice, - size_t count, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, const void *, int, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dstDevice, src, srcDevice, count, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy2DAsync( - void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, - size_t height, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, size_t, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, spitch, width, height, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArrayAsync( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using 
FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, - const void *, size_t, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DToArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind, - stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArrayAsync( - void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, - size_t, size_t, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind, - stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbolAsync( - const void *symbol, const void *src, size_t count, size_t offset, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyToSymbolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, src, count, offset, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbolAsync( - void *dst, const void *symbol, size_t count, size_t offset, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, symbol, count, offset, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset(void *devPtr, int 
value, - size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t); - static auto func_ptr = LoadSymbol("cudaMemset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, value, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset2D(void *devPtr, size_t pitch, - int value, size_t width, - size_t height) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, int, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaMemset2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, value, width, height); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset3D( - struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, struct cudaExtent); - static auto func_ptr = LoadSymbol("cudaMemset3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, value, extent); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemsetAsync( - void *devPtr, int value, size_t count, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemsetAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, value, count, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemset2DAsync(void *devPtr, size_t pitch, int value, size_t width, - size_t height, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, int, size_t, size_t, - cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemset2DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, value, width, height, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemset3DAsync(struct cudaPitchedPtr pitchedDevPtr, int value, - struct cudaExtent extent, cudaStream_t stream 
__dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, - struct cudaExtent, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemset3DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, value, extent, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSymbolAddress(void **devPtr, - const void *symbol) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, const void *); - static auto func_ptr = LoadSymbol("cudaGetSymbolAddress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSymbolSize(size_t *size, - const void *symbol) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, const void *); - static auto func_ptr = LoadSymbol("cudaGetSymbolSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(size, symbol); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPrefetchAsync(const void *devPtr, size_t count, int dstDevice, - cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, size_t, int, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemPrefetchAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, dstDevice, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemAdvise(const void *devPtr, size_t count, enum cudaMemoryAdvise advice, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, size_t, - enum cudaMemoryAdvise, int); - static auto func_ptr = LoadSymbol("cudaMemAdvise"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, advice, device); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttribute( - void *data, size_t dataSize, enum cudaMemRangeAttribute attribute, - const void *devPtr, size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - void *, size_t, enum cudaMemRangeAttribute, const void *, size_t); - static 
auto func_ptr = LoadSymbol("cudaMemRangeGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSize, attribute, devPtr, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttributes( - void **data, size_t *dataSizes, enum cudaMemRangeAttribute *attributes, - size_t numAttributes, const void *devPtr, size_t count) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t *, enum cudaMemRangeAttribute *, - size_t, const void *, size_t); - static auto func_ptr = LoadSymbol("cudaMemRangeGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSizes, attributes, numAttributes, devPtr, count); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaMemcpyToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, - const void *src, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t, size_t, size_t, const void *, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaMemcpyFromArray(void *dst, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyFromArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, wOffset, hOffset, count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaMemcpyArrayToArray( - cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, - cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, - enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, 
size_t, cudaArray_const_t, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyArrayToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, - count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaMemcpyToArrayAsync( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t count, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, const void *, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyToArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, count, kind, stream); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaMemcpyFromArrayAsync(void *dst, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t count, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, size_t, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyFromArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, wOffset, hOffset, count, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocAsync(void **devPtr, - size_t size, - cudaStream_t hStream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMallocAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, hStream); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeAsync(void *devPtr, - cudaStream_t hStream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaFreeAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(devPtr, hStream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolTrimTo(cudaMemPool_t memPool, - size_t minBytesToKeep) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t, size_t); - static auto func_ptr = LoadSymbol("cudaMemPoolTrimTo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, minBytesToKeep); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolSetAttribute( - cudaMemPool_t memPool, enum cudaMemPoolAttr attr, void *value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaMemPool_t, enum cudaMemPoolAttr, void *); - static auto func_ptr = LoadSymbol("cudaMemPoolSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, attr, value); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolGetAttribute( - cudaMemPool_t memPool, enum cudaMemPoolAttr attr, void *value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaMemPool_t, enum cudaMemPoolAttr, void *); - static auto func_ptr = LoadSymbol("cudaMemPoolGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, attr, value); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPoolSetAccess(cudaMemPool_t memPool, - const struct cudaMemAccessDesc *descList, size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMemPool_t, const struct cudaMemAccessDesc *, size_t); - static auto func_ptr = LoadSymbol("cudaMemPoolSetAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, descList, count); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPoolGetAccess(enum cudaMemAccessFlags *flags, cudaMemPool_t memPool, - struct cudaMemLocation *location) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - enum cudaMemAccessFlags *, cudaMemPool_t, struct cudaMemLocation *); - static auto func_ptr = LoadSymbol("cudaMemPoolGetAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags, memPool, location); -} - -extern __host__ cudaError_t 
CUDARTAPI cudaMemPoolCreate( - cudaMemPool_t *memPool, const struct cudaMemPoolProps *poolProps) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t *, - const struct cudaMemPoolProps *); - static auto func_ptr = LoadSymbol("cudaMemPoolCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, poolProps); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPoolDestroy(cudaMemPool_t memPool) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t); - static auto func_ptr = LoadSymbol("cudaMemPoolDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocFromPoolAsync( - void **ptr, size_t size, cudaMemPool_t memPool, cudaStream_t stream) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t, cudaMemPool_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMallocFromPoolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size, memPool, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolExportToShareableHandle( - void *shareableHandle, cudaMemPool_t memPool, - enum cudaMemAllocationHandleType handleType, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - void *, cudaMemPool_t, enum cudaMemAllocationHandleType, unsigned int); - static auto func_ptr = - LoadSymbol("cudaMemPoolExportToShareableHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(shareableHandle, memPool, handleType, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolImportFromShareableHandle( - cudaMemPool_t *memPool, void *shareableHandle, - enum cudaMemAllocationHandleType handleType, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMemPool_t *, void *, enum cudaMemAllocationHandleType, unsigned int); - static auto func_ptr = - LoadSymbol("cudaMemPoolImportFromShareableHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(memPool, shareableHandle, handleType, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolExportPointer( - struct cudaMemPoolPtrExportData *exportData, void *ptr) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaMemPoolPtrExportData *, void *); - static auto func_ptr = LoadSymbol("cudaMemPoolExportPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(exportData, ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPoolImportPointer(void **ptr, cudaMemPool_t memPool, - struct cudaMemPoolPtrExportData *exportData) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, cudaMemPool_t, - struct cudaMemPoolPtrExportData *); - static auto func_ptr = LoadSymbol("cudaMemPoolImportPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, memPool, exportData); -} - -extern __host__ cudaError_t CUDARTAPI cudaPointerGetAttributes( - struct cudaPointerAttributes *attributes, const void *ptr) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPointerAttributes *, const void *); - static auto func_ptr = LoadSymbol("cudaPointerGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attributes, ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceCanAccessPeer(int *canAccessPeer, int device, int peerDevice) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceCanAccessPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(canAccessPeer, device, peerDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceEnablePeerAccess(int peerDevice, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int, unsigned int); - static auto func_ptr = LoadSymbol("cudaDeviceEnablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerDevice, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceDisablePeerAccess(int peerDevice) { - using 
FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaDeviceDisablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphicsUnregisterResource(cudaGraphicsResource_t resource) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t); - static auto func_ptr = LoadSymbol("cudaGraphicsUnregisterResource"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceSetMapFlags( - cudaGraphicsResource_t resource, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphicsResourceSetMapFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsMapResources( - int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphicsMapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsUnmapResources( - int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphicsUnmapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceGetMappedPointer( - void **devPtr, size_t *size, cudaGraphicsResource_t resource) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t *, cudaGraphicsResource_t); - static auto func_ptr = - 
LoadSymbol("cudaGraphicsResourceGetMappedPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsSubResourceGetMappedArray( - cudaArray_t *array, cudaGraphicsResource_t resource, - unsigned int arrayIndex, unsigned int mipLevel) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t *, cudaGraphicsResource_t, unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cudaGraphicsSubResourceGetMappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, resource, arrayIndex, mipLevel); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphicsResourceGetMappedMipmappedArray( - cudaMipmappedArray_t *mipmappedArray, cudaGraphicsResource_t resource) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t *, cudaGraphicsResource_t); - static auto func_ptr = - LoadSymbol("cudaGraphicsResourceGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray, resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetChannelDesc( - struct cudaChannelFormatDesc *desc, cudaArray_const_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, - cudaArray_const_t); - static auto func_ptr = LoadSymbol("cudaGetChannelDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(desc, array); -} - -extern __host__ struct cudaChannelFormatDesc CUDARTAPI cudaCreateChannelDesc( - int x, int y, int z, int w, enum cudaChannelFormatKind f) { - using FuncPtr = struct cudaChannelFormatDesc(CUDARTAPI *)( - int, int, int, int, enum cudaChannelFormatKind); - static auto func_ptr = LoadSymbol("cudaCreateChannelDesc"); - return func_ptr(x, y, z, w, f); -} - -extern __host__ cudaError_t CUDARTAPI cudaCreateTextureObject( - cudaTextureObject_t *pTexObject, const struct cudaResourceDesc *pResDesc, - const struct cudaTextureDesc *pTexDesc, - const struct 
cudaResourceViewDesc *pResViewDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaTextureObject_t *, const struct cudaResourceDesc *, - const struct cudaTextureDesc *, const struct cudaResourceViewDesc *); - static auto func_ptr = LoadSymbol("cudaCreateTextureObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexObject, pResDesc, pTexDesc, pResViewDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyTextureObject(cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaTextureObject_t); - static auto func_ptr = LoadSymbol("cudaDestroyTextureObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceDesc( - struct cudaResourceDesc *pResDesc, cudaTextureObject_t texObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaTextureObject_t); - static auto func_ptr = - LoadSymbol("cudaGetTextureObjectResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectTextureDesc( - struct cudaTextureDesc *pTexDesc, cudaTextureObject_t texObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaTextureDesc *, cudaTextureObject_t); - static auto func_ptr = LoadSymbol("cudaGetTextureObjectTextureDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceViewDesc( - struct cudaResourceViewDesc *pResViewDesc, cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaResourceViewDesc *, - cudaTextureObject_t); - static auto func_ptr = - LoadSymbol("cudaGetTextureObjectResourceViewDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResViewDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI 
cudaCreateSurfaceObject( - cudaSurfaceObject_t *pSurfObject, const struct cudaResourceDesc *pResDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t *, - const struct cudaResourceDesc *); - static auto func_ptr = LoadSymbol("cudaCreateSurfaceObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pSurfObject, pResDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroySurfaceObject(cudaSurfaceObject_t surfObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t); - static auto func_ptr = LoadSymbol("cudaDestroySurfaceObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSurfaceObjectResourceDesc( - struct cudaResourceDesc *pResDesc, cudaSurfaceObject_t surfObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaSurfaceObject_t); - static auto func_ptr = - LoadSymbol("cudaGetSurfaceObjectResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, surfObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaDriverGetVersion(int *driverVersion) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaDriverGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(driverVersion); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaRuntimeGetVersion(int *runtimeVersion) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaRuntimeGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(runtimeVersion); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphCreate(cudaGraph_t *pGraph, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraph, flags); -} - 
-extern __host__ cudaError_t CUDARTAPI cudaGraphAddKernelNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddKernelNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeGetParams( - cudaGraphNode_t node, struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeSetParams( - cudaGraphNode_t node, const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphKernelNodeCopyAttributes(cudaGraphNode_t hSrc, cudaGraphNode_t hDst) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t); - static auto func_ptr = - LoadSymbol("cudaGraphKernelNodeCopyAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hSrc, hDst); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeGetAttribute( - cudaGraphNode_t hNode, cudaKernelNodeAttrID attr, - cudaKernelNodeAttrValue *value_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, cudaKernelNodeAttrID, 
cudaKernelNodeAttrValue *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, attr, value_out); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeSetAttribute( - cudaGraphNode_t hNode, cudaKernelNodeAttrID attr, - const cudaKernelNodeAttrValue *value) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, cudaKernelNodeAttrID, const cudaKernelNodeAttrValue *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, attr, value); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaMemcpy3DParms *pCopyParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemcpyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pCopyParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNodeToSymbol( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const void *symbol, const void *src, size_t count, size_t offset, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, - const void *, const void *, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaGraphAddMemcpyNodeToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, symbol, - src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNodeFromSymbol( - 
cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, void *dst, - const void *symbol, size_t count, size_t offset, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, void *, - const void *, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphAddMemcpyNodeFromSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, dst, - symbol, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNode1D( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, void *dst, - const void *src, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, void *, - const void *, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaGraphAddMemcpyNode1D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, dst, src, - count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeGetParams( - cudaGraphNode_t node, struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParams( - cudaGraphNode_t node, const struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParamsToSymbol( - cudaGraphNode_t node, const void *symbol, const void *src, size_t count, - size_t offset, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, const void *, const void *, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphMemcpyNodeSetParamsToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, symbol, src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParamsFromSymbol( - cudaGraphNode_t node, void *dst, const void *symbol, size_t count, - size_t offset, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, void *, const void *, size_t, - size_t, enum cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphMemcpyNodeSetParamsFromSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, dst, symbol, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphMemcpyNodeSetParams1D(cudaGraphNode_t node, void *dst, const void *src, - size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, void *, const void *, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeSetParams1D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, dst, src, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemsetNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaMemsetParams *pMemsetParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemsetNode"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pMemsetParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeGetParams( - cudaGraphNode_t node, struct cudaMemsetParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphMemsetNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeSetParams( - cudaGraphNode_t node, const struct cudaMemsetParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddHostNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddHostNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeGetParams( - cudaGraphNode_t node, struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphHostNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeSetParams( - cudaGraphNode_t node, const struct cudaHostNodeParams *pNodeParams) { - 
using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphHostNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddChildGraphNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, - size_t numDependencies, cudaGraph_t childGraph) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphAddChildGraphNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - childGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphChildGraphNodeGetGraph(cudaGraphNode_t node, cudaGraph_t *pGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraph_t *); - static auto func_ptr = LoadSymbol("cudaGraphChildGraphNodeGetGraph"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pGraph); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddEmptyNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphAddEmptyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddEventRecordNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, - size_t numDependencies, cudaEvent_t event) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, cudaEvent_t); - static auto func_ptr = 
LoadSymbol("cudaGraphAddEventRecordNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphEventRecordNodeGetEvent(cudaGraphNode_t node, cudaEvent_t *event_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaEvent_t *); - static auto func_ptr = - LoadSymbol("cudaGraphEventRecordNodeGetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, event_out); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphEventRecordNodeSetEvent(cudaGraphNode_t node, cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaEvent_t); - static auto func_ptr = - LoadSymbol("cudaGraphEventRecordNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddEventWaitNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, - size_t numDependencies, cudaEvent_t event) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaGraphAddEventWaitNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphEventWaitNodeGetEvent(cudaGraphNode_t node, cudaEvent_t *event_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaEvent_t *); - static auto func_ptr = LoadSymbol("cudaGraphEventWaitNodeGetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, event_out); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphEventWaitNodeSetEvent(cudaGraphNode_t node, cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaEvent_t); - static auto func_ptr = 
LoadSymbol("cudaGraphEventWaitNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, event); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddExternalSemaphoresSignalNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaExternalSemaphoreSignalNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, - const struct cudaExternalSemaphoreSignalNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphAddExternalSemaphoresSignalNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExternalSemaphoresSignalNodeGetParams( - cudaGraphNode_t hNode, - struct cudaExternalSemaphoreSignalNodeParams *params_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, struct cudaExternalSemaphoreSignalNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExternalSemaphoresSignalNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, params_out); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExternalSemaphoresSignalNodeSetParams( - cudaGraphNode_t hNode, - const struct cudaExternalSemaphoreSignalNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, const struct cudaExternalSemaphoreSignalNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExternalSemaphoresSignalNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddExternalSemaphoresWaitNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaExternalSemaphoreWaitNodeParams *nodeParams) { - 
using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, - const struct cudaExternalSemaphoreWaitNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphAddExternalSemaphoresWaitNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExternalSemaphoresWaitNodeGetParams( - cudaGraphNode_t hNode, - struct cudaExternalSemaphoreWaitNodeParams *params_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, struct cudaExternalSemaphoreWaitNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExternalSemaphoresWaitNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, params_out); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExternalSemaphoresWaitNodeSetParams( - cudaGraphNode_t hNode, - const struct cudaExternalSemaphoreWaitNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, const struct cudaExternalSemaphoreWaitNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExternalSemaphoresWaitNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemAllocNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - struct cudaMemAllocNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - struct cudaMemAllocNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemAllocNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemAllocNodeGetParams( - cudaGraphNode_t node, struct 
cudaMemAllocNodeParams *params_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - struct cudaMemAllocNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphMemAllocNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, params_out); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemFreeNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, void *dptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, void *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemFreeNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, dptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphMemFreeNodeGetParams(cudaGraphNode_t node, void *dptr_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, void *); - static auto func_ptr = LoadSymbol("cudaGraphMemFreeNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, dptr_out); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceGraphMemTrim(int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaDeviceGraphMemTrim"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceGetGraphMemAttribute( - int device, enum cudaGraphMemAttributeType attr, void *value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, enum cudaGraphMemAttributeType, void *); - static auto func_ptr = LoadSymbol("cudaDeviceGetGraphMemAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, attr, value); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceSetGraphMemAttribute( - int device, enum cudaGraphMemAttributeType attr, void *value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, enum 
cudaGraphMemAttributeType, void *); - static auto func_ptr = LoadSymbol("cudaDeviceSetGraphMemAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, attr, value); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphClone(cudaGraph_t *pGraphClone, cudaGraph_t originalGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t *, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphClone, originalGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphNodeFindInClone(cudaGraphNode_t *pNode, cudaGraphNode_t originalNode, - cudaGraph_t clonedGraph) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraphNode_t, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphNodeFindInClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pNode, originalNode, clonedGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphNodeGetType(cudaGraphNode_t node, enum cudaGraphNodeType *pType) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, enum cudaGraphNodeType *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pType); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetNodes(cudaGraph_t graph, - cudaGraphNode_t *nodes, - size_t *numNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, nodes, numNodes); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetRootNodes( - cudaGraph_t graph, cudaGraphNode_t *pRootNodes, size_t *pNumRootNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetRootNodes"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(graph, pRootNodes, pNumRootNodes); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetEdges(cudaGraph_t graph, - cudaGraphNode_t *from, - cudaGraphNode_t *to, - size_t *numEdges) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, - cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetEdges"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numEdges); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependencies( - cudaGraphNode_t node, cudaGraphNode_t *pDependencies, - size_t *pNumDependencies) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pDependencies, pNumDependencies); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependentNodes( - cudaGraphNode_t node, cudaGraphNode_t *pDependentNodes, - size_t *pNumDependentNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetDependentNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pDependentNodes, pNumDependentNodes); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddDependencies(cudaGraph_t graph, const cudaGraphNode_t *from, - const cudaGraphNode_t *to, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, const cudaGraphNode_t *, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphAddDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphRemoveDependencies(cudaGraph_t graph, const cudaGraphNode_t *from, - const cudaGraphNode_t *to, size_t numDependencies) { - 
using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, const cudaGraphNode_t *, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphRemoveDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphDestroyNode(cudaGraphNode_t node) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t); - static auto func_ptr = LoadSymbol("cudaGraphDestroyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphInstantiate(cudaGraphExec_t *pGraphExec, cudaGraph_t graph, - unsigned long long flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t *, cudaGraph_t, - unsigned long long); - static auto func_ptr = LoadSymbol("cudaGraphInstantiate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphExec, graph, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphInstantiateWithFlags(cudaGraphExec_t *pGraphExec, cudaGraph_t graph, - unsigned long long flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t *, cudaGraph_t, - unsigned long long); - static auto func_ptr = LoadSymbol("cudaGraphInstantiateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphExec, graph, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphInstantiateWithParams(cudaGraphExec_t *pGraphExec, cudaGraph_t graph, - cudaGraphInstantiateParams *instantiateParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t *, cudaGraph_t, - cudaGraphInstantiateParams *); - static auto func_ptr = LoadSymbol("cudaGraphInstantiateWithParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphExec, graph, instantiateParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecGetFlags(cudaGraphExec_t graphExec, unsigned long long *flags) { - using 
FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, unsigned long long *); - static auto func_ptr = LoadSymbol("cudaGraphExecGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graphExec, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecKernelNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExecKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemcpyNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemcpyNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemcpyNodeSetParamsToSymbol( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const void *symbol, - const void *src, size_t count, size_t offset, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const void *, const void *, size_t, - size_t, enum cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemcpyNodeSetParamsToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, symbol, src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecMemcpyNodeSetParamsFromSymbol(cudaGraphExec_t hGraphExec, - cudaGraphNode_t node, void *dst, - const void *symbol, size_t count, - size_t offset, - enum cudaMemcpyKind kind) { - using 
FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - void *, const void *, size_t, size_t, - enum cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemcpyNodeSetParamsFromSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, dst, symbol, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemcpyNodeSetParams1D( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, void *dst, - const void *src, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, void *, - const void *, size_t, enum cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemcpyNodeSetParams1D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, dst, src, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemsetNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaMemsetParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaMemsetParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecHostNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphExecHostNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecChildGraphNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, cudaGraph_t childGraph) { - using FuncPtr = - cudaError_t(CUDARTAPI 
*)(cudaGraphExec_t, cudaGraphNode_t, cudaGraph_t); - static auto func_ptr = - LoadSymbol("cudaGraphExecChildGraphNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, childGraph); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecEventRecordNodeSetEvent( - cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, cudaEvent_t event) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, cudaEvent_t); - static auto func_ptr = - LoadSymbol("cudaGraphExecEventRecordNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, event); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecEventWaitNodeSetEvent( - cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, cudaEvent_t event) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, cudaEvent_t); - static auto func_ptr = - LoadSymbol("cudaGraphExecEventWaitNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecExternalSemaphoresSignalNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, - const struct cudaExternalSemaphoreSignalNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphExec_t, cudaGraphNode_t, - const struct cudaExternalSemaphoreSignalNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExecExternalSemaphoresSignalNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecExternalSemaphoresWaitNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, - const struct cudaExternalSemaphoreWaitNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphExec_t, cudaGraphNode_t, - const struct cudaExternalSemaphoreWaitNodeParams *); - static auto func_ptr = - 
LoadSymbol("cudaGraphExecExternalSemaphoresWaitNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphNodeSetEnabled( - cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, unsigned int isEnabled) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphNodeSetEnabled"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, isEnabled); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphNodeGetEnabled(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, - unsigned int *isEnabled) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - unsigned int *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetEnabled"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, isEnabled); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecUpdate(cudaGraphExec_t hGraphExec, cudaGraph_t hGraph, - cudaGraphExecUpdateResultInfo *resultInfo) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraph_t, - cudaGraphExecUpdateResultInfo *); - static auto func_ptr = LoadSymbol("cudaGraphExecUpdate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hGraph, resultInfo); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphUpload(cudaGraphExec_t graphExec, - cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphUpload"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graphExec, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphLaunch(cudaGraphExec_t graphExec, - cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphLaunch"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(graphExec, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecDestroy(cudaGraphExec_t graphExec) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t); - static auto func_ptr = LoadSymbol("cudaGraphExecDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graphExec); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphDestroy(cudaGraph_t graph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphDebugDotPrint( - cudaGraph_t graph, const char *path, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, const char *, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphDebugDotPrint"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, path, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaUserObjectCreate( - cudaUserObject_t *object_out, void *ptr, cudaHostFn_t destroy, - unsigned int initialRefcount, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaUserObject_t *, void *, cudaHostFn_t, unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cudaUserObjectCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(object_out, ptr, destroy, initialRefcount, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaUserObjectRetain(cudaUserObject_t object, unsigned int count __dv(1)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaUserObject_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaUserObjectRetain"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(object, count); -} - -extern __host__ cudaError_t CUDARTAPI -cudaUserObjectRelease(cudaUserObject_t object, unsigned int count __dv(1)) { - using FuncPtr = cudaError_t(CUDARTAPI 
*)(cudaUserObject_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaUserObjectRelease"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(object, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphRetainUserObject( - cudaGraph_t graph, cudaUserObject_t object, unsigned int count __dv(1), - unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaUserObject_t, - unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphRetainUserObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, object, count, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphReleaseUserObject( - cudaGraph_t graph, cudaUserObject_t object, unsigned int count __dv(1)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaUserObject_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphReleaseUserObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, object, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetDriverEntryPoint( - const char *symbol, void **funcPtr, unsigned long long flags, - enum cudaDriverEntryPointQueryResult *driverStatus) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const char *, void **, unsigned long long, - enum cudaDriverEntryPointQueryResult *); - static auto func_ptr = LoadSymbol("cudaGetDriverEntryPoint"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, funcPtr, flags, driverStatus); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetExportTable( - const void **ppExportTable, const cudaUUID_t *pExportTableId) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void **, const cudaUUID_t *); - static auto func_ptr = LoadSymbol("cudaGetExportTable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ppExportTable, pExportTableId); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cuda_runtime_9_0.inc 
b/third_party/xla/third_party/tsl/tsl/cuda/cuda_runtime_9_0.inc deleted file mode 100644 index 6753ddcf7829db..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cuda_runtime_9_0.inc +++ /dev/null @@ -1,1421 +0,0 @@ -// Auto-generated, do not edit. - -extern "C" { - -extern __host__ cudaError_t CUDARTAPI cudaDeviceReset(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaDeviceReset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceSynchronize(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaDeviceSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceSetLimit(enum cudaLimit limit, - size_t value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); - static auto func_ptr = LoadSymbol("cudaDeviceSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetLimit(size_t *pValue, enum cudaLimit limit) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); - static auto func_ptr = LoadSymbol("cudaDeviceGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pValue, limit); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetCacheConfig(enum cudaFuncCache *pCacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); - static auto func_ptr = LoadSymbol("cudaDeviceGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetStreamPriorityRange(int *leastPriority, int *greatestPriority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int *); - static auto func_ptr = 
- LoadSymbol("cudaDeviceGetStreamPriorityRange"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(leastPriority, greatestPriority); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetCacheConfig(enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaDeviceSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(cacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetSharedMemConfig(enum cudaSharedMemConfig *pConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig *); - static auto func_ptr = LoadSymbol("cudaDeviceGetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetSharedMemConfig(enum cudaSharedMemConfig config) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig); - static auto func_ptr = LoadSymbol("cudaDeviceSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceGetByPCIBusId(int *device, const char *pciBusId) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const char *); - static auto func_ptr = LoadSymbol("cudaDeviceGetByPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, pciBusId); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceGetPCIBusId(char *pciBusId, - int len, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(char *, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pciBusId, len, device); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcGetEventHandle(cudaIpcEventHandle_t *handle, cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcEventHandle_t *, cudaEvent_t); - static 
auto func_ptr = LoadSymbol("cudaIpcGetEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcOpenEventHandle(cudaEvent_t *event, cudaIpcEventHandle_t handle) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, cudaIpcEventHandle_t); - static auto func_ptr = LoadSymbol("cudaIpcOpenEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, handle); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcGetMemHandle(cudaIpcMemHandle_t *handle, void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcMemHandle_t *, void *); - static auto func_ptr = LoadSymbol("cudaIpcGetMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaIpcOpenMemHandle( - void **devPtr, cudaIpcMemHandle_t handle, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, cudaIpcMemHandle_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaIpcOpenMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, handle, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaIpcCloseMemHandle(void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaIpcCloseMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaThreadExit(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaThreadExit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ cudaError_t CUDARTAPI cudaThreadSynchronize(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaThreadSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ cudaError_t CUDARTAPI 
cudaThreadSetLimit(enum cudaLimit limit, - size_t value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); - static auto func_ptr = LoadSymbol("cudaThreadSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -extern __host__ cudaError_t CUDARTAPI cudaThreadGetLimit(size_t *pValue, - enum cudaLimit limit) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); - static auto func_ptr = LoadSymbol("cudaThreadGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pValue, limit); -} - -extern __host__ cudaError_t CUDARTAPI -cudaThreadGetCacheConfig(enum cudaFuncCache *pCacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); - static auto func_ptr = LoadSymbol("cudaThreadGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCacheConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaThreadSetCacheConfig(enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaThreadSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(cacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetLastError(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaGetLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaPeekAtLastError(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaPeekAtLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ const char *CUDARTAPI -cudaGetErrorName(cudaError_t error) { - using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); - static auto func_ptr = LoadSymbol("cudaGetErrorName"); - if (!func_ptr) return 
"cudaGetErrorName symbol not found."; - return func_ptr(error); -} - -extern __host__ __cudart_builtin__ const char *CUDARTAPI -cudaGetErrorString(cudaError_t error) { - using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); - static auto func_ptr = LoadSymbol("cudaGetErrorString"); - if (!func_ptr) return "cudaGetErrorString symbol not found."; - return func_ptr(error); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDeviceCount(int *count) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaGetDeviceCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDeviceProperties(struct cudaDeviceProp *prop, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaDeviceProp *, int); - static auto func_ptr = LoadSymbol("cudaGetDeviceProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(prop, device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetAttribute(int *value, enum cudaDeviceAttr attr, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceAttr, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attr, device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetP2PAttribute(int *value, enum cudaDeviceP2PAttr attr, - int srcDevice, int dstDevice) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceP2PAttr, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetP2PAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attr, srcDevice, dstDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaChooseDevice(int *device, const struct cudaDeviceProp *prop) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const struct cudaDeviceProp *); - 
static auto func_ptr = LoadSymbol("cudaChooseDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, prop); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDevice(int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaSetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDevice(int *device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaGetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetValidDevices(int *device_arr, - int len) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int); - static auto func_ptr = LoadSymbol("cudaSetValidDevices"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device_arr, len); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDeviceFlags(unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int); - static auto func_ptr = LoadSymbol("cudaSetDeviceFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetDeviceFlags(unsigned int *flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *); - static auto func_ptr = LoadSymbol("cudaGetDeviceFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamCreate(cudaStream_t *pStream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *); - static auto func_ptr = LoadSymbol("cudaStreamCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCreateWithFlags(cudaStream_t *pStream, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, 
unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamCreateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCreateWithPriority(cudaStream_t *pStream, unsigned int flags, - int priority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int, int); - static auto func_ptr = LoadSymbol("cudaStreamCreateWithPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream, flags, priority); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetPriority(cudaStream_t hStream, int *priority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, int *); - static auto func_ptr = LoadSymbol("cudaStreamGetPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, priority); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetFlags(cudaStream_t hStream, unsigned int *flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, unsigned int *); - static auto func_ptr = LoadSymbol("cudaStreamGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamDestroy(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamWaitEvent( - cudaStream_t stream, cudaEvent_t event, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, cudaEvent_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamWaitEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, event, flags); -} - -extern __host__ cudaError_t CUDARTAPI 
-cudaStreamAddCallback(cudaStream_t stream, cudaStreamCallback_t callback, - void *userData, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStreamCallback_t, - void *, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamAddCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, callback, userData, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamSynchronize(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamQuery(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamAttachMemAsync(cudaStream_t stream, void *devPtr, - size_t length __dv(0), - unsigned int flags __dv(cudaMemAttachSingle)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamAttachMemAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, devPtr, length, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventCreate(cudaEvent_t *event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *); - static auto func_ptr = LoadSymbol("cudaEventCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventCreateWithFlags(cudaEvent_t *event, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaEventCreateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(event, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventRecord(cudaEvent_t event, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaEventRecord"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventQuery(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventSynchronize(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventDestroy(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventElapsedTime(float *ms, - cudaEvent_t start, - cudaEvent_t end) { - using FuncPtr = cudaError_t(CUDARTAPI *)(float *, cudaEvent_t, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventElapsedTime"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ms, start, end); -} - -extern __host__ cudaError_t CUDARTAPI -cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, void **args, - size_t sharedMem, cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaLaunchKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, gridDim, blockDim, args, sharedMem, stream); 
-} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchCooperativeKernel( - const void *func, dim3 gridDim, dim3 blockDim, void **args, - size_t sharedMem, cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaLaunchCooperativeKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, gridDim, blockDim, args, sharedMem, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchCooperativeKernelMultiDevice( - struct cudaLaunchParams *launchParamsList, unsigned int numDevices, - unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaLaunchParams *, - unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cudaLaunchCooperativeKernelMultiDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(launchParamsList, numDevices, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFuncSetCacheConfig(const void *func, enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaFuncSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, cacheConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFuncSetSharedMemConfig(const void *func, enum cudaSharedMemConfig config) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, enum cudaSharedMemConfig); - static auto func_ptr = LoadSymbol("cudaFuncSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, config); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFuncGetAttributes(struct cudaFuncAttributes *attr, const void *func) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaFuncAttributes *, const void *); - static auto func_ptr = LoadSymbol("cudaFuncGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(attr, func); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFuncSetAttribute(const void *func, enum cudaFuncAttribute attr, int value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncAttribute, int); - static auto func_ptr = LoadSymbol("cudaFuncSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, attr, value); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDoubleForDevice(double *d) { - using FuncPtr = cudaError_t(CUDARTAPI *)(double *); - static auto func_ptr = LoadSymbol("cudaSetDoubleForDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(d); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDoubleForHost(double *d) { - using FuncPtr = cudaError_t(CUDARTAPI *)(double *); - static auto func_ptr = LoadSymbol("cudaSetDoubleForHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(d); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *func, - int blockSize, - size_t dynamicSMemSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t); - static auto func_ptr = - LoadSymbol("cudaOccupancyMaxActiveBlocksPerMultiprocessor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, - const void *func, - int blockSize, - size_t dynamicSMemSize, - unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t, unsigned int); - static auto func_ptr = LoadSymbol( - "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize, flags); -} - -extern __host__ cudaError_t CUDARTAPI 
-cudaConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem __dv(0), - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(dim3, dim3, size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaConfigureCall"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(gridDim, blockDim, sharedMem, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetupArgument(const void *arg, - size_t size, - size_t offset) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaSetupArgument"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(arg, size, offset); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunch(const void *func) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *); - static auto func_ptr = LoadSymbol("cudaLaunch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMallocManaged( - void **devPtr, size_t size, unsigned int flags __dv(cudaMemAttachGlobal)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocManaged"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMalloc(void **devPtr, size_t size) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cudaMalloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocHost(void **ptr, size_t size) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cudaMallocHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocPitch(void **devPtr, - size_t *pitch, 
- size_t width, - size_t height) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t *, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaMallocPitch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, width, height); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocArray( - cudaArray_t *array, const struct cudaChannelFormatDesc *desc, size_t width, - size_t height __dv(0), unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, - const struct cudaChannelFormatDesc *, - size_t, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, desc, width, height, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFree(void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaFree"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeHost(void *ptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaFreeHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeArray(cudaArray_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t); - static auto func_ptr = LoadSymbol("cudaFreeArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFreeMipmappedArray(cudaMipmappedArray_t mipmappedArray) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t); - static auto func_ptr = LoadSymbol("cudaFreeMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostAlloc(void **pHost, size_t size, - unsigned int flags) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostAlloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHost, size, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostRegister(void *ptr, size_t size, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostRegister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostUnregister(void *ptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaHostUnregister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaHostGetDevicePointer(void **pDevice, void *pHost, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, void *, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostGetDevicePointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pDevice, pHost, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostGetFlags(unsigned int *pFlags, - void *pHost) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *, void *); - static auto func_ptr = LoadSymbol("cudaHostGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFlags, pHost); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMalloc3D(struct cudaPitchedPtr *pitchedDevPtr, struct cudaExtent extent) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr *, struct cudaExtent); - static auto func_ptr = LoadSymbol("cudaMalloc3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, extent); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMalloc3DArray(cudaArray_t *array, const struct cudaChannelFormatDesc *desc, - struct cudaExtent extent, unsigned int flags __dv(0)) { - using 
FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, - const struct cudaChannelFormatDesc *, - struct cudaExtent, unsigned int); - static auto func_ptr = LoadSymbol("cudaMalloc3DArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, desc, extent, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocMipmappedArray( - cudaMipmappedArray_t *mipmappedArray, - const struct cudaChannelFormatDesc *desc, struct cudaExtent extent, - unsigned int numLevels, unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMipmappedArray_t *, const struct cudaChannelFormatDesc *, - struct cudaExtent, unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray, desc, extent, numLevels, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetMipmappedArrayLevel( - cudaArray_t *levelArray, cudaMipmappedArray_const_t mipmappedArray, - unsigned int level) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t *, cudaMipmappedArray_const_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGetMipmappedArrayLevel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(levelArray, mipmappedArray, level); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpy3D(const struct cudaMemcpy3DParms *p) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaMemcpy3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpy3DPeer(const struct cudaMemcpy3DPeerParms *p) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *); - static auto func_ptr = LoadSymbol("cudaMemcpy3DPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy3DAsync( - 
const struct cudaMemcpy3DParms *p, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy3DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy3DPeerAsync( - const struct cudaMemcpy3DPeerParms *p, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *, - cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy3DPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemGetInfo(size_t *free, - size_t *total) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaMemGetInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(free, total); -} - -extern __host__ cudaError_t CUDARTAPI -cudaArrayGetInfo(struct cudaChannelFormatDesc *desc, struct cudaExtent *extent, - unsigned int *flags, cudaArray_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, - struct cudaExtent *, unsigned int *, - cudaArray_t); - static auto func_ptr = LoadSymbol("cudaArrayGetInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(desc, extent, flags, array); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy(void *dst, const void *src, - size_t count, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, - enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyPeer(void *dst, int dstDevice, - const void *src, - int srcDevice, - size_t count) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, int, const void 
*, int, size_t); - static auto func_ptr = LoadSymbol("cudaMemcpyPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dstDevice, src, srcDevice, count); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpyToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, - const void *src, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t, size_t, size_t, const void *, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpyFromArray(void *dst, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyFromArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, wOffset, hOffset, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyArrayToArray( - cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, - cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, - enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, cudaArray_const_t, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyArrayToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, - count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2D(void *dst, size_t dpitch, - const void *src, - size_t spitch, size_t width, - size_t height, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, - size_t, size_t, enum 
cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, spitch, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArray( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, const void *, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArray( - void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, size_t, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DArrayToArray( - cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, - cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, - size_t height, enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, - cudaArray_const_t, size_t, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DArrayToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, - width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbol( - const void *symbol, const void *src, 
size_t count, size_t offset __dv(0), - enum cudaMemcpyKind kind __dv(cudaMemcpyHostToDevice)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, - size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbol( - void *dst, const void *symbol, size_t count, size_t offset __dv(0), - enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToHost)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, - enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, symbol, count, offset, kind); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemcpyAsync(void *dst, const void *src, size_t count, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, count, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpyPeerAsync(void *dst, int dstDevice, const void *src, int srcDevice, - size_t count, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, const void *, int, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dstDevice, src, srcDevice, count, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToArrayAsync( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t count, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, 
size_t, size_t, const void *, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyToArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, count, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromArrayAsync( - void *dst, cudaArray_const_t src, size_t wOffset, size_t hOffset, - size_t count, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, size_t, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyFromArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, wOffset, hOffset, count, kind, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy2DAsync( - void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, - size_t height, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, size_t, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, spitch, width, height, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArrayAsync( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, - const void *, size_t, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DToArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind, - stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArrayAsync( 
- void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, - size_t, size_t, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind, - stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbolAsync( - const void *symbol, const void *src, size_t count, size_t offset, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyToSymbolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, src, count, offset, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbolAsync( - void *dst, const void *symbol, size_t count, size_t offset, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, symbol, count, offset, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset(void *devPtr, int value, - size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t); - static auto func_ptr = LoadSymbol("cudaMemset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, value, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset2D(void *devPtr, size_t pitch, - int value, size_t width, - size_t height) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, 
size_t, int, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaMemset2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, value, width, height); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset3D( - struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, struct cudaExtent); - static auto func_ptr = LoadSymbol("cudaMemset3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, value, extent); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemsetAsync( - void *devPtr, int value, size_t count, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemsetAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, value, count, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemset2DAsync(void *devPtr, size_t pitch, int value, size_t width, - size_t height, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, int, size_t, size_t, - cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemset2DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, value, width, height, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemset3DAsync(struct cudaPitchedPtr pitchedDevPtr, int value, - struct cudaExtent extent, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, - struct cudaExtent, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemset3DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, value, extent, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSymbolAddress(void **devPtr, - const void *symbol) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)(void **, const void *); - static auto func_ptr = LoadSymbol("cudaGetSymbolAddress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSymbolSize(size_t *size, - const void *symbol) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, const void *); - static auto func_ptr = LoadSymbol("cudaGetSymbolSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(size, symbol); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPrefetchAsync(const void *devPtr, size_t count, int dstDevice, - cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, size_t, int, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemPrefetchAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, dstDevice, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemAdvise(const void *devPtr, size_t count, enum cudaMemoryAdvise advice, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, size_t, - enum cudaMemoryAdvise, int); - static auto func_ptr = LoadSymbol("cudaMemAdvise"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, advice, device); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttribute( - void *data, size_t dataSize, enum cudaMemRangeAttribute attribute, - const void *devPtr, size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - void *, size_t, enum cudaMemRangeAttribute, const void *, size_t); - static auto func_ptr = LoadSymbol("cudaMemRangeGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSize, attribute, devPtr, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttributes( - void **data, size_t *dataSizes, enum cudaMemRangeAttribute *attributes, - size_t numAttributes, const void *devPtr, size_t count) { - using FuncPtr = - 
cudaError_t(CUDARTAPI *)(void **, size_t *, enum cudaMemRangeAttribute *, - size_t, const void *, size_t); - static auto func_ptr = LoadSymbol("cudaMemRangeGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSizes, attributes, numAttributes, devPtr, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaPointerGetAttributes( - struct cudaPointerAttributes *attributes, const void *ptr) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPointerAttributes *, const void *); - static auto func_ptr = LoadSymbol("cudaPointerGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attributes, ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceCanAccessPeer(int *canAccessPeer, int device, int peerDevice) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceCanAccessPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(canAccessPeer, device, peerDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceEnablePeerAccess(int peerDevice, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int, unsigned int); - static auto func_ptr = LoadSymbol("cudaDeviceEnablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerDevice, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceDisablePeerAccess(int peerDevice) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaDeviceDisablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphicsUnregisterResource(cudaGraphicsResource_t resource) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t); - static auto func_ptr = LoadSymbol("cudaGraphicsUnregisterResource"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource); -} - -extern __host__ 
cudaError_t CUDARTAPI cudaGraphicsResourceSetMapFlags( - cudaGraphicsResource_t resource, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphicsResourceSetMapFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsMapResources( - int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphicsMapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsUnmapResources( - int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphicsUnmapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceGetMappedPointer( - void **devPtr, size_t *size, cudaGraphicsResource_t resource) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t *, cudaGraphicsResource_t); - static auto func_ptr = - LoadSymbol("cudaGraphicsResourceGetMappedPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsSubResourceGetMappedArray( - cudaArray_t *array, cudaGraphicsResource_t resource, - unsigned int arrayIndex, unsigned int mipLevel) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t *, cudaGraphicsResource_t, unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cudaGraphicsSubResourceGetMappedArray"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(array, resource, arrayIndex, mipLevel); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphicsResourceGetMappedMipmappedArray( - cudaMipmappedArray_t *mipmappedArray, cudaGraphicsResource_t resource) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t *, cudaGraphicsResource_t); - static auto func_ptr = - LoadSymbol("cudaGraphicsResourceGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray, resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetChannelDesc( - struct cudaChannelFormatDesc *desc, cudaArray_const_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, - cudaArray_const_t); - static auto func_ptr = LoadSymbol("cudaGetChannelDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(desc, array); -} - -extern __host__ struct cudaChannelFormatDesc CUDARTAPI cudaCreateChannelDesc( - int x, int y, int z, int w, enum cudaChannelFormatKind f) { - using FuncPtr = struct cudaChannelFormatDesc(CUDARTAPI *)( - int, int, int, int, enum cudaChannelFormatKind); - static auto func_ptr = LoadSymbol("cudaCreateChannelDesc"); - if (!func_ptr) { - return cudaChannelFormatDesc{cudaChannelFormatKind(-1), 0, 0, 0}; - } - return func_ptr(x, y, z, w, f); -} - -extern __host__ cudaError_t CUDARTAPI cudaBindTexture( - size_t *offset, const struct textureReference *texref, const void *devPtr, - const struct cudaChannelFormatDesc *desc, size_t size __dv(UINT_MAX)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct textureReference *, const void *, - const struct cudaChannelFormatDesc *, size_t); - static auto func_ptr = LoadSymbol("cudaBindTexture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref, devPtr, desc, size); -} - -extern __host__ cudaError_t CUDARTAPI -cudaBindTexture2D(size_t *offset, const struct textureReference *texref, - const void 
*devPtr, const struct cudaChannelFormatDesc *desc, - size_t width, size_t height, size_t pitch) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct textureReference *, const void *, - const struct cudaChannelFormatDesc *, size_t, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaBindTexture2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref, devPtr, desc, width, height, pitch); -} - -extern __host__ cudaError_t CUDARTAPI cudaBindTextureToArray( - const struct textureReference *texref, cudaArray_const_t array, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct textureReference *, cudaArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindTextureToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, array, desc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaBindTextureToMipmappedArray(const struct textureReference *texref, - cudaMipmappedArray_const_t mipmappedArray, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct textureReference *, cudaMipmappedArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindTextureToMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, mipmappedArray, desc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaUnbindTexture(const struct textureReference *texref) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct textureReference *); - static auto func_ptr = LoadSymbol("cudaUnbindTexture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureAlignmentOffset( - size_t *offset, const struct textureReference *texref) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(size_t *, const struct textureReference *); - static auto 
func_ptr = LoadSymbol("cudaGetTextureAlignmentOffset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureReference( - const struct textureReference **texref, const void *symbol) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct textureReference **, const void *); - static auto func_ptr = LoadSymbol("cudaGetTextureReference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaBindSurfaceToArray( - const struct surfaceReference *surfref, cudaArray_const_t array, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct surfaceReference *, cudaArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindSurfaceToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfref, array, desc); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSurfaceReference( - const struct surfaceReference **surfref, const void *symbol) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct surfaceReference **, const void *); - static auto func_ptr = LoadSymbol("cudaGetSurfaceReference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfref, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaCreateTextureObject( - cudaTextureObject_t *pTexObject, const struct cudaResourceDesc *pResDesc, - const struct cudaTextureDesc *pTexDesc, - const struct cudaResourceViewDesc *pResViewDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaTextureObject_t *, const struct cudaResourceDesc *, - const struct cudaTextureDesc *, const struct cudaResourceViewDesc *); - static auto func_ptr = LoadSymbol("cudaCreateTextureObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexObject, pResDesc, pTexDesc, pResViewDesc); -} - -extern __host__ cudaError_t 
CUDARTAPI -cudaDestroyTextureObject(cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaTextureObject_t); - static auto func_ptr = LoadSymbol("cudaDestroyTextureObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceDesc( - struct cudaResourceDesc *pResDesc, cudaTextureObject_t texObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaTextureObject_t); - static auto func_ptr = - LoadSymbol("cudaGetTextureObjectResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectTextureDesc( - struct cudaTextureDesc *pTexDesc, cudaTextureObject_t texObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaTextureDesc *, cudaTextureObject_t); - static auto func_ptr = LoadSymbol("cudaGetTextureObjectTextureDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceViewDesc( - struct cudaResourceViewDesc *pResViewDesc, cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaResourceViewDesc *, - cudaTextureObject_t); - static auto func_ptr = - LoadSymbol("cudaGetTextureObjectResourceViewDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResViewDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaCreateSurfaceObject( - cudaSurfaceObject_t *pSurfObject, const struct cudaResourceDesc *pResDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t *, - const struct cudaResourceDesc *); - static auto func_ptr = LoadSymbol("cudaCreateSurfaceObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pSurfObject, pResDesc); -} - -extern __host__ cudaError_t CUDARTAPI 
-cudaDestroySurfaceObject(cudaSurfaceObject_t surfObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t); - static auto func_ptr = LoadSymbol("cudaDestroySurfaceObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSurfaceObjectResourceDesc( - struct cudaResourceDesc *pResDesc, cudaSurfaceObject_t surfObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaSurfaceObject_t); - static auto func_ptr = - LoadSymbol("cudaGetSurfaceObjectResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, surfObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaDriverGetVersion(int *driverVersion) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaDriverGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(driverVersion); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaRuntimeGetVersion(int *runtimeVersion) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaRuntimeGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(runtimeVersion); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetExportTable( - const void **ppExportTable, const cudaUUID_t *pExportTableId) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void **, const cudaUUID_t *); - static auto func_ptr = LoadSymbol("cudaGetExportTable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ppExportTable, pExportTableId); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cuda_stub.cc b/third_party/xla/third_party/tsl/tsl/cuda/cuda_stub.cc index aa00b3bbeb5875..a199d4cc700442 100644 --- a/third_party/xla/third_party/tsl/tsl/cuda/cuda_stub.cc +++ b/third_party/xla/third_party/tsl/tsl/cuda/cuda_stub.cc @@ -33,40 +33,40 @@ void* GetDsoHandle() { #endif } 
-template -T LoadSymbol(const char* symbol_name) { +void* LoadSymbol(const char* symbol_name) { void* symbol = nullptr; if (auto handle = GetDsoHandle()) { tsl::Env::Default() ->GetSymbolFromLibrary(handle, symbol_name, &symbol) .IgnoreError(); } - return reinterpret_cast(symbol); + return symbol; } -CUresult GetSymbolNotFoundError() { +const char* kSymbols[] = { +#include "tsl/cuda/cuda.inc" +}; + +constexpr size_t kNumSymbols = sizeof(kSymbols) / sizeof(const char*); + +} // namespace + +extern "C" { + +static CUresult GetSymbolNotFoundError() { return CUDA_ERROR_SHARED_OBJECT_INIT_FAILED; } -} // namespace -#if CUDA_VERSION < 10000 -#error CUDA version earlier than 10 is not supported. -#endif +extern void* _cuda_tramp_table[]; -#ifndef __CUDA_DEPRECATED -#define __CUDA_DEPRECATED -#endif +void _cuda_tramp_resolve(int i) { + CHECK_LE(0, i); + CHECK_LT(i, kNumSymbols); + void* p = LoadSymbol(kSymbols[i]); + if (!p) { + p = reinterpret_cast(&GetSymbolNotFoundError); + } + _cuda_tramp_table[i] = p; +} -#if CUDA_VERSION < 10010 -#include "tsl/cuda/cuda_10_0.inc" -#elif CUDA_VERSION < 10020 -#include "tsl/cuda/cuda_10_1.inc" -#elif CUDA_VERSION < 11000 -#include "tsl/cuda/cuda_10_2.inc" -#elif CUDA_VERSION < 11020 -#include "tsl/cuda/cuda_11_0.inc" -#elif CUDA_VERSION < 12000 -#include "tsl/cuda/cuda_11_2.inc" -#else -#include "tsl/cuda/cuda_12_0.inc" -#endif +} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cudart.symbols b/third_party/xla/third_party/tsl/tsl/cuda/cudart.symbols new file mode 100644 index 00000000000000..69b990cb3879b5 --- /dev/null +++ b/third_party/xla/third_party/tsl/tsl/cuda/cudart.symbols @@ -0,0 +1,399 @@ +__cudaGetKernel +__cudaInitModule +__cudaLaunchKernel +__cudaLaunchKernel_ptsz +__cudaPopCallConfiguration +__cudaPushCallConfiguration +__cudaRegisterFatBinary +__cudaRegisterFatBinaryEnd +__cudaRegisterFunction +__cudaRegisterHostVar +__cudaRegisterManagedVar +__cudaRegisterUnifiedTable +__cudaRegisterVar 
+__cudaUnregisterFatBinary +cudaArrayGetInfo +cudaArrayGetMemoryRequirements +cudaArrayGetPlane +cudaArrayGetSparseProperties +cudaChooseDevice +cudaCreateChannelDesc +cudaCreateSurfaceObject +cudaCreateTextureObject +cudaCtxResetPersistingL2Cache +cudaDestroyExternalMemory +cudaDestroyExternalSemaphore +cudaDestroySurfaceObject +cudaDestroyTextureObject +cudaDeviceCanAccessPeer +cudaDeviceDisablePeerAccess +cudaDeviceEnablePeerAccess +cudaDeviceFlushGPUDirectRDMAWrites +cudaDeviceGetAttribute +cudaDeviceGetByPCIBusId +cudaDeviceGetCacheConfig +cudaDeviceGetDefaultMemPool +cudaDeviceGetGraphMemAttribute +cudaDeviceGetLimit +cudaDeviceGetMemPool +cudaDeviceGetNvSciSyncAttributes +cudaDeviceGetP2PAttribute +cudaDeviceGetPCIBusId +cudaDeviceGetSharedMemConfig +cudaDeviceGetStreamPriorityRange +cudaDeviceGetTexture1DLinearMaxWidth +cudaDeviceGraphMemTrim +cudaDeviceReset +cudaDeviceSetCacheConfig +cudaDeviceSetGraphMemAttribute +cudaDeviceSetLimit +cudaDeviceSetMemPool +cudaDeviceSetSharedMemConfig +cudaDeviceSynchronize +cudaDriverGetVersion +cudaEGLStreamConsumerAcquireFrame +cudaEGLStreamConsumerConnect +cudaEGLStreamConsumerConnectWithFlags +cudaEGLStreamConsumerDisconnect +cudaEGLStreamConsumerReleaseFrame +cudaEGLStreamProducerConnect +cudaEGLStreamProducerDisconnect +cudaEGLStreamProducerPresentFrame +cudaEGLStreamProducerReturnFrame +cudaEventCreate +cudaEventCreateFromEGLSync +cudaEventCreateWithFlags +cudaEventDestroy +cudaEventElapsedTime +cudaEventQuery +cudaEventRecord +cudaEventRecordWithFlags +cudaEventRecordWithFlags_ptsz +cudaEventRecord_ptsz +cudaEventSynchronize +cudaExternalMemoryGetMappedBuffer +cudaExternalMemoryGetMappedMipmappedArray +cudaFree +cudaFreeArray +cudaFreeAsync +cudaFreeAsync_ptsz +cudaFreeHost +cudaFreeMipmappedArray +cudaFuncGetAttributes +cudaFuncSetAttribute +cudaFuncSetCacheConfig +cudaFuncSetSharedMemConfig +cudaGLGetDevices +cudaGLMapBufferObject +cudaGLMapBufferObjectAsync +cudaGLRegisterBufferObject 
+cudaGLSetBufferObjectMapFlags +cudaGLSetGLDevice +cudaGLUnmapBufferObject +cudaGLUnmapBufferObjectAsync +cudaGLUnregisterBufferObject +cudaGetChannelDesc +cudaGetDevice +cudaGetDeviceCount +cudaGetDeviceFlags +cudaGetDeviceProperties +cudaGetDeviceProperties_v2 +cudaGetDriverEntryPoint +cudaGetDriverEntryPoint_ptsz +cudaGetErrorName +cudaGetErrorString +cudaGetExportTable +cudaGetFuncBySymbol +cudaGetKernel +cudaGetLastError +cudaGetMipmappedArrayLevel +cudaGetSurfaceObjectResourceDesc +cudaGetSymbolAddress +cudaGetSymbolSize +cudaGetTextureObjectResourceDesc +cudaGetTextureObjectResourceViewDesc +cudaGetTextureObjectTextureDesc +cudaGraphAddChildGraphNode +cudaGraphAddDependencies +cudaGraphAddEmptyNode +cudaGraphAddEventRecordNode +cudaGraphAddEventWaitNode +cudaGraphAddExternalSemaphoresSignalNode +cudaGraphAddExternalSemaphoresWaitNode +cudaGraphAddHostNode +cudaGraphAddKernelNode +cudaGraphAddMemAllocNode +cudaGraphAddMemFreeNode +cudaGraphAddMemcpyNode +cudaGraphAddMemcpyNode1D +cudaGraphAddMemcpyNodeFromSymbol +cudaGraphAddMemcpyNodeToSymbol +cudaGraphAddMemsetNode +cudaGraphAddNode +cudaGraphChildGraphNodeGetGraph +cudaGraphClone +cudaGraphCreate +cudaGraphDebugDotPrint +cudaGraphDestroy +cudaGraphDestroyNode +cudaGraphEventRecordNodeGetEvent +cudaGraphEventRecordNodeSetEvent +cudaGraphEventWaitNodeGetEvent +cudaGraphEventWaitNodeSetEvent +cudaGraphExecChildGraphNodeSetParams +cudaGraphExecDestroy +cudaGraphExecEventRecordNodeSetEvent +cudaGraphExecEventWaitNodeSetEvent +cudaGraphExecExternalSemaphoresSignalNodeSetParams +cudaGraphExecExternalSemaphoresWaitNodeSetParams +cudaGraphExecGetFlags +cudaGraphExecHostNodeSetParams +cudaGraphExecKernelNodeSetParams +cudaGraphExecMemcpyNodeSetParams +cudaGraphExecMemcpyNodeSetParams1D +cudaGraphExecMemcpyNodeSetParamsFromSymbol +cudaGraphExecMemcpyNodeSetParamsToSymbol +cudaGraphExecMemsetNodeSetParams +cudaGraphExecNodeSetParams +cudaGraphExecUpdate +cudaGraphExternalSemaphoresSignalNodeGetParams 
+cudaGraphExternalSemaphoresSignalNodeSetParams +cudaGraphExternalSemaphoresWaitNodeGetParams +cudaGraphExternalSemaphoresWaitNodeSetParams +cudaGraphGetEdges +cudaGraphGetNodes +cudaGraphGetRootNodes +cudaGraphHostNodeGetParams +cudaGraphHostNodeSetParams +cudaGraphInstantiate +cudaGraphInstantiateWithFlags +cudaGraphInstantiateWithParams +cudaGraphInstantiateWithParams_ptsz +cudaGraphKernelNodeCopyAttributes +cudaGraphKernelNodeGetAttribute +cudaGraphKernelNodeGetParams +cudaGraphKernelNodeSetAttribute +cudaGraphKernelNodeSetParams +cudaGraphLaunch +cudaGraphLaunch_ptsz +cudaGraphMemAllocNodeGetParams +cudaGraphMemFreeNodeGetParams +cudaGraphMemcpyNodeGetParams +cudaGraphMemcpyNodeSetParams +cudaGraphMemcpyNodeSetParams1D +cudaGraphMemcpyNodeSetParamsFromSymbol +cudaGraphMemcpyNodeSetParamsToSymbol +cudaGraphMemsetNodeGetParams +cudaGraphMemsetNodeSetParams +cudaGraphNodeFindInClone +cudaGraphNodeGetDependencies +cudaGraphNodeGetDependentNodes +cudaGraphNodeGetEnabled +cudaGraphNodeGetType +cudaGraphNodeSetEnabled +cudaGraphNodeSetParams +cudaGraphReleaseUserObject +cudaGraphRemoveDependencies +cudaGraphRetainUserObject +cudaGraphUpload +cudaGraphUpload_ptsz +cudaGraphicsEGLRegisterImage +cudaGraphicsGLRegisterBuffer +cudaGraphicsGLRegisterImage +cudaGraphicsMapResources +cudaGraphicsResourceGetMappedEglFrame +cudaGraphicsResourceGetMappedMipmappedArray +cudaGraphicsResourceGetMappedPointer +cudaGraphicsResourceSetMapFlags +cudaGraphicsSubResourceGetMappedArray +cudaGraphicsUnmapResources +cudaGraphicsUnregisterResource +cudaGraphicsVDPAURegisterOutputSurface +cudaGraphicsVDPAURegisterVideoSurface +cudaHostAlloc +cudaHostGetDevicePointer +cudaHostGetFlags +cudaHostRegister +cudaHostUnregister +cudaImportExternalMemory +cudaImportExternalSemaphore +cudaInitDevice +cudaIpcCloseMemHandle +cudaIpcGetEventHandle +cudaIpcGetMemHandle +cudaIpcOpenEventHandle +cudaIpcOpenMemHandle +cudaLaunchCooperativeKernel +cudaLaunchCooperativeKernelMultiDevice 
+cudaLaunchCooperativeKernel_ptsz +cudaLaunchHostFunc +cudaLaunchHostFunc_ptsz +cudaLaunchKernel +cudaLaunchKernelExC +cudaLaunchKernelExC_ptsz +cudaLaunchKernel_ptsz +cudaMalloc +cudaMalloc3D +cudaMalloc3DArray +cudaMallocArray +cudaMallocAsync +cudaMallocAsync_ptsz +cudaMallocFromPoolAsync +cudaMallocFromPoolAsync_ptsz +cudaMallocHost +cudaMallocManaged +cudaMallocMipmappedArray +cudaMallocPitch +cudaMemAdvise +cudaMemAdvise_v2 +cudaMemGetInfo +cudaMemPoolCreate +cudaMemPoolDestroy +cudaMemPoolExportPointer +cudaMemPoolExportToShareableHandle +cudaMemPoolGetAccess +cudaMemPoolGetAttribute +cudaMemPoolImportFromShareableHandle +cudaMemPoolImportPointer +cudaMemPoolSetAccess +cudaMemPoolSetAttribute +cudaMemPoolTrimTo +cudaMemPrefetchAsync +cudaMemPrefetchAsync_ptsz +cudaMemPrefetchAsync_v2 +cudaMemPrefetchAsync_v2_ptsz +cudaMemRangeGetAttribute +cudaMemRangeGetAttributes +cudaMemcpy +cudaMemcpy2D +cudaMemcpy2DArrayToArray +cudaMemcpy2DArrayToArray_ptds +cudaMemcpy2DAsync +cudaMemcpy2DAsync_ptsz +cudaMemcpy2DFromArray +cudaMemcpy2DFromArrayAsync +cudaMemcpy2DFromArrayAsync_ptsz +cudaMemcpy2DFromArray_ptds +cudaMemcpy2DToArray +cudaMemcpy2DToArrayAsync +cudaMemcpy2DToArrayAsync_ptsz +cudaMemcpy2DToArray_ptds +cudaMemcpy2D_ptds +cudaMemcpy3D +cudaMemcpy3DAsync +cudaMemcpy3DAsync_ptsz +cudaMemcpy3DPeer +cudaMemcpy3DPeerAsync +cudaMemcpy3DPeerAsync_ptsz +cudaMemcpy3DPeer_ptds +cudaMemcpy3D_ptds +cudaMemcpyArrayToArray +cudaMemcpyArrayToArray_ptds +cudaMemcpyAsync +cudaMemcpyAsync_ptsz +cudaMemcpyFromArray +cudaMemcpyFromArrayAsync +cudaMemcpyFromArrayAsync_ptsz +cudaMemcpyFromArray_ptds +cudaMemcpyFromSymbol +cudaMemcpyFromSymbolAsync +cudaMemcpyFromSymbolAsync_ptsz +cudaMemcpyFromSymbol_ptds +cudaMemcpyPeer +cudaMemcpyPeerAsync +cudaMemcpyToArray +cudaMemcpyToArrayAsync +cudaMemcpyToArrayAsync_ptsz +cudaMemcpyToArray_ptds +cudaMemcpyToSymbol +cudaMemcpyToSymbolAsync +cudaMemcpyToSymbolAsync_ptsz +cudaMemcpyToSymbol_ptds +cudaMemcpy_ptds +cudaMemset +cudaMemset2D 
+cudaMemset2DAsync +cudaMemset2DAsync_ptsz +cudaMemset2D_ptds +cudaMemset3D +cudaMemset3DAsync +cudaMemset3DAsync_ptsz +cudaMemset3D_ptds +cudaMemsetAsync +cudaMemsetAsync_ptsz +cudaMemset_ptds +cudaMipmappedArrayGetMemoryRequirements +cudaMipmappedArrayGetSparseProperties +cudaOccupancyAvailableDynamicSMemPerBlock +cudaOccupancyMaxActiveBlocksPerMultiprocessor +cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags +cudaOccupancyMaxActiveClusters +cudaOccupancyMaxPotentialClusterSize +cudaPeekAtLastError +cudaPointerGetAttributes +cudaProfilerStart +cudaProfilerStop +cudaRuntimeGetVersion +cudaSetDevice +cudaSetDeviceFlags +cudaSetDoubleForDevice +cudaSetDoubleForHost +cudaSetValidDevices +cudaSignalExternalSemaphoresAsync +cudaSignalExternalSemaphoresAsync_ptsz +cudaSignalExternalSemaphoresAsync_v2 +cudaSignalExternalSemaphoresAsync_v2_ptsz +cudaStreamAddCallback +cudaStreamAddCallback_ptsz +cudaStreamAttachMemAsync +cudaStreamAttachMemAsync_ptsz +cudaStreamBeginCapture +cudaStreamBeginCapture_ptsz +cudaStreamCopyAttributes +cudaStreamCopyAttributes_ptsz +cudaStreamCreate +cudaStreamCreateWithFlags +cudaStreamCreateWithPriority +cudaStreamDestroy +cudaStreamEndCapture +cudaStreamEndCapture_ptsz +cudaStreamGetAttribute +cudaStreamGetAttribute_ptsz +cudaStreamGetCaptureInfo +cudaStreamGetCaptureInfo_ptsz +cudaStreamGetCaptureInfo_v2 +cudaStreamGetCaptureInfo_v2_ptsz +cudaStreamGetFlags +cudaStreamGetFlags_ptsz +cudaStreamGetId +cudaStreamGetId_ptsz +cudaStreamGetPriority +cudaStreamGetPriority_ptsz +cudaStreamIsCapturing +cudaStreamIsCapturing_ptsz +cudaStreamQuery +cudaStreamQuery_ptsz +cudaStreamSetAttribute +cudaStreamSetAttribute_ptsz +cudaStreamSynchronize +cudaStreamSynchronize_ptsz +cudaStreamUpdateCaptureDependencies +cudaStreamUpdateCaptureDependencies_ptsz +cudaStreamWaitEvent +cudaStreamWaitEvent_ptsz +cudaThreadExchangeStreamCaptureMode +cudaThreadExit +cudaThreadGetCacheConfig +cudaThreadGetLimit +cudaThreadSetCacheConfig +cudaThreadSetLimit 
+cudaThreadSynchronize +cudaUserObjectCreate +cudaUserObjectRelease +cudaUserObjectRetain +cudaVDPAUGetDevice +cudaVDPAUSetVDPAUDevice +cudaWaitExternalSemaphoresAsync +cudaWaitExternalSemaphoresAsync_ptsz +cudaWaitExternalSemaphoresAsync_v2 +cudaWaitExternalSemaphoresAsync_v2_ptsz diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cudart_stub.cc b/third_party/xla/third_party/tsl/tsl/cuda/cudart_stub.cc index aa11f03d5a3e12..cfae868dc667fc 100644 --- a/third_party/xla/third_party/tsl/tsl/cuda/cudart_stub.cc +++ b/third_party/xla/third_party/tsl/tsl/cuda/cudart_stub.cc @@ -34,119 +34,37 @@ void *GetDsoHandle() { return handle; } -template -T LoadSymbol(const char *symbol_name) { +void *LoadSymbol(const char *symbol_name) { void *symbol = nullptr; auto env = tsl::Env::Default(); env->GetSymbolFromLibrary(GetDsoHandle(), symbol_name, &symbol).IgnoreError(); - return reinterpret_cast(symbol); + return symbol; } -cudaError_t GetSymbolNotFoundError() { - return cudaErrorSharedObjectSymbolNotFound; -} -} // namespace - -#define __dv(v) -#define __CUDA_DEPRECATED -// A bunch of new symbols were introduced in version 10 -#if CUDART_VERSION < 10000 -#include "tsl/cuda/cuda_runtime_9_0.inc" -#elif CUDART_VERSION < 10010 -#include "tsl/cuda/cuda_runtime_10_0.inc" -#elif CUDART_VERSION < 10020 -#include "tsl/cuda/cuda_runtime_10_1.inc" -#elif CUDART_VERSION < 11000 -#include "tsl/cuda/cuda_runtime_10_2.inc" -#elif CUDART_VERSION < 11020 -#include "tsl/cuda/cuda_runtime_11_0.inc" -#elif CUDART_VERSION < 11080 -#include "tsl/cuda/cuda_runtime_11_2.inc" -#elif CUDART_VERSION < 12000 -#include "tsl/cuda/cuda_runtime_11_8.inc" -#else -#include "tsl/cuda/cuda_runtime_12_0.inc" -#endif -#undef __dv -#undef __CUDA_DEPRECATED - -extern "C" { - -// Following are private symbols in libcudart that got inserted by nvcc. 
-extern void CUDARTAPI __cudaRegisterFunction( - void **fatCubinHandle, const char *hostFun, char *deviceFun, - const char *deviceName, int thread_limit, uint3 *tid, uint3 *bid, - dim3 *bDim, dim3 *gDim, int *wSize) { - using FuncPtr = void(CUDARTAPI *)(void **fatCubinHandle, const char *hostFun, - char *deviceFun, const char *deviceName, - int thread_limit, uint3 *tid, uint3 *bid, - dim3 *bDim, dim3 *gDim, int *wSize); - static auto func_ptr = LoadSymbol("__cudaRegisterFunction"); - if (!func_ptr) return; - func_ptr(fatCubinHandle, hostFun, deviceFun, deviceName, thread_limit, tid, - bid, bDim, gDim, wSize); -} +const char *kSymbols[] = { +#include "tsl/cuda/cudart.inc" +}; -extern void CUDARTAPI __cudaUnregisterFatBinary(void **fatCubinHandle) { - using FuncPtr = void(CUDARTAPI *)(void **fatCubinHandle); - static auto func_ptr = LoadSymbol("__cudaUnregisterFatBinary"); - if (!func_ptr) return; - func_ptr(fatCubinHandle); -} +constexpr size_t kNumSymbols = sizeof(kSymbols) / sizeof(const char *); -extern void CUDARTAPI __cudaRegisterVar(void **fatCubinHandle, char *hostVar, - char *deviceAddress, - const char *deviceName, int ext, - size_t size, int constant, int global) { - using FuncPtr = void(CUDARTAPI *)( - void **fatCubinHandle, char *hostVar, char *deviceAddress, - const char *deviceName, int ext, size_t size, int constant, int global); - static auto func_ptr = LoadSymbol("__cudaRegisterVar"); - if (!func_ptr) return; - func_ptr(fatCubinHandle, hostVar, deviceAddress, deviceName, ext, size, - constant, global); -} +} // namespace -extern void **CUDARTAPI __cudaRegisterFatBinary(void *fatCubin) { - using FuncPtr = void **(CUDARTAPI *)(void *fatCubin); - static auto func_ptr = LoadSymbol("__cudaRegisterFatBinary"); - if (!func_ptr) return nullptr; - return (void **)func_ptr(fatCubin); -} +extern "C" { -extern cudaError_t CUDARTAPI __cudaPopCallConfiguration(dim3 *gridDim, - dim3 *blockDim, - size_t *sharedMem, - void *stream) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)(dim3 * gridDim, dim3 * blockDim, - size_t * sharedMem, void *stream); - static auto func_ptr = LoadSymbol("__cudaPopCallConfiguration"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(gridDim, blockDim, sharedMem, stream); +static cudaError_t CudartGetSymbolNotFoundError() { + return cudaErrorSharedObjectSymbolNotFound; } -extern __host__ __device__ unsigned CUDARTAPI __cudaPushCallConfiguration( - dim3 gridDim, dim3 blockDim, size_t sharedMem = 0, void *stream = 0) { - using FuncPtr = unsigned(CUDARTAPI *)(dim3 gridDim, dim3 blockDim, - size_t sharedMem, void *stream); - static auto func_ptr = LoadSymbol("__cudaPushCallConfiguration"); - if (!func_ptr) return 0; - return func_ptr(gridDim, blockDim, sharedMem, stream); -} +extern void *_cudart_tramp_table[]; -extern char CUDARTAPI __cudaInitModule(void **fatCubinHandle) { - using FuncPtr = char(CUDARTAPI *)(void **fatCubinHandle); - static auto func_ptr = LoadSymbol("__cudaInitModule"); - if (!func_ptr) return 0; - return func_ptr(fatCubinHandle); +void _cudart_tramp_resolve(int i) { + CHECK_LE(0, i); + CHECK_LT(i, kNumSymbols); + void *p = LoadSymbol(kSymbols[i]); + if (!p) { + p = reinterpret_cast(&CudartGetSymbolNotFoundError); + } + _cudart_tramp_table[i] = p; } -#if CUDART_VERSION >= 10010 -extern void CUDARTAPI __cudaRegisterFatBinaryEnd(void **fatCubinHandle) { - using FuncPtr = void(CUDARTAPI *)(void **fatCubinHandle); - static auto func_ptr = LoadSymbol("__cudaRegisterFatBinaryEnd"); - if (!func_ptr) return; - func_ptr(fatCubinHandle); -} -#endif } // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cudnn.symbols b/third_party/xla/third_party/tsl/tsl/cuda/cudnn.symbols new file mode 100644 index 00000000000000..2c4dbd71030b38 --- /dev/null +++ b/third_party/xla/third_party/tsl/tsl/cuda/cudnn.symbols @@ -0,0 +1,268 @@ +cudnnActivationBackward +cudnnActivationForward +cudnnAddTensor +cudnnAdvInferVersionCheck +cudnnAdvTrainVersionCheck 
+cudnnBackendCreateDescriptor +cudnnBackendDestroyDescriptor +cudnnBackendExecute +cudnnBackendFinalize +cudnnBackendGetAttribute +cudnnBackendInitialize +cudnnBackendSetAttribute +cudnnBatchNormalizationBackward +cudnnBatchNormalizationBackwardEx +cudnnBatchNormalizationForwardInference +cudnnBatchNormalizationForwardTraining +cudnnBatchNormalizationForwardTrainingEx +cudnnBuildRNNDynamic +cudnnCTCLoss +cudnnCTCLoss_v8 +cudnnCnnInferVersionCheck +cudnnCnnTrainVersionCheck +cudnnConvolutionBackwardBias +cudnnConvolutionBackwardData +cudnnConvolutionBackwardFilter +cudnnConvolutionBiasActivationForward +cudnnConvolutionForward +cudnnCopyAlgorithmDescriptor +cudnnCreate +cudnnCreateActivationDescriptor +cudnnCreateAlgorithmDescriptor +cudnnCreateAlgorithmPerformance +cudnnCreateAttnDescriptor +cudnnCreateCTCLossDescriptor +cudnnCreateConvolutionDescriptor +cudnnCreateDropoutDescriptor +cudnnCreateFilterDescriptor +cudnnCreateFusedOpsConstParamPack +cudnnCreateFusedOpsPlan +cudnnCreateFusedOpsVariantParamPack +cudnnCreateLRNDescriptor +cudnnCreateOpTensorDescriptor +cudnnCreatePersistentRNNPlan +cudnnCreatePoolingDescriptor +cudnnCreateRNNDataDescriptor +cudnnCreateRNNDescriptor +cudnnCreateReduceTensorDescriptor +cudnnCreateSeqDataDescriptor +cudnnCreateSpatialTransformerDescriptor +cudnnCreateTensorDescriptor +cudnnCreateTensorTransformDescriptor +cudnnDeriveBNTensorDescriptor +cudnnDeriveNormTensorDescriptor +cudnnDestroy +cudnnDestroyActivationDescriptor +cudnnDestroyAlgorithmDescriptor +cudnnDestroyAlgorithmPerformance +cudnnDestroyAttnDescriptor +cudnnDestroyCTCLossDescriptor +cudnnDestroyConvolutionDescriptor +cudnnDestroyDropoutDescriptor +cudnnDestroyFilterDescriptor +cudnnDestroyFusedOpsConstParamPack +cudnnDestroyFusedOpsPlan +cudnnDestroyFusedOpsVariantParamPack +cudnnDestroyLRNDescriptor +cudnnDestroyOpTensorDescriptor +cudnnDestroyPersistentRNNPlan +cudnnDestroyPoolingDescriptor +cudnnDestroyRNNDataDescriptor +cudnnDestroyRNNDescriptor 
+cudnnDestroyReduceTensorDescriptor +cudnnDestroySeqDataDescriptor +cudnnDestroySpatialTransformerDescriptor +cudnnDestroyTensorDescriptor +cudnnDestroyTensorTransformDescriptor +cudnnDivisiveNormalizationBackward +cudnnDivisiveNormalizationForward +cudnnDropoutBackward +cudnnDropoutForward +cudnnDropoutGetReserveSpaceSize +cudnnDropoutGetStatesSize +cudnnFindConvolutionBackwardDataAlgorithm +cudnnFindConvolutionBackwardDataAlgorithmEx +cudnnFindConvolutionBackwardFilterAlgorithm +cudnnFindConvolutionBackwardFilterAlgorithmEx +cudnnFindConvolutionForwardAlgorithm +cudnnFindConvolutionForwardAlgorithmEx +cudnnFindRNNBackwardDataAlgorithmEx +cudnnFindRNNBackwardWeightsAlgorithmEx +cudnnFindRNNForwardInferenceAlgorithmEx +cudnnFindRNNForwardTrainingAlgorithmEx +cudnnFusedOpsExecute +cudnnGetActivationDescriptor +cudnnGetActivationDescriptorSwishBeta +cudnnGetAlgorithmDescriptor +cudnnGetAlgorithmPerformance +cudnnGetAlgorithmSpaceSize +cudnnGetAttnDescriptor +cudnnGetBackdoor +cudnnGetBatchNormalizationBackwardExWorkspaceSize +cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize +cudnnGetBatchNormalizationTrainingExReserveSpaceSize +cudnnGetCTCLossDescriptor +cudnnGetCTCLossDescriptorEx +cudnnGetCTCLossDescriptor_v8 +cudnnGetCTCLossWorkspaceSize +cudnnGetCTCLossWorkspaceSize_v8 +cudnnGetCallback +cudnnGetConvolution2dDescriptor +cudnnGetConvolution2dForwardOutputDim +cudnnGetConvolutionBackwardDataAlgorithmMaxCount +cudnnGetConvolutionBackwardDataAlgorithm_v7 +cudnnGetConvolutionBackwardDataWorkspaceSize +cudnnGetConvolutionBackwardFilterAlgorithmMaxCount +cudnnGetConvolutionBackwardFilterAlgorithm_v7 +cudnnGetConvolutionBackwardFilterWorkspaceSize +cudnnGetConvolutionForwardAlgorithmMaxCount +cudnnGetConvolutionForwardAlgorithm_v7 +cudnnGetConvolutionForwardWorkspaceSize +cudnnGetConvolutionGroupCount +cudnnGetConvolutionMathType +cudnnGetConvolutionNdDescriptor +cudnnGetConvolutionNdForwardOutputDim +cudnnGetConvolutionReorderType +cudnnGetCudartVersion 
+cudnnGetDropoutDescriptor +cudnnGetErrorString +cudnnGetFilter4dDescriptor +cudnnGetFilterNdDescriptor +cudnnGetFilterSizeInBytes +cudnnGetFoldedConvBackwardDataDescriptors +cudnnGetFusedOpsConstParamPackAttribute +cudnnGetFusedOpsVariantParamPackAttribute +cudnnGetLRNDescriptor +cudnnGetMaxDeviceVersion +cudnnGetMultiHeadAttnBuffers +cudnnGetMultiHeadAttnWeights +cudnnGetNormalizationBackwardWorkspaceSize +cudnnGetNormalizationForwardTrainingWorkspaceSize +cudnnGetNormalizationTrainingReserveSpaceSize +cudnnGetOpTensorDescriptor +cudnnGetPooling2dDescriptor +cudnnGetPooling2dForwardOutputDim +cudnnGetPoolingNdDescriptor +cudnnGetPoolingNdForwardOutputDim +cudnnGetProperty +cudnnGetRNNBackwardDataAlgorithmMaxCount +cudnnGetRNNBackwardWeightsAlgorithmMaxCount +cudnnGetRNNBiasMode +cudnnGetRNNDataDescriptor +cudnnGetRNNDescriptor_v6 +cudnnGetRNNDescriptor_v8 +cudnnGetRNNDropoutLocationsInternal +cudnnGetRNNForwardInferenceAlgorithmMaxCount +cudnnGetRNNForwardTrainingAlgorithmMaxCount +cudnnGetRNNLinLayerBiasParams +cudnnGetRNNLinLayerMatrixParams +cudnnGetRNNMatrixMathType +cudnnGetRNNPaddingMode +cudnnGetRNNParamsSize +cudnnGetRNNProjectionLayers +cudnnGetRNNTempSpaceSizes +cudnnGetRNNTrainingReserveSize +cudnnGetRNNWeightParams +cudnnGetRNNWeightSpaceSize +cudnnGetRNNWorkspaceSize +cudnnGetReduceTensorDescriptor +cudnnGetReductionIndicesSize +cudnnGetReductionWorkspaceSize +cudnnGetSeqDataDescriptor +cudnnGetStream +cudnnGetTensor4dDescriptor +cudnnGetTensorNdDescriptor +cudnnGetTensorSizeInBytes +cudnnGetTensorTransformDescriptor +cudnnGetVersion +cudnnIm2Col +cudnnInitTransformDest +cudnnLRNCrossChannelBackward +cudnnLRNCrossChannelForward +cudnnMakeFusedOpsPlan +cudnnMultiHeadAttnBackwardData +cudnnMultiHeadAttnBackwardWeights +cudnnMultiHeadAttnForward +cudnnNormalizationBackward +cudnnNormalizationForwardInference +cudnnNormalizationForwardTraining +cudnnOpTensor +cudnnOpsInferVersionCheck +cudnnOpsTrainVersionCheck +cudnnPoolingBackward +cudnnPoolingForward 
+cudnnQueryRuntimeError +cudnnRNNBackwardData +cudnnRNNBackwardDataEx +cudnnRNNBackwardData_v8 +cudnnRNNBackwardWeights +cudnnRNNBackwardWeightsEx +cudnnRNNBackwardWeights_v8 +cudnnRNNForward +cudnnRNNForwardInference +cudnnRNNForwardInferenceEx +cudnnRNNForwardTraining +cudnnRNNForwardTrainingEx +cudnnRNNGetClip +cudnnRNNGetClip_v8 +cudnnRNNSetClip +cudnnRNNSetClip_v8 +cudnnReduceTensor +cudnnReorderFilterAndBias +cudnnRestoreAlgorithm +cudnnRestoreDropoutDescriptor +cudnnSaveAlgorithm +cudnnScaleTensor +cudnnSetActivationDescriptor +cudnnSetActivationDescriptorSwishBeta +cudnnSetAlgorithmDescriptor +cudnnSetAlgorithmPerformance +cudnnSetAttnDescriptor +cudnnSetBackdoor +cudnnSetBackdoorEx +cudnnSetCTCLossDescriptor +cudnnSetCTCLossDescriptorEx +cudnnSetCTCLossDescriptor_v8 +cudnnSetCallback +cudnnSetConvolution2dDescriptor +cudnnSetConvolutionGroupCount +cudnnSetConvolutionMathType +cudnnSetConvolutionNdDescriptor +cudnnSetConvolutionReorderType +cudnnSetDropoutDescriptor +cudnnSetFilter4dDescriptor +cudnnSetFilterNdDescriptor +cudnnSetFusedOpsConstParamPackAttribute +cudnnSetFusedOpsVariantParamPackAttribute +cudnnSetLRNDescriptor +cudnnSetOpTensorDescriptor +cudnnSetPersistentRNNPlan +cudnnSetPooling2dDescriptor +cudnnSetPoolingNdDescriptor +cudnnSetRNNAlgorithmDescriptor +cudnnSetRNNBiasMode +cudnnSetRNNDataDescriptor +cudnnSetRNNDescriptor_v6 +cudnnSetRNNDescriptor_v8 +cudnnSetRNNMatrixMathType +cudnnSetRNNPaddingMode +cudnnSetRNNProjectionLayers +cudnnSetReduceTensorDescriptor +cudnnSetSeqDataDescriptor +cudnnSetSpatialTransformerNdDescriptor +cudnnSetStream +cudnnSetTensor +cudnnSetTensor4dDescriptor +cudnnSetTensor4dDescriptorEx +cudnnSetTensorNdDescriptor +cudnnSetTensorNdDescriptorEx +cudnnSetTensorTransformDescriptor +cudnnSoftmaxBackward +cudnnSoftmaxForward +cudnnSpatialTfGridGeneratorBackward +cudnnSpatialTfGridGeneratorForward +cudnnSpatialTfSamplerBackward +cudnnSpatialTfSamplerForward +cudnnTransformFilter +cudnnTransformTensor 
+cudnnTransformTensorEx diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cudnn_6_0.inc b/third_party/xla/third_party/tsl/tsl/cuda/cudnn_6_0.inc deleted file mode 100644 index 11288983a4a1a4..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cudnn_6_0.inc +++ /dev/null @@ -1,1825 +0,0 @@ -// Auto-generated, do not edit. - -extern "C" { - -size_t CUDNNWINAPI cudnnGetVersion(void) { - using FuncPtr = size_t(CUDNNWINAPI *)(); - static auto func_ptr = LoadSymbol("cudnnGetVersion"); - if (!func_ptr) return 0; - return func_ptr(); -} - -size_t CUDNNWINAPI cudnnGetCudartVersion(void) { - using FuncPtr = size_t(CUDNNWINAPI *)(); - static auto func_ptr = LoadSymbol("cudnnGetCudartVersion"); - if (!func_ptr) return 0; - return func_ptr(); -} - -const char *CUDNNWINAPI cudnnGetErrorString(cudnnStatus_t status) { - using FuncPtr = const char *(CUDNNWINAPI *)(cudnnStatus_t); - static auto func_ptr = LoadSymbol("cudnnGetErrorString"); - if (!func_ptr) return "cudnnGetErrorString symbol not found."; - return func_ptr(status); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetProperty(libraryPropertyType type, - int *value) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(libraryPropertyType, int *); - static auto func_ptr = LoadSymbol("cudnnGetProperty"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type, value); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreate(cudnnHandle_t *handle) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t *); - static auto func_ptr = LoadSymbol("cudnnCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cudnnStatus_t CUDNNWINAPI cudnnDestroy(cudnnHandle_t handle) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t); - static auto func_ptr = LoadSymbol("cudnnDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetStream(cudnnHandle_t handle, - cudaStream_t streamId) { - using FuncPtr = 
cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudnnSetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetStream(cudnnHandle_t handle, - cudaStream_t *streamId) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t *); - static auto func_ptr = LoadSymbol("cudnnGetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensor4dDescriptor( - cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format, - cudnnDataType_t dataType, // image data type - int n, // number of inputs (batch size) - int c, // number of input feature maps - int h, // height of input section - int w) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t, - cudnnDataType_t, int, int, int, int); - static auto func_ptr = LoadSymbol("cudnnSetTensor4dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, format, dataType, n, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensor4dDescriptorEx( - cudnnTensorDescriptor_t tensorDesc, - cudnnDataType_t dataType, // image data type - int n, // number of inputs (batch size) - int c, // number of input feature maps - int h, // height of input section - int w, // width of input section - int nStride, int cStride, int hStride, int wStride) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnDataType_t, - int, int, int, int, int, int, int, int); - static auto func_ptr = 
LoadSymbol("cudnnSetTensor4dDescriptorEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride, - wStride); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetTensor4dDescriptor( - const cudnnTensorDescriptor_t tensorDesc, - cudnnDataType_t *dataType, // image data type - int *n, // number of inputs (batch size) - int *c, // number of input feature maps - int *h, // height of input section - int *w, // width of input section - int *nStride, int *cStride, int *hStride, int *wStride) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnTensorDescriptor_t, cudnnDataType_t *, int *, int *, int *, - int *, int *, int *, int *, int *); - static auto func_ptr = LoadSymbol("cudnnGetTensor4dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride, - wStride); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensorNdDescriptor( - cudnnTensorDescriptor_t tensorDesc, cudnnDataType_t dataType, int nbDims, - const int dimA[], const int strideA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnTensorDescriptor_t, cudnnDataType_t, int, const int[], const int[]); - static auto func_ptr = LoadSymbol("cudnnSetTensorNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, dataType, nbDims, dimA, strideA); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensorNdDescriptorEx( - cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format, - cudnnDataType_t dataType, int nbDims, const int dimA[]) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t, - cudnnDataType_t, int, const int[]); - static auto func_ptr = LoadSymbol("cudnnSetTensorNdDescriptorEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, format, dataType, nbDims, dimA); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetTensorNdDescriptor( - const 
cudnnTensorDescriptor_t tensorDesc, int nbDimsRequested, - cudnnDataType_t *dataType, int *nbDims, int dimA[], int strideA[]) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(const cudnnTensorDescriptor_t, int, - cudnnDataType_t *, int *, int[], int[]); - static auto func_ptr = LoadSymbol("cudnnGetTensorNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, nbDimsRequested, dataType, nbDims, dimA, strideA); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetTensorSizeInBytes( - const cudnnTensorDescriptor_t tensorDesc, size_t *size) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(const cudnnTensorDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetTensorSizeInBytes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, size); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyTensorDescriptor(cudnnTensorDescriptor_t tensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnTransformTensor( - cudnnHandle_t handle, const void *alpha, - const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, - const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnTransformTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnAddTensor(cudnnHandle_t handle, - const void *alpha, - const cudnnTensorDescriptor_t aDesc, - const void *A, const void *beta, - const cudnnTensorDescriptor_t cDesc, - void *C) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const 
void *, const cudnnTensorDescriptor_t, const void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnAddTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, aDesc, A, beta, cDesc, C); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateOpTensorDescriptor(cudnnOpTensorDescriptor_t *opTensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateOpTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(opTensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetOpTensorDescriptor( - cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t opTensorOp, - cudnnDataType_t opTensorCompType, cudnnNanPropagation_t opTensorNanOpt) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t, - cudnnDataType_t, cudnnNanPropagation_t); - static auto func_ptr = LoadSymbol("cudnnSetOpTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(opTensorDesc, opTensorOp, opTensorCompType, opTensorNanOpt); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetOpTensorDescriptor( - const cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t *opTensorOp, - cudnnDataType_t *opTensorCompType, cudnnNanPropagation_t *opTensorNanOpt) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t *, cudnnDataType_t *, - cudnnNanPropagation_t *); - static auto func_ptr = LoadSymbol("cudnnGetOpTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(opTensorDesc, opTensorOp, opTensorCompType, opTensorNanOpt); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyOpTensorDescriptor"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(opTensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnOpTensor( - cudnnHandle_t handle, const cudnnOpTensorDescriptor_t opTensorDesc, - const void *alpha1, const cudnnTensorDescriptor_t aDesc, const void *A, - const void *alpha2, const cudnnTensorDescriptor_t bDesc, const void *B, - const void *beta, const cudnnTensorDescriptor_t cDesc, void *C) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnOpTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnOpTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opTensorDesc, alpha1, aDesc, A, alpha2, bDesc, B, - beta, cDesc, C); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreateReduceTensorDescriptor( - cudnnReduceTensorDescriptor_t *reduceTensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t *); - static auto func_ptr = - LoadSymbol("cudnnCreateReduceTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(reduceTensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetReduceTensorDescriptor( - cudnnReduceTensorDescriptor_t reduceTensorDesc, - cudnnReduceTensorOp_t reduceTensorOp, cudnnDataType_t reduceTensorCompType, - cudnnNanPropagation_t reduceTensorNanOpt, - cudnnReduceTensorIndices_t reduceTensorIndices, - cudnnIndicesType_t reduceTensorIndicesType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t, cudnnDataType_t, - cudnnNanPropagation_t, cudnnReduceTensorIndices_t, cudnnIndicesType_t); - static auto func_ptr = LoadSymbol("cudnnSetReduceTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType, - reduceTensorNanOpt, reduceTensorIndices, - 
reduceTensorIndicesType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetReduceTensorDescriptor( - const cudnnReduceTensorDescriptor_t reduceTensorDesc, - cudnnReduceTensorOp_t *reduceTensorOp, - cudnnDataType_t *reduceTensorCompType, - cudnnNanPropagation_t *reduceTensorNanOpt, - cudnnReduceTensorIndices_t *reduceTensorIndices, - cudnnIndicesType_t *reduceTensorIndicesType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t *, - cudnnDataType_t *, cudnnNanPropagation_t *, cudnnReduceTensorIndices_t *, - cudnnIndicesType_t *); - static auto func_ptr = LoadSymbol("cudnnGetReduceTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType, - reduceTensorNanOpt, reduceTensorIndices, - reduceTensorIndicesType); -} - -cudnnStatus_t CUDNNWINAPI cudnnDestroyReduceTensorDescriptor( - cudnnReduceTensorDescriptor_t reduceTensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t); - static auto func_ptr = - LoadSymbol("cudnnDestroyReduceTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(reduceTensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetReductionIndicesSize( - cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, - const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnReduceTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetReductionIndicesSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetReductionWorkspaceSize( - cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, - const cudnnTensorDescriptor_t 
aDesc, const cudnnTensorDescriptor_t cDesc, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnReduceTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetReductionWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnReduceTensor( - cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, - void *indices, size_t indicesSizeInBytes, void *workspace, - size_t workspaceSizeInBytes, const void *alpha, - const cudnnTensorDescriptor_t aDesc, const void *A, const void *beta, - const cudnnTensorDescriptor_t cDesc, void *C) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnReduceTensorDescriptor_t, void *, size_t, - void *, size_t, const void *, const cudnnTensorDescriptor_t, const void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnReduceTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, reduceTensorDesc, indices, indicesSizeInBytes, - workspace, workspaceSizeInBytes, alpha, aDesc, A, beta, cDesc, - C); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensor(cudnnHandle_t handle, - const cudnnTensorDescriptor_t yDesc, - void *y, const void *valuePtr) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *); - static auto func_ptr = LoadSymbol("cudnnSetTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, yDesc, y, valuePtr); -} - -cudnnStatus_t CUDNNWINAPI cudnnScaleTensor(cudnnHandle_t handle, - const cudnnTensorDescriptor_t yDesc, - void *y, const void *alpha) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *); - static auto 
func_ptr = LoadSymbol("cudnnScaleTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, yDesc, y, alpha); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateFilterDescriptor(cudnnFilterDescriptor_t *filterDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateFilterDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnSetFilter4dDescriptor(cudnnFilterDescriptor_t filterDesc, - cudnnDataType_t dataType, // image data type - cudnnTensorFormat_t format, - int k, // number of output feature maps - int c, // number of input feature maps - int h, // height of each input filter - int w) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t, - cudnnTensorFormat_t, int, int, int, int); - static auto func_ptr = LoadSymbol("cudnnSetFilter4dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc, dataType, format, k, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetFilter4dDescriptor(const cudnnFilterDescriptor_t filterDesc, - cudnnDataType_t *dataType, // image data type - cudnnTensorFormat_t *format, - int *k, // number of output feature maps - int *c, // number of input feature maps - int *h, // height of each input filter - int *w) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnFilterDescriptor_t, cudnnDataType_t *, cudnnTensorFormat_t *, - int *, int *, int *, int *); - static auto func_ptr = LoadSymbol("cudnnGetFilter4dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc, dataType, format, k, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetFilterNdDescriptor( - cudnnFilterDescriptor_t filterDesc, - cudnnDataType_t dataType, // image data type - cudnnTensorFormat_t format, int nbDims, const int filterDimA[]) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI 
*)(cudnnFilterDescriptor_t, cudnnDataType_t, - cudnnTensorFormat_t, int, const int[]); - static auto func_ptr = LoadSymbol("cudnnSetFilterNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc, dataType, format, nbDims, filterDimA); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetFilterNdDescriptor( - const cudnnFilterDescriptor_t filterDesc, int nbDimsRequested, - cudnnDataType_t *dataType, // image data type - cudnnTensorFormat_t *format, int *nbDims, int filterDimA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnFilterDescriptor_t, int, cudnnDataType_t *, - cudnnTensorFormat_t *, int *, int[]); - static auto func_ptr = LoadSymbol("cudnnGetFilterNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc, nbDimsRequested, dataType, format, nbDims, - filterDimA); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyFilterDescriptor(cudnnFilterDescriptor_t filterDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyFilterDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t *convDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t *); - static auto func_ptr = - LoadSymbol("cudnnCreateConvolutionDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetConvolution2dDescriptor( - cudnnConvolutionDescriptor_t convDesc, - int pad_h, // zero-padding height - int pad_w, // zero-padding width - int u, // vertical filter stride - int v, // horizontal filter stride - int dilation_h, // filter dilation in the vertical dimension - int dilation_w, // filter dilation in the horizontal dimension - cudnnConvolutionMode_t mode, cudnnDataType_t computeType) { - using FuncPtr = 
cudnnStatus_t(CUDNNWINAPI *)( - cudnnConvolutionDescriptor_t, int, int, int, int, int, int, - cudnnConvolutionMode_t, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetConvolution2dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode, - computeType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dDescriptor( - const cudnnConvolutionDescriptor_t convDesc, - int *pad_h, // zero-padding height - int *pad_w, // zero-padding width - int *u, // vertical filter stride - int *v, // horizontal filter stride - int *dilation_h, // filter dilation in the vertical dimension - int *dilation_w, // filter dilation in the horizontal dimension - cudnnConvolutionMode_t *mode, cudnnDataType_t *computeType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnConvolutionDescriptor_t, int *, int *, int *, int *, int *, - int *, cudnnConvolutionMode_t *, cudnnDataType_t *); - static auto func_ptr = LoadSymbol("cudnnGetConvolution2dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode, - computeType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dForwardOutputDim( - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t inputTensorDesc, - const cudnnFilterDescriptor_t filterDesc, int *n, int *c, int *h, int *w) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, int *, int *, int *, int *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolution2dForwardOutputDim"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, inputTensorDesc, filterDesc, n, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionNdDescriptor( - cudnnConvolutionDescriptor_t convDesc, int arrayLength, /* nbDims-2 size */ - const int padA[], const int 
filterStrideA[], const int dilationA[], - cudnnConvolutionMode_t mode, cudnnDataType_t computeType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnConvolutionDescriptor_t, int, const int[], const int[], const int[], - cudnnConvolutionMode_t, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetConvolutionNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, arrayLength, padA, filterStrideA, dilationA, mode, - computeType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionNdDescriptor( - const cudnnConvolutionDescriptor_t convDesc, int arrayLengthRequested, - int *arrayLength, int padA[], int strideA[], int dilationA[], - cudnnConvolutionMode_t *mode, cudnnDataType_t *computeType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnConvolutionDescriptor_t, int, int *, int[], int[], int[], - cudnnConvolutionMode_t *, cudnnDataType_t *); - static auto func_ptr = LoadSymbol("cudnnGetConvolutionNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, arrayLengthRequested, arrayLength, padA, strideA, - dilationA, mode, computeType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionNdForwardOutputDim( - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t inputTensorDesc, - const cudnnFilterDescriptor_t filterDesc, int nbDims, - int tensorOutputDimA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, int, int[]); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionNdForwardOutputDim"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, inputTensorDesc, filterDesc, nbDims, - tensorOutputDimA); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyConvolutionDescriptor(cudnnConvolutionDescriptor_t convDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t); - static auto func_ptr = - 
LoadSymbol("cudnnDestroyConvolutionDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionForwardAlgorithm( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnFilterDescriptor_t wDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t yDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, const int, int *, - cudnnConvolutionFwdAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionForwardAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, requestedAlgoCount, - returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionForwardAlgorithmEx( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t yDesc, void *y, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults, - void *workSpace, size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *, - const int, int *, cudnnConvolutionFwdAlgoPerf_t *, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionForwardAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, x, wDesc, w, convDesc, yDesc, y, - requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, - 
workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardAlgorithm( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnFilterDescriptor_t wDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t yDesc, - cudnnConvolutionFwdPreference_t preference, size_t memoryLimitInBytes, - cudnnConvolutionFwdAlgo_t *algo) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, cudnnConvolutionFwdPreference_t, size_t, - cudnnConvolutionFwdAlgo_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionForwardAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, preference, - memoryLimitInBytes, algo); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardWorkspaceSize( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnFilterDescriptor_t wDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t yDesc, cudnnConvolutionFwdAlgo_t algo, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, cudnnConvolutionFwdAlgo_t, size_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionForwardWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, algo, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionForward( - cudnnHandle_t handle, const void *alpha, - const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo, - void *workSpace, size_t workSpaceSizeInBytes, const void *beta, 
- const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *, - size_t, const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnConvolutionForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, xDesc, x, wDesc, w, convDesc, algo, workSpace, - workSpaceSizeInBytes, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionBiasActivationForward( - cudnnHandle_t handle, const void *alpha1, - const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo, - void *workSpace, size_t workSpaceSizeInBytes, const void *alpha2, - const cudnnTensorDescriptor_t zDesc, const void *z, - const cudnnTensorDescriptor_t biasDesc, const void *bias, - const cudnnActivationDescriptor_t activationDesc, - const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *, - size_t, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnActivationDescriptor_t, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = - LoadSymbol("cudnnConvolutionBiasActivationForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha1, xDesc, x, wDesc, w, convDesc, algo, workSpace, - workSpaceSizeInBytes, alpha2, zDesc, z, biasDesc, bias, - activationDesc, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardBias( - cudnnHandle_t handle, 
const void *alpha, - const cudnnTensorDescriptor_t dyDesc, const void *dy, const void *beta, - const cudnnTensorDescriptor_t dbDesc, void *db) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnConvolutionBackwardBias"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, dyDesc, dy, beta, dbDesc, db); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardFilterAlgorithm( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnFilterDescriptor_t dwDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnFilterDescriptor_t, const int, int *, - cudnnConvolutionBwdFilterAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionBackwardFilterAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, requestedAlgoCount, - returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardFilterAlgorithmEx( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnTensorDescriptor_t dyDesc, const void *y, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnFilterDescriptor_t dwDesc, void *dw, - const int requestedAlgoCount, int *returnedAlgoCount, - cudnnConvolutionBwdFilterAlgoPerf_t *perfResults, void *workSpace, - size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, 
const void *, - const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, void *, - const int, int *, cudnnConvolutionBwdFilterAlgoPerf_t *, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionBackwardFilterAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, x, dyDesc, y, convDesc, dwDesc, dw, - requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithm( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnFilterDescriptor_t dwDesc, - cudnnConvolutionBwdFilterPreference_t preference, size_t memoryLimitInBytes, - cudnnConvolutionBwdFilterAlgo_t *algo) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterPreference_t, - size_t, cudnnConvolutionBwdFilterAlgo_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardFilterAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, preference, - memoryLimitInBytes, algo); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterWorkspaceSize( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnFilterDescriptor_t gradDesc, - cudnnConvolutionBwdFilterAlgo_t algo, size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, size_t *); - static auto func_ptr = - 
LoadSymbol("cudnnGetConvolutionBackwardFilterWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, dyDesc, convDesc, gradDesc, algo, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardFilter( - cudnnHandle_t handle, const void *alpha, - const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnConvolutionDescriptor_t convDesc, - cudnnConvolutionBwdFilterAlgo_t algo, void *workSpace, - size_t workSpaceSizeInBytes, const void *beta, - const cudnnFilterDescriptor_t dwDesc, void *dw) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, - void *, size_t, const void *, const cudnnFilterDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnConvolutionBackwardFilter"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, xDesc, x, dyDesc, dy, convDesc, algo, - workSpace, workSpaceSizeInBytes, beta, dwDesc, dw); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardDataAlgorithm( - cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t dxDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, const int, int *, - cudnnConvolutionBwdDataAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionBackwardDataAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, 
requestedAlgoCount, - returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardDataAlgorithmEx( - cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t dxDesc, void *dx, - const int requestedAlgoCount, int *returnedAlgoCount, - cudnnConvolutionBwdDataAlgoPerf_t *perfResults, void *workSpace, - size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *, - const int, int *, cudnnConvolutionBwdDataAlgoPerf_t *, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionBackwardDataAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, wDesc, w, dyDesc, dy, convDesc, dxDesc, dx, - requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithm( - cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t dxDesc, - cudnnConvolutionBwdDataPreference_t preference, size_t memoryLimitInBytes, - cudnnConvolutionBwdDataAlgo_t *algo) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataPreference_t, - size_t, cudnnConvolutionBwdDataAlgo_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardDataAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, preference, - 
memoryLimitInBytes, algo); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataWorkspaceSize( - cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t dxDesc, cudnnConvolutionBwdDataAlgo_t algo, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataAlgo_t, size_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardDataWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, algo, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardData( - cudnnHandle_t handle, const void *alpha, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnConvolutionDescriptor_t convDesc, - cudnnConvolutionBwdDataAlgo_t algo, void *workSpace, - size_t workSpaceSizeInBytes, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdDataAlgo_t, void *, - size_t, const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnConvolutionBackwardData"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, wDesc, w, dyDesc, dy, convDesc, algo, - workSpace, workSpaceSizeInBytes, beta, dxDesc, dx); -} - -cudnnStatus_t CUDNNWINAPI -cudnnIm2Col(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const void *x, const cudnnFilterDescriptor_t wDesc, - const cudnnConvolutionDescriptor_t convDesc, void 
*colBuffer) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, - const void *, const cudnnFilterDescriptor_t, - const cudnnConvolutionDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnIm2Col"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, x, wDesc, convDesc, colBuffer); -} - -cudnnStatus_t CUDNNWINAPI cudnnSoftmaxForward( - cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnSoftmaxForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, mode, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnSoftmaxBackward( - cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode, - const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y, - const cudnnTensorDescriptor_t dyDesc, const void *dy, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnSoftmaxBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, mode, alpha, yDesc, y, dyDesc, dy, beta, dxDesc, - dx); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t *poolingDesc) { - using FuncPtr = 
cudnnStatus_t(CUDNNWINAPI *)(cudnnPoolingDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreatePoolingDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetPooling2dDescriptor( - cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t mode, - cudnnNanPropagation_t maxpoolingNanOpt, int windowHeight, int windowWidth, - int verticalPadding, int horizontalPadding, int verticalStride, - int horizontalStride) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnPoolingDescriptor_t, cudnnPoolingMode_t, cudnnNanPropagation_t, int, - int, int, int, int, int); - static auto func_ptr = LoadSymbol("cudnnSetPooling2dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight, - windowWidth, verticalPadding, horizontalPadding, - verticalStride, horizontalStride); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetPooling2dDescriptor( - const cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t *mode, - cudnnNanPropagation_t *maxpoolingNanOpt, int *windowHeight, - int *windowWidth, int *verticalPadding, int *horizontalPadding, - int *verticalStride, int *horizontalStride) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnPoolingDescriptor_t, cudnnPoolingMode_t *, - cudnnNanPropagation_t *, int *, int *, int *, int *, int *, int *); - static auto func_ptr = LoadSymbol("cudnnGetPooling2dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight, - windowWidth, verticalPadding, horizontalPadding, - verticalStride, horizontalStride); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetPoolingNdDescriptor( - cudnnPoolingDescriptor_t poolingDesc, const cudnnPoolingMode_t mode, - const cudnnNanPropagation_t maxpoolingNanOpt, int nbDims, - const int windowDimA[], const int paddingA[], const int strideA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( 
- cudnnPoolingDescriptor_t, const cudnnPoolingMode_t, - const cudnnNanPropagation_t, int, const int[], const int[], const int[]); - static auto func_ptr = LoadSymbol("cudnnSetPoolingNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, mode, maxpoolingNanOpt, nbDims, windowDimA, - paddingA, strideA); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetPoolingNdDescriptor( - const cudnnPoolingDescriptor_t poolingDesc, int nbDimsRequested, - cudnnPoolingMode_t *mode, cudnnNanPropagation_t *maxpoolingNanOpt, - int *nbDims, int windowDimA[], int paddingA[], int strideA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnPoolingDescriptor_t, int, cudnnPoolingMode_t *, - cudnnNanPropagation_t *, int *, int[], int[], int[]); - static auto func_ptr = LoadSymbol("cudnnGetPoolingNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, nbDimsRequested, mode, maxpoolingNanOpt, nbDims, - windowDimA, paddingA, strideA); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetPoolingNdForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc, - const cudnnTensorDescriptor_t inputTensorDesc, - int nbDims, int outputTensorDimA[]) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, - const cudnnTensorDescriptor_t, int, int[]); - static auto func_ptr = - LoadSymbol("cudnnGetPoolingNdForwardOutputDim"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, inputTensorDesc, nbDims, outputTensorDimA); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetPooling2dForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc, - const cudnnTensorDescriptor_t inputTensorDesc, - int *n, int *c, int *h, int *w) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, - const cudnnTensorDescriptor_t, - int *, int *, int *, int *); - static auto func_ptr = - LoadSymbol("cudnnGetPooling2dForwardOutputDim"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(poolingDesc, inputTensorDesc, n, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyPoolingDescriptor(cudnnPoolingDescriptor_t poolingDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPoolingDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyPoolingDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnPoolingForward( - cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnPoolingForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, poolingDesc, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnPoolingBackward( - cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc, - const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnPoolingBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, poolingDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x, - beta, dxDesc, dx); -} - -cudnnStatus_t CUDNNWINAPI 
-cudnnCreateActivationDescriptor(cudnnActivationDescriptor_t *activationDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateActivationDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(activationDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetActivationDescriptor( - cudnnActivationDescriptor_t activationDesc, cudnnActivationMode_t mode, - cudnnNanPropagation_t reluNanOpt, double coef) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t, - cudnnActivationMode_t, - cudnnNanPropagation_t, double); - static auto func_ptr = LoadSymbol("cudnnSetActivationDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(activationDesc, mode, reluNanOpt, coef); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetActivationDescriptor(const cudnnActivationDescriptor_t activationDesc, - cudnnActivationMode_t *mode, - cudnnNanPropagation_t *reluNanOpt, double *coef) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnActivationDescriptor_t, cudnnActivationMode_t *, - cudnnNanPropagation_t *, double *); - static auto func_ptr = LoadSymbol("cudnnGetActivationDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(activationDesc, mode, reluNanOpt, coef); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyActivationDescriptor(cudnnActivationDescriptor_t activationDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t); - static auto func_ptr = - LoadSymbol("cudnnDestroyActivationDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(activationDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnActivationForward( - cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( 
- cudnnHandle_t, cudnnActivationDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnActivationForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, activationDesc, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnActivationBackward( - cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc, - const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnActivationDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnActivationBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, activationDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x, - beta, dxDesc, dx); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateLRNDescriptor(cudnnLRNDescriptor_t *normDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnLRNDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateLRNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(normDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetLRNDescriptor(cudnnLRNDescriptor_t normDesc, - unsigned lrnN, double lrnAlpha, - double lrnBeta, double lrnK) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnLRNDescriptor_t, unsigned int, double, double, double); - static auto func_ptr = LoadSymbol("cudnnSetLRNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(normDesc, lrnN, lrnAlpha, lrnBeta, lrnK); -} - 
-cudnnStatus_t CUDNNWINAPI cudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc, - unsigned *lrnN, - double *lrnAlpha, - double *lrnBeta, double *lrnK) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnLRNDescriptor_t, unsigned int *, double *, double *, double *); - static auto func_ptr = LoadSymbol("cudnnGetLRNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(normDesc, lrnN, lrnAlpha, lrnBeta, lrnK); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyLRNDescriptor(cudnnLRNDescriptor_t lrnDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnLRNDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyLRNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(lrnDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnLRNCrossChannelForward( - cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnLRNCrossChannelForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, normDesc, lrnMode, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnLRNCrossChannelBackward( - cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode, - const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *, - const cudnnTensorDescriptor_t, const 
void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnLRNCrossChannelBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, normDesc, lrnMode, alpha, yDesc, y, dyDesc, dy, xDesc, - x, beta, dxDesc, dx); -} - -cudnnStatus_t CUDNNWINAPI cudnnDivisiveNormalizationForward( - cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, - cudnnDivNormMode_t mode, const void *alpha, - const cudnnTensorDescriptor_t xDesc, // same desc for means, temp, temp2 - const void *x, - const void *means, // if NULL, means are assumed to be zero - void *temp, void *temp2, const void *beta, - const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, void *, void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = - LoadSymbol("cudnnDivisiveNormalizationForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, temp, temp2, - beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnDivisiveNormalizationBackward( - cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, - cudnnDivNormMode_t mode, const void *alpha, - const cudnnTensorDescriptor_t - xDesc, // same desc for x, means, dy, temp, temp2 - const void *x, - const void *means, // if NULL, means are assumed to be zero - const void *dy, void *temp, void *temp2, const void *beta, - const cudnnTensorDescriptor_t dXdMeansDesc, // same desc for dx, dMeans - void *dx, // output x differential - void *dMeans) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, const void *, - 
void *, void *, const void *, const cudnnTensorDescriptor_t, void *, - void *); - static auto func_ptr = - LoadSymbol("cudnnDivisiveNormalizationBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, dy, temp, - temp2, beta, dXdMeansDesc, dx, dMeans); -} - -cudnnStatus_t CUDNNWINAPI cudnnDeriveBNTensorDescriptor( - cudnnTensorDescriptor_t derivedBnDesc, const cudnnTensorDescriptor_t xDesc, - cudnnBatchNormMode_t mode) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, - cudnnBatchNormMode_t); - static auto func_ptr = LoadSymbol("cudnnDeriveBNTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(derivedBnDesc, xDesc, mode); -} - -cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardTraining( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, - - const void *alpha, // alpha[0] = result blend factor - const void *beta, // beta[0] = dest layer blend factor - - const cudnnTensorDescriptor_t xDesc, - const void *x, // NxCxHxW - const cudnnTensorDescriptor_t yDesc, - void *y, // NxCxHxW - - /* Shared desc for the next 6 tensors in the argument list. - Data type to be set as follows: - type = (typeOf(x) == double) ? double : float - Dimensions for this descriptor depend on normalization mode - - Spatial Normalization : tensors are expected to have dims 1xCx1x1 - (normalization is performed across NxHxW) - - Per-Activation Normalization : tensors are expected to have dims of - 1xCxHxW (normalization is performed across N) */ - const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, - - // 'Gamma' and 'Beta' respectively in Ioffe and Szegedy's paper's notation - const void *bnScale, const void *bnBias, - - /* MUST use factor=1 in the very first call of a complete training cycle. 
- Use a factor=1/(1+n) at N-th call to the function to get - Cumulative Moving Average (CMA) behavior - CMA[n] = (x[1]+...+x[n])/n - Since CMA[n+1] = (n*CMA[n]+x[n+1])/(n+1) = - ((n+1)*CMA[n]-CMA[n])/(n+1) + x[n+1]/(n+1) = - CMA[n]*(1-1/(n+1)) + x[n+1]*1/(n+1) */ - double exponentialAverageFactor, - - /* Used in Training phase only. - runningMean = newMean*factor + runningMean*(1-factor) */ - void *resultRunningMean, - /* Output in training mode, input in inference. Is the moving average - of variance[x] (factor is applied in the same way as for runningMean) */ - void *resultRunningVariance, - - /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and - backward functions. */ - double epsilon, - - /* Optionally save intermediate results from the forward pass here - - can be reused to speed up backward pass. NULL if unused */ - void *resultSaveMean, void *resultSaveInvVariance) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, - const void *, const void *, double, void *, void *, double, void *, - void *); - static auto func_ptr = - LoadSymbol("cudnnBatchNormalizationForwardTraining"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr( - handle, mode, alpha, beta, xDesc, x, yDesc, y, bnScaleBiasMeanVarDesc, - bnScale, bnBias, exponentialAverageFactor, resultRunningMean, - resultRunningVariance, epsilon, resultSaveMean, resultSaveInvVariance); -} - -cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardInference( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, - const void *alpha, // alpha[0] = result blend factor - const void *beta, // beta[0] = dest layer blend factor - const cudnnTensorDescriptor_t xDesc, - const void *x, // NxCxHxW - const cudnnTensorDescriptor_t yDesc, - void *y, // NxCxHxW - const cudnnTensorDescriptor_t 
bnScaleBiasMeanVarDesc, const void *bnScale, - const void *bnBias, const void *estimatedMean, - const void *estimatedVariance, double epsilon) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, - const void *, const void *, const void *, const void *, double); - static auto func_ptr = - LoadSymbol("cudnnBatchNormalizationForwardInference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, alpha, beta, xDesc, x, yDesc, y, - bnScaleBiasMeanVarDesc, bnScale, bnBias, estimatedMean, - estimatedVariance, epsilon); -} - -cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationBackward( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, const void *alphaDataDiff, - const void *betaDataDiff, const void *alphaParamDiff, - const void *betaParamDiff, - const cudnnTensorDescriptor_t xDesc, // same desc for x, dx, dy - const void *x, const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnTensorDescriptor_t dxDesc, void *dx, - /* Shared tensor desc for the 4 tensors below */ - const cudnnTensorDescriptor_t dBnScaleBiasDesc, - const void *bnScale, // bnBias doesn't affect backpropagation - /* scale and bias diff are not backpropagated below this layer */ - void *dBnScaleResult, void *dBnBiasResult, - /* Same epsilon as forward pass */ - double epsilon, - - /* Optionally cached intermediate results from - forward pass */ - const void *savedMean, const void *savedInvVariance) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, - const void *, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, - const void *, void *, void *, double, const void *, const void *); - static auto 
func_ptr = LoadSymbol("cudnnBatchNormalizationBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, alphaDataDiff, betaDataDiff, alphaParamDiff, - betaParamDiff, xDesc, x, dyDesc, dy, dxDesc, dx, - dBnScaleBiasDesc, bnScale, dBnScaleResult, dBnBiasResult, - epsilon, savedMean, savedInvVariance); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreateSpatialTransformerDescriptor( - - cudnnSpatialTransformerDescriptor_t *stDesc) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t *); - static auto func_ptr = - LoadSymbol("cudnnCreateSpatialTransformerDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetSpatialTransformerNdDescriptor( - cudnnSpatialTransformerDescriptor_t stDesc, cudnnSamplerType_t samplerType, - cudnnDataType_t dataType, const int nbDims, const int dimA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnSpatialTransformerDescriptor_t, cudnnSamplerType_t, cudnnDataType_t, - const int, const int[]); - static auto func_ptr = - LoadSymbol("cudnnSetSpatialTransformerNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stDesc, samplerType, dataType, nbDims, dimA); -} - -cudnnStatus_t CUDNNWINAPI cudnnDestroySpatialTransformerDescriptor( - cudnnSpatialTransformerDescriptor_t stDesc) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t); - static auto func_ptr = - LoadSymbol("cudnnDestroySpatialTransformerDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSpatialTfGridGeneratorForward( - cudnnHandle_t handle, const cudnnSpatialTransformerDescriptor_t stDesc, - const void *theta, void *grid) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *, - void *); - static auto func_ptr = - 
LoadSymbol("cudnnSpatialTfGridGeneratorForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, stDesc, theta, grid); -} - -cudnnStatus_t CUDNNWINAPI cudnnSpatialTfGridGeneratorBackward( - cudnnHandle_t handle, const cudnnSpatialTransformerDescriptor_t stDesc, - const void *dgrid, void *dtheta) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *, - void *); - static auto func_ptr = - LoadSymbol("cudnnSpatialTfGridGeneratorBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, stDesc, dgrid, dtheta); -} - -cudnnStatus_t CUDNNWINAPI cudnnSpatialTfSamplerForward( - cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *grid, const void *beta, cudnnTensorDescriptor_t yDesc, - void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, const void *, - cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnSpatialTfSamplerForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, stDesc, alpha, xDesc, x, grid, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnSpatialTfSamplerBackward( - cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t dxDesc, void *dx, - const void *alphaDgrid, const cudnnTensorDescriptor_t dyDesc, - const void *dy, const void *grid, const void *betaDgrid, void *dgrid) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *, const void *, - const 
cudnnTensorDescriptor_t, const void *, const void *, const void *, - void *); - static auto func_ptr = LoadSymbol("cudnnSpatialTfSamplerBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, stDesc, alpha, xDesc, x, beta, dxDesc, dx, alphaDgrid, - dyDesc, dy, grid, betaDgrid, dgrid); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t *dropoutDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateDropoutDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dropoutDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyDropoutDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dropoutDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnDropoutGetStatesSize(cudnnHandle_t handle, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnDropoutGetStatesSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnDropoutGetReserveSpaceSize( - cudnnTensorDescriptor_t xdesc, size_t *sizeInBytes) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnDropoutGetReserveSpaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(xdesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetDropoutDescriptor( - cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float dropout, - void *states, size_t stateSizeInBytes, unsigned long long seed) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, - float, void *, size_t, 
unsigned long long); - static auto func_ptr = LoadSymbol("cudnnSetDropoutDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dropoutDesc, handle, dropout, states, stateSizeInBytes, seed); -} - -cudnnStatus_t CUDNNWINAPI cudnnDropoutForward( - cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc, - const cudnnTensorDescriptor_t xdesc, const void *x, - const cudnnTensorDescriptor_t ydesc, void *y, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnDropoutDescriptor_t, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnDropoutForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dropoutDesc, xdesc, x, ydesc, y, reserveSpace, - reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnDropoutBackward( - cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc, - const cudnnTensorDescriptor_t dydesc, const void *dy, - const cudnnTensorDescriptor_t dxdesc, void *dx, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnDropoutDescriptor_t, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnDropoutBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dropoutDesc, dydesc, dy, dxdesc, dx, reserveSpace, - reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateRNNDescriptor(cudnnRNNDescriptor_t *rnnDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateRNNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyRNNDescriptor(cudnnRNNDescriptor_t 
rnnDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyRNNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreatePersistentRNNPlan( - cudnnRNNDescriptor_t rnnDesc, const int minibatch, - const cudnnDataType_t dataType, cudnnPersistentRNNPlan_t *plan) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, const int, - const cudnnDataType_t, - cudnnPersistentRNNPlan_t *); - static auto func_ptr = LoadSymbol("cudnnCreatePersistentRNNPlan"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, minibatch, dataType, plan); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetPersistentRNNPlan( - cudnnRNNDescriptor_t rnnDesc, cudnnPersistentRNNPlan_t plan) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, - cudnnPersistentRNNPlan_t); - static auto func_ptr = LoadSymbol("cudnnSetPersistentRNNPlan"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, plan); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyPersistentRNNPlan(cudnnPersistentRNNPlan_t plan) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPersistentRNNPlan_t); - static auto func_ptr = LoadSymbol("cudnnDestroyPersistentRNNPlan"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor_v6( - cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, const int hiddenSize, - const int numLayers, - cudnnDropoutDescriptor_t - dropoutDesc, // Between layers, not between recurrent steps. 
- cudnnRNNInputMode_t inputMode, cudnnDirectionMode_t direction, - cudnnRNNMode_t mode, cudnnRNNAlgo_t algo, cudnnDataType_t dataType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int, - cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t, - cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNDescriptor_v6"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, - inputMode, direction, mode, algo, dataType); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor( - cudnnRNNDescriptor_t rnnDesc, int hiddenSize, int numLayers, - cudnnDropoutDescriptor_t - dropoutDesc, // Between layers, not between recurrent steps. - cudnnRNNInputMode_t inputMode, cudnnDirectionMode_t direction, - cudnnRNNMode_t mode, cudnnDataType_t dataType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnRNNDescriptor_t, int, int, cudnnDropoutDescriptor_t, - cudnnRNNInputMode_t, cudnnDirectionMode_t, cudnnRNNMode_t, - cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode, - direction, mode, dataType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNWorkspaceSize( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetRNNWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNTrainingReserveSize( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const 
int seqLength, const cudnnTensorDescriptor_t *xDesc, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetRNNTrainingReserveSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetRNNParamsSize(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const cudnnTensorDescriptor_t xDesc, size_t *sizeInBytes, - cudnnDataType_t dataType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnTensorDescriptor_t, - size_t *, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnGetRNNParamsSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, xDesc, sizeInBytes, dataType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerMatrixParams( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int layer, - const cudnnTensorDescriptor_t xDesc, const cudnnFilterDescriptor_t wDesc, - const void *w, const int linLayerID, - cudnnFilterDescriptor_t linLayerMatDesc, void **linLayerMat) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, - const void *, const int, cudnnFilterDescriptor_t, void **); - static auto func_ptr = LoadSymbol("cudnnGetRNNLinLayerMatrixParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, layer, xDesc, wDesc, w, linLayerID, - linLayerMatDesc, linLayerMat); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerBiasParams( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int layer, - const cudnnTensorDescriptor_t xDesc, const cudnnFilterDescriptor_t wDesc, - const void *w, const int linLayerID, - 
cudnnFilterDescriptor_t linLayerBiasDesc, void **linLayerBias) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, - const void *, const int, cudnnFilterDescriptor_t, void **); - static auto func_ptr = LoadSymbol("cudnnGetRNNLinLayerBiasParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, layer, xDesc, wDesc, w, linLayerID, - linLayerBiasDesc, linLayerBias); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNForwardInference( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t *yDesc, void *y, - const cudnnTensorDescriptor_t hyDesc, void *hy, - const cudnnTensorDescriptor_t cyDesc, void *cy, void *workspace, - size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnRNNForwardInference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, - wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNForwardTraining( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, - const 
cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t *yDesc, void *y, - const cudnnTensorDescriptor_t hyDesc, void *hy, - const cudnnTensorDescriptor_t cyDesc, void *cy, void *workspace, - size_t workSpaceSizeInBytes, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *, - size_t); - static auto func_ptr = LoadSymbol("cudnnRNNForwardTraining"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, - wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, - workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnRNNBackwardData(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *yDesc, - const void *y, const cudnnTensorDescriptor_t *dyDesc, - const void *dy, const cudnnTensorDescriptor_t dhyDesc, - const void *dhy, const cudnnTensorDescriptor_t dcyDesc, - const void *dcy, const cudnnFilterDescriptor_t wDesc, - const void *w, const cudnnTensorDescriptor_t hxDesc, - const void *hx, const cudnnTensorDescriptor_t cxDesc, - const void *cx, const cudnnTensorDescriptor_t *dxDesc, - void *dx, const cudnnTensorDescriptor_t dhxDesc, void *dhx, - const cudnnTensorDescriptor_t dcxDesc, void *dcx, - void *workspace, size_t workSpaceSizeInBytes, - void *reserveSpace, size_t reserveSpaceSizeInBytes) { - using FuncPtr = 
cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *, - size_t); - static auto func_ptr = LoadSymbol("cudnnRNNBackwardData"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc, - dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc, - dx, dhxDesc, dhx, dcxDesc, dcx, workspace, - workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardWeights( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t *yDesc, const void *y, const void *workspace, - size_t workSpaceSizeInBytes, const cudnnFilterDescriptor_t dwDesc, void *dw, - const void *reserveSpace, size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, const void *, const void *, size_t, - const cudnnFilterDescriptor_t, void *, const void *, size_t); - static auto func_ptr = LoadSymbol("cudnnRNNBackwardWeights"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y, - workspace, workSpaceSizeInBytes, dwDesc, dw, reserveSpace, - 
reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetConvolution2dDescriptor_v4( - cudnnConvolutionDescriptor_t convDesc, - int pad_h, // zero-padding height - int pad_w, // zero-padding width - int u, // vertical filter stride - int v, // horizontal filter stride - int dilation_h, // filter dilation in the vertical dimension - int dilation_w, // filter dilation in the horizontal dimension - cudnnConvolutionMode_t mode) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int, int, int, - int, int, int, cudnnConvolutionMode_t); - static auto func_ptr = - LoadSymbol("cudnnSetConvolution2dDescriptor_v4"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetConvolution2dDescriptor_v5( - cudnnConvolutionDescriptor_t convDesc, - int pad_h, // zero-padding height - int pad_w, // zero-padding width - int u, // vertical filter stride - int v, // horizontal filter stride - int dilation_h, // filter dilation in the vertical dimension - int dilation_w, // filter dilation in the horizontal dimension - cudnnConvolutionMode_t mode, cudnnDataType_t computeType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnConvolutionDescriptor_t, int, int, int, int, int, int, - cudnnConvolutionMode_t, cudnnDataType_t); - static auto func_ptr = - LoadSymbol("cudnnSetConvolution2dDescriptor_v5"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode, - computeType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dDescriptor_v4( - const cudnnConvolutionDescriptor_t convDesc, - int *pad_h, // zero-padding height - int *pad_w, // zero-padding width - int *u, // vertical filter stride - int *v, // horizontal filter stride - int *dilation_h, // filter dilation in the vertical dimension - int *dilation_w, // filter dilation in the horizontal dimension - 
cudnnConvolutionMode_t *mode) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnConvolutionDescriptor_t, int *, int *, int *, int *, int *, - int *, cudnnConvolutionMode_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolution2dDescriptor_v4"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dDescriptor_v5( - const cudnnConvolutionDescriptor_t convDesc, - int *pad_h, // zero-padding height - int *pad_w, // zero-padding width - int *u, // vertical filter stride - int *v, // horizontal filter stride - int *dilation_h, // filter dilation in the vertical dimension - int *dilation_w, // filter dilation in the horizontal dimension - cudnnConvolutionMode_t *mode, cudnnDataType_t *computeType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnConvolutionDescriptor_t, int *, int *, int *, int *, int *, - int *, cudnnConvolutionMode_t *, cudnnDataType_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolution2dDescriptor_v5"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode, - computeType); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cudnn_7_0.inc b/third_party/xla/third_party/tsl/tsl/cuda/cudnn_7_0.inc deleted file mode 100644 index 008ae9099c0b09..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cudnn_7_0.inc +++ /dev/null @@ -1,2027 +0,0 @@ -// Auto-generated, do not edit. 
- -extern "C" { - -size_t CUDNNWINAPI cudnnGetVersion(void) { - using FuncPtr = size_t(CUDNNWINAPI *)(); - static auto func_ptr = LoadSymbol("cudnnGetVersion"); - if (!func_ptr) return 0; - return func_ptr(); -} - -size_t CUDNNWINAPI cudnnGetCudartVersion(void) { - using FuncPtr = size_t(CUDNNWINAPI *)(); - static auto func_ptr = LoadSymbol("cudnnGetCudartVersion"); - if (!func_ptr) return 0; - return func_ptr(); -} - -const char *CUDNNWINAPI cudnnGetErrorString(cudnnStatus_t status) { - using FuncPtr = const char *(CUDNNWINAPI *)(cudnnStatus_t); - static auto func_ptr = LoadSymbol("cudnnGetErrorString"); - if (!func_ptr) return "cudnnGetErrorString symbol not found."; - return func_ptr(status); -} - -cudnnStatus_t CUDNNWINAPI cudnnQueryRuntimeError(cudnnHandle_t handle, - cudnnStatus_t *rstatus, - cudnnErrQueryMode_t mode, - cudnnRuntimeTag_t *tag) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnStatus_t *, cudnnErrQueryMode_t, cudnnRuntimeTag_t *); - static auto func_ptr = LoadSymbol("cudnnQueryRuntimeError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rstatus, mode, tag); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetProperty(libraryPropertyType type, - int *value) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(libraryPropertyType, int *); - static auto func_ptr = LoadSymbol("cudnnGetProperty"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type, value); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreate(cudnnHandle_t *handle) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t *); - static auto func_ptr = LoadSymbol("cudnnCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cudnnStatus_t CUDNNWINAPI cudnnDestroy(cudnnHandle_t handle) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t); - static auto func_ptr = LoadSymbol("cudnnDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cudnnStatus_t 
CUDNNWINAPI cudnnSetStream(cudnnHandle_t handle, - cudaStream_t streamId) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudnnSetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetStream(cudnnHandle_t handle, - cudaStream_t *streamId) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t *); - static auto func_ptr = LoadSymbol("cudnnGetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensor4dDescriptor( - cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format, - cudnnDataType_t dataType, /* image data type */ - int n, /* number of inputs (batch size) */ - int c, /* number of input feature maps */ - int h, /* height of input section */ - int w) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t, - cudnnDataType_t, int, int, int, int); - static auto func_ptr = LoadSymbol("cudnnSetTensor4dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, format, dataType, n, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensor4dDescriptorEx( - cudnnTensorDescriptor_t tensorDesc, - cudnnDataType_t dataType, /* image data type */ - int n, /* number of inputs (batch size) */ - int c, /* number of input feature maps */ - int h, /* height of input section */ - int w, /* width of input section */ - int nStride, int cStride, int hStride, int wStride) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI 
*)(cudnnTensorDescriptor_t, cudnnDataType_t, - int, int, int, int, int, int, int, int); - static auto func_ptr = LoadSymbol("cudnnSetTensor4dDescriptorEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride, - wStride); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetTensor4dDescriptor( - const cudnnTensorDescriptor_t tensorDesc, - cudnnDataType_t *dataType, /* image data type */ - int *n, /* number of inputs (batch size) */ - int *c, /* number of input feature maps */ - int *h, /* height of input section */ - int *w, /* width of input section */ - int *nStride, int *cStride, int *hStride, int *wStride) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnTensorDescriptor_t, cudnnDataType_t *, int *, int *, int *, - int *, int *, int *, int *, int *); - static auto func_ptr = LoadSymbol("cudnnGetTensor4dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride, - wStride); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensorNdDescriptor( - cudnnTensorDescriptor_t tensorDesc, cudnnDataType_t dataType, int nbDims, - const int dimA[], const int strideA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnTensorDescriptor_t, cudnnDataType_t, int, const int[], const int[]); - static auto func_ptr = LoadSymbol("cudnnSetTensorNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, dataType, nbDims, dimA, strideA); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensorNdDescriptorEx( - cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format, - cudnnDataType_t dataType, int nbDims, const int dimA[]) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t, - cudnnDataType_t, int, const int[]); - static auto func_ptr = LoadSymbol("cudnnSetTensorNdDescriptorEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(tensorDesc, format, dataType, nbDims, dimA); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetTensorNdDescriptor( - const cudnnTensorDescriptor_t tensorDesc, int nbDimsRequested, - cudnnDataType_t *dataType, int *nbDims, int dimA[], int strideA[]) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(const cudnnTensorDescriptor_t, int, - cudnnDataType_t *, int *, int[], int[]); - static auto func_ptr = LoadSymbol("cudnnGetTensorNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, nbDimsRequested, dataType, nbDims, dimA, strideA); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetTensorSizeInBytes( - const cudnnTensorDescriptor_t tensorDesc, size_t *size) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(const cudnnTensorDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetTensorSizeInBytes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, size); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyTensorDescriptor(cudnnTensorDescriptor_t tensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnTransformTensor( - cudnnHandle_t handle, const void *alpha, - const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, - const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnTransformTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnAddTensor(cudnnHandle_t handle, - const void *alpha, - const cudnnTensorDescriptor_t aDesc, - const void *A, const void *beta, - 
const cudnnTensorDescriptor_t cDesc, - void *C) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnAddTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, aDesc, A, beta, cDesc, C); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateOpTensorDescriptor(cudnnOpTensorDescriptor_t *opTensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateOpTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(opTensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetOpTensorDescriptor( - cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t opTensorOp, - cudnnDataType_t opTensorCompType, cudnnNanPropagation_t opTensorNanOpt) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t, - cudnnDataType_t, cudnnNanPropagation_t); - static auto func_ptr = LoadSymbol("cudnnSetOpTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(opTensorDesc, opTensorOp, opTensorCompType, opTensorNanOpt); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetOpTensorDescriptor( - const cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t *opTensorOp, - cudnnDataType_t *opTensorCompType, cudnnNanPropagation_t *opTensorNanOpt) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t *, cudnnDataType_t *, - cudnnNanPropagation_t *); - static auto func_ptr = LoadSymbol("cudnnGetOpTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(opTensorDesc, opTensorOp, opTensorCompType, opTensorNanOpt); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI 
*)(cudnnOpTensorDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyOpTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(opTensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnOpTensor( - cudnnHandle_t handle, const cudnnOpTensorDescriptor_t opTensorDesc, - const void *alpha1, const cudnnTensorDescriptor_t aDesc, const void *A, - const void *alpha2, const cudnnTensorDescriptor_t bDesc, const void *B, - const void *beta, const cudnnTensorDescriptor_t cDesc, void *C) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnOpTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnOpTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opTensorDesc, alpha1, aDesc, A, alpha2, bDesc, B, - beta, cDesc, C); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreateReduceTensorDescriptor( - cudnnReduceTensorDescriptor_t *reduceTensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t *); - static auto func_ptr = - LoadSymbol("cudnnCreateReduceTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(reduceTensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetReduceTensorDescriptor( - cudnnReduceTensorDescriptor_t reduceTensorDesc, - cudnnReduceTensorOp_t reduceTensorOp, cudnnDataType_t reduceTensorCompType, - cudnnNanPropagation_t reduceTensorNanOpt, - cudnnReduceTensorIndices_t reduceTensorIndices, - cudnnIndicesType_t reduceTensorIndicesType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t, cudnnDataType_t, - cudnnNanPropagation_t, cudnnReduceTensorIndices_t, cudnnIndicesType_t); - static auto func_ptr = LoadSymbol("cudnnSetReduceTensorDescriptor"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType, - reduceTensorNanOpt, reduceTensorIndices, - reduceTensorIndicesType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetReduceTensorDescriptor( - const cudnnReduceTensorDescriptor_t reduceTensorDesc, - cudnnReduceTensorOp_t *reduceTensorOp, - cudnnDataType_t *reduceTensorCompType, - cudnnNanPropagation_t *reduceTensorNanOpt, - cudnnReduceTensorIndices_t *reduceTensorIndices, - cudnnIndicesType_t *reduceTensorIndicesType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t *, - cudnnDataType_t *, cudnnNanPropagation_t *, cudnnReduceTensorIndices_t *, - cudnnIndicesType_t *); - static auto func_ptr = LoadSymbol("cudnnGetReduceTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType, - reduceTensorNanOpt, reduceTensorIndices, - reduceTensorIndicesType); -} - -cudnnStatus_t CUDNNWINAPI cudnnDestroyReduceTensorDescriptor( - cudnnReduceTensorDescriptor_t reduceTensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t); - static auto func_ptr = - LoadSymbol("cudnnDestroyReduceTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(reduceTensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetReductionIndicesSize( - cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, - const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnReduceTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetReductionIndicesSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes); -} - -cudnnStatus_t 
CUDNNWINAPI cudnnGetReductionWorkspaceSize( - cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, - const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnReduceTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetReductionWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnReduceTensor( - cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, - void *indices, size_t indicesSizeInBytes, void *workspace, - size_t workspaceSizeInBytes, const void *alpha, - const cudnnTensorDescriptor_t aDesc, const void *A, const void *beta, - const cudnnTensorDescriptor_t cDesc, void *C) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnReduceTensorDescriptor_t, void *, size_t, - void *, size_t, const void *, const cudnnTensorDescriptor_t, const void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnReduceTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, reduceTensorDesc, indices, indicesSizeInBytes, - workspace, workspaceSizeInBytes, alpha, aDesc, A, beta, cDesc, - C); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensor(cudnnHandle_t handle, - const cudnnTensorDescriptor_t yDesc, - void *y, const void *valuePtr) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *); - static auto func_ptr = LoadSymbol("cudnnSetTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, yDesc, y, valuePtr); -} - -cudnnStatus_t CUDNNWINAPI cudnnScaleTensor(cudnnHandle_t handle, - const cudnnTensorDescriptor_t yDesc, - void *y, const 
void *alpha) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *); - static auto func_ptr = LoadSymbol("cudnnScaleTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, yDesc, y, alpha); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateFilterDescriptor(cudnnFilterDescriptor_t *filterDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateFilterDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetFilter4dDescriptor( - cudnnFilterDescriptor_t filterDesc, - cudnnDataType_t dataType, /* image data type */ - cudnnTensorFormat_t format, int k, /* number of output feature maps */ - int c, /* number of input feature maps */ - int h, /* height of each input filter */ - int w) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t, - cudnnTensorFormat_t, int, int, int, int); - static auto func_ptr = LoadSymbol("cudnnSetFilter4dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc, dataType, format, k, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetFilter4dDescriptor( - const cudnnFilterDescriptor_t filterDesc, - cudnnDataType_t *dataType, /* image data type */ - cudnnTensorFormat_t *format, int *k, /* number of output feature maps */ - int *c, /* number of input feature maps */ - int *h, /* height of each input filter */ - int *w) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnFilterDescriptor_t, cudnnDataType_t *, cudnnTensorFormat_t *, - int *, int *, int *, int *); - static auto func_ptr = LoadSymbol("cudnnGetFilter4dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc, dataType, format, k, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetFilterNdDescriptor( - cudnnFilterDescriptor_t 
filterDesc, - cudnnDataType_t dataType, /* image data type */ - cudnnTensorFormat_t format, int nbDims, const int filterDimA[]) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t, - cudnnTensorFormat_t, int, const int[]); - static auto func_ptr = LoadSymbol("cudnnSetFilterNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc, dataType, format, nbDims, filterDimA); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetFilterNdDescriptor( - const cudnnFilterDescriptor_t filterDesc, int nbDimsRequested, - cudnnDataType_t *dataType, /* image data type */ - cudnnTensorFormat_t *format, int *nbDims, int filterDimA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnFilterDescriptor_t, int, cudnnDataType_t *, - cudnnTensorFormat_t *, int *, int[]); - static auto func_ptr = LoadSymbol("cudnnGetFilterNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc, nbDimsRequested, dataType, format, nbDims, - filterDimA); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyFilterDescriptor(cudnnFilterDescriptor_t filterDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyFilterDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t *convDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t *); - static auto func_ptr = - LoadSymbol("cudnnCreateConvolutionDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionMathType( - cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t mathType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, - cudnnMathType_t); - static auto func_ptr = LoadSymbol("cudnnSetConvolutionMathType"); 
- if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, mathType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionMathType( - cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t *mathType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, - cudnnMathType_t *); - static auto func_ptr = LoadSymbol("cudnnGetConvolutionMathType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, mathType); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionGroupCount( - cudnnConvolutionDescriptor_t convDesc, int groupCount) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int); - static auto func_ptr = LoadSymbol("cudnnSetConvolutionGroupCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, groupCount); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionGroupCount( - cudnnConvolutionDescriptor_t convDesc, int *groupCount) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int *); - static auto func_ptr = LoadSymbol("cudnnGetConvolutionGroupCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, groupCount); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetConvolution2dDescriptor( - cudnnConvolutionDescriptor_t convDesc, int pad_h, /* zero-padding height */ - int pad_w, /* zero-padding width */ - int u, /* vertical filter stride */ - int v, /* horizontal filter stride */ - int dilation_h, /* filter dilation in the vertical dimension */ - int dilation_w, /* filter dilation in the horizontal dimension */ - cudnnConvolutionMode_t mode, cudnnDataType_t computeType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnConvolutionDescriptor_t, int, int, int, int, int, int, - cudnnConvolutionMode_t, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetConvolution2dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, pad_h, pad_w, u, v, 
dilation_h, dilation_w, mode, - computeType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dDescriptor( - const cudnnConvolutionDescriptor_t convDesc, - int *pad_h, /* zero-padding height */ - int *pad_w, /* zero-padding width */ - int *u, /* vertical filter stride */ - int *v, /* horizontal filter stride */ - int *dilation_h, /* filter dilation in the vertical dimension */ - int *dilation_w, /* filter dilation in the horizontal dimension */ - cudnnConvolutionMode_t *mode, cudnnDataType_t *computeType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnConvolutionDescriptor_t, int *, int *, int *, int *, int *, - int *, cudnnConvolutionMode_t *, cudnnDataType_t *); - static auto func_ptr = LoadSymbol("cudnnGetConvolution2dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode, - computeType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dForwardOutputDim( - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t inputTensorDesc, - const cudnnFilterDescriptor_t filterDesc, int *n, int *c, int *h, int *w) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, int *, int *, int *, int *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolution2dForwardOutputDim"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, inputTensorDesc, filterDesc, n, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionNdDescriptor( - cudnnConvolutionDescriptor_t convDesc, int arrayLength, /* nbDims-2 size */ - const int padA[], const int filterStrideA[], const int dilationA[], - cudnnConvolutionMode_t mode, cudnnDataType_t computeType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnConvolutionDescriptor_t, int, const int[], const int[], const int[], - cudnnConvolutionMode_t, cudnnDataType_t); - static auto func_ptr 
= LoadSymbol("cudnnSetConvolutionNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, arrayLength, padA, filterStrideA, dilationA, mode, - computeType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionNdDescriptor( - const cudnnConvolutionDescriptor_t convDesc, int arrayLengthRequested, - int *arrayLength, int padA[], int strideA[], int dilationA[], - cudnnConvolutionMode_t *mode, cudnnDataType_t *computeType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnConvolutionDescriptor_t, int, int *, int[], int[], int[], - cudnnConvolutionMode_t *, cudnnDataType_t *); - static auto func_ptr = LoadSymbol("cudnnGetConvolutionNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, arrayLengthRequested, arrayLength, padA, strideA, - dilationA, mode, computeType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionNdForwardOutputDim( - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t inputTensorDesc, - const cudnnFilterDescriptor_t filterDesc, int nbDims, - int tensorOutputDimA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, int, int[]); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionNdForwardOutputDim"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, inputTensorDesc, filterDesc, nbDims, - tensorOutputDimA); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyConvolutionDescriptor(cudnnConvolutionDescriptor_t convDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t); - static auto func_ptr = - LoadSymbol("cudnnDestroyConvolutionDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetConvolutionForwardAlgorithmMaxCount(cudnnHandle_t handle, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI 
*)(cudnnHandle_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionForwardAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, count); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionForwardAlgorithm( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnFilterDescriptor_t wDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t yDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, const int, int *, - cudnnConvolutionFwdAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionForwardAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, requestedAlgoCount, - returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionForwardAlgorithmEx( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t yDesc, void *y, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults, - void *workSpace, size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *, - const int, int *, cudnnConvolutionFwdAlgoPerf_t *, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionForwardAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, x, wDesc, w, convDesc, yDesc, y, - 
requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardAlgorithm( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnFilterDescriptor_t wDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t yDesc, - cudnnConvolutionFwdPreference_t preference, size_t memoryLimitInBytes, - cudnnConvolutionFwdAlgo_t *algo) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, cudnnConvolutionFwdPreference_t, size_t, - cudnnConvolutionFwdAlgo_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionForwardAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, preference, - memoryLimitInBytes, algo); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardAlgorithm_v7( - cudnnHandle_t handle, const cudnnTensorDescriptor_t srcDesc, - const cudnnFilterDescriptor_t filterDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t destDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, const int, int *, - cudnnConvolutionFwdAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionForwardAlgorithm_v7"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, srcDesc, filterDesc, convDesc, destDesc, - requestedAlgoCount, returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardWorkspaceSize( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnFilterDescriptor_t wDesc, - 
const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t yDesc, cudnnConvolutionFwdAlgo_t algo, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, cudnnConvolutionFwdAlgo_t, size_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionForwardWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, algo, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionForward( - cudnnHandle_t handle, const void *alpha, - const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo, - void *workSpace, size_t workSpaceSizeInBytes, const void *beta, - const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *, - size_t, const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnConvolutionForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, xDesc, x, wDesc, w, convDesc, algo, workSpace, - workSpaceSizeInBytes, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionBiasActivationForward( - cudnnHandle_t handle, const void *alpha1, - const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo, - void *workSpace, size_t workSpaceSizeInBytes, const void *alpha2, - const cudnnTensorDescriptor_t zDesc, const void *z, - const cudnnTensorDescriptor_t biasDesc, 
const void *bias, - const cudnnActivationDescriptor_t activationDesc, - const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *, - size_t, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnActivationDescriptor_t, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = - LoadSymbol("cudnnConvolutionBiasActivationForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha1, xDesc, x, wDesc, w, convDesc, algo, workSpace, - workSpaceSizeInBytes, alpha2, zDesc, z, biasDesc, bias, - activationDesc, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardBias( - cudnnHandle_t handle, const void *alpha, - const cudnnTensorDescriptor_t dyDesc, const void *dy, const void *beta, - const cudnnTensorDescriptor_t dbDesc, void *db) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnConvolutionBackwardBias"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, dyDesc, dy, beta, dbDesc, db); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithmMaxCount( - cudnnHandle_t handle, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardFilterAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, count); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardFilterAlgorithm( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnTensorDescriptor_t dyDesc, - 
const cudnnConvolutionDescriptor_t convDesc, - const cudnnFilterDescriptor_t dwDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnFilterDescriptor_t, const int, int *, - cudnnConvolutionBwdFilterAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionBackwardFilterAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, requestedAlgoCount, - returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardFilterAlgorithmEx( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnTensorDescriptor_t dyDesc, const void *y, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnFilterDescriptor_t dwDesc, void *dw, - const int requestedAlgoCount, int *returnedAlgoCount, - cudnnConvolutionBwdFilterAlgoPerf_t *perfResults, void *workSpace, - size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, void *, - const int, int *, cudnnConvolutionBwdFilterAlgoPerf_t *, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionBackwardFilterAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, x, dyDesc, y, convDesc, dwDesc, dw, - requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithm( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - 
const cudnnFilterDescriptor_t dwDesc, - cudnnConvolutionBwdFilterPreference_t preference, size_t memoryLimitInBytes, - cudnnConvolutionBwdFilterAlgo_t *algo) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterPreference_t, - size_t, cudnnConvolutionBwdFilterAlgo_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardFilterAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, preference, - memoryLimitInBytes, algo); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithm_v7( - cudnnHandle_t handle, const cudnnTensorDescriptor_t srcDesc, - const cudnnTensorDescriptor_t diffDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnFilterDescriptor_t gradDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnFilterDescriptor_t, const int, int *, - cudnnConvolutionBwdFilterAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardFilterAlgorithm_v7"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, srcDesc, diffDesc, convDesc, gradDesc, - requestedAlgoCount, returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterWorkspaceSize( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnFilterDescriptor_t gradDesc, - cudnnConvolutionBwdFilterAlgo_t algo, size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const 
cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, size_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardFilterWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, dyDesc, convDesc, gradDesc, algo, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardFilter( - cudnnHandle_t handle, const void *alpha, - const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnConvolutionDescriptor_t convDesc, - cudnnConvolutionBwdFilterAlgo_t algo, void *workSpace, - size_t workSpaceSizeInBytes, const void *beta, - const cudnnFilterDescriptor_t dwDesc, void *dw) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, - void *, size_t, const void *, const cudnnFilterDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnConvolutionBackwardFilter"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, xDesc, x, dyDesc, dy, convDesc, algo, - workSpace, workSpaceSizeInBytes, beta, dwDesc, dw); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithmMaxCount( - cudnnHandle_t handle, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardDataAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, count); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardDataAlgorithm( - cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t 
dxDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, const int, int *, - cudnnConvolutionBwdDataAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionBackwardDataAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, requestedAlgoCount, - returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardDataAlgorithmEx( - cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t dxDesc, void *dx, - const int requestedAlgoCount, int *returnedAlgoCount, - cudnnConvolutionBwdDataAlgoPerf_t *perfResults, void *workSpace, - size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *, - const int, int *, cudnnConvolutionBwdDataAlgoPerf_t *, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionBackwardDataAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, wDesc, w, dyDesc, dy, convDesc, dxDesc, dx, - requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithm( - cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t dxDesc, - cudnnConvolutionBwdDataPreference_t preference, 
size_t memoryLimitInBytes, - cudnnConvolutionBwdDataAlgo_t *algo) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataPreference_t, - size_t, cudnnConvolutionBwdDataAlgo_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardDataAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, preference, - memoryLimitInBytes, algo); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithm_v7( - cudnnHandle_t handle, const cudnnFilterDescriptor_t filterDesc, - const cudnnTensorDescriptor_t diffDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t gradDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, const int, int *, - cudnnConvolutionBwdDataAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardDataAlgorithm_v7"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, filterDesc, diffDesc, convDesc, gradDesc, - requestedAlgoCount, returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataWorkspaceSize( - cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t dxDesc, cudnnConvolutionBwdDataAlgo_t algo, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, 
cudnnConvolutionBwdDataAlgo_t, size_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardDataWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, algo, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardData( - cudnnHandle_t handle, const void *alpha, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnConvolutionDescriptor_t convDesc, - cudnnConvolutionBwdDataAlgo_t algo, void *workSpace, - size_t workSpaceSizeInBytes, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdDataAlgo_t, void *, - size_t, const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnConvolutionBackwardData"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, wDesc, w, dyDesc, dy, convDesc, algo, - workSpace, workSpaceSizeInBytes, beta, dxDesc, dx); -} - -cudnnStatus_t CUDNNWINAPI -cudnnIm2Col(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const void *x, const cudnnFilterDescriptor_t wDesc, - const cudnnConvolutionDescriptor_t convDesc, void *colBuffer) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, - const void *, const cudnnFilterDescriptor_t, - const cudnnConvolutionDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnIm2Col"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, x, wDesc, convDesc, colBuffer); -} - -cudnnStatus_t CUDNNWINAPI cudnnSoftmaxForward( - cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode, - const void *alpha, const cudnnTensorDescriptor_t 
xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnSoftmaxForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, mode, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnSoftmaxBackward( - cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode, - const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y, - const cudnnTensorDescriptor_t dyDesc, const void *dy, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnSoftmaxBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, mode, alpha, yDesc, y, dyDesc, dy, beta, dxDesc, - dx); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t *poolingDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPoolingDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreatePoolingDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetPooling2dDescriptor( - cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t mode, - cudnnNanPropagation_t maxpoolingNanOpt, int windowHeight, int windowWidth, - int verticalPadding, int horizontalPadding, int verticalStride, - int horizontalStride) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - 
cudnnPoolingDescriptor_t, cudnnPoolingMode_t, cudnnNanPropagation_t, int, - int, int, int, int, int); - static auto func_ptr = LoadSymbol("cudnnSetPooling2dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight, - windowWidth, verticalPadding, horizontalPadding, - verticalStride, horizontalStride); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetPooling2dDescriptor( - const cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t *mode, - cudnnNanPropagation_t *maxpoolingNanOpt, int *windowHeight, - int *windowWidth, int *verticalPadding, int *horizontalPadding, - int *verticalStride, int *horizontalStride) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnPoolingDescriptor_t, cudnnPoolingMode_t *, - cudnnNanPropagation_t *, int *, int *, int *, int *, int *, int *); - static auto func_ptr = LoadSymbol("cudnnGetPooling2dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight, - windowWidth, verticalPadding, horizontalPadding, - verticalStride, horizontalStride); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetPoolingNdDescriptor( - cudnnPoolingDescriptor_t poolingDesc, const cudnnPoolingMode_t mode, - const cudnnNanPropagation_t maxpoolingNanOpt, int nbDims, - const int windowDimA[], const int paddingA[], const int strideA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnPoolingDescriptor_t, const cudnnPoolingMode_t, - const cudnnNanPropagation_t, int, const int[], const int[], const int[]); - static auto func_ptr = LoadSymbol("cudnnSetPoolingNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, mode, maxpoolingNanOpt, nbDims, windowDimA, - paddingA, strideA); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetPoolingNdDescriptor( - const cudnnPoolingDescriptor_t poolingDesc, int nbDimsRequested, - cudnnPoolingMode_t *mode, cudnnNanPropagation_t *maxpoolingNanOpt, - int 
*nbDims, int windowDimA[], int paddingA[], int strideA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnPoolingDescriptor_t, int, cudnnPoolingMode_t *, - cudnnNanPropagation_t *, int *, int[], int[], int[]); - static auto func_ptr = LoadSymbol("cudnnGetPoolingNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, nbDimsRequested, mode, maxpoolingNanOpt, nbDims, - windowDimA, paddingA, strideA); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetPoolingNdForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc, - const cudnnTensorDescriptor_t inputTensorDesc, - int nbDims, int outputTensorDimA[]) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, - const cudnnTensorDescriptor_t, int, int[]); - static auto func_ptr = - LoadSymbol("cudnnGetPoolingNdForwardOutputDim"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, inputTensorDesc, nbDims, outputTensorDimA); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetPooling2dForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc, - const cudnnTensorDescriptor_t inputTensorDesc, - int *n, int *c, int *h, int *w) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, - const cudnnTensorDescriptor_t, - int *, int *, int *, int *); - static auto func_ptr = - LoadSymbol("cudnnGetPooling2dForwardOutputDim"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, inputTensorDesc, n, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyPoolingDescriptor(cudnnPoolingDescriptor_t poolingDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPoolingDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyPoolingDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnPoolingForward( - cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc, - const void *alpha, const 
cudnnTensorDescriptor_t xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnPoolingForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, poolingDesc, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnPoolingBackward( - cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc, - const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnPoolingBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, poolingDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x, - beta, dxDesc, dx); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateActivationDescriptor(cudnnActivationDescriptor_t *activationDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateActivationDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(activationDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetActivationDescriptor( - cudnnActivationDescriptor_t activationDesc, cudnnActivationMode_t mode, - cudnnNanPropagation_t reluNanOpt, double coef) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI 
*)(cudnnActivationDescriptor_t, - cudnnActivationMode_t, - cudnnNanPropagation_t, double); - static auto func_ptr = LoadSymbol("cudnnSetActivationDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(activationDesc, mode, reluNanOpt, coef); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetActivationDescriptor(const cudnnActivationDescriptor_t activationDesc, - cudnnActivationMode_t *mode, - cudnnNanPropagation_t *reluNanOpt, double *coef) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnActivationDescriptor_t, cudnnActivationMode_t *, - cudnnNanPropagation_t *, double *); - static auto func_ptr = LoadSymbol("cudnnGetActivationDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(activationDesc, mode, reluNanOpt, coef); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyActivationDescriptor(cudnnActivationDescriptor_t activationDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t); - static auto func_ptr = - LoadSymbol("cudnnDestroyActivationDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(activationDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnActivationForward( - cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnActivationDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnActivationForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, activationDesc, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnActivationBackward( - cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc, - const void *alpha, const cudnnTensorDescriptor_t yDesc, const void 
*y, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnActivationDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnActivationBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, activationDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x, - beta, dxDesc, dx); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateLRNDescriptor(cudnnLRNDescriptor_t *normDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnLRNDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateLRNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(normDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetLRNDescriptor(cudnnLRNDescriptor_t normDesc, - unsigned lrnN, double lrnAlpha, - double lrnBeta, double lrnK) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnLRNDescriptor_t, unsigned int, double, double, double); - static auto func_ptr = LoadSymbol("cudnnSetLRNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(normDesc, lrnN, lrnAlpha, lrnBeta, lrnK); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc, - unsigned *lrnN, - double *lrnAlpha, - double *lrnBeta, double *lrnK) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnLRNDescriptor_t, unsigned int *, double *, double *, double *); - static auto func_ptr = LoadSymbol("cudnnGetLRNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(normDesc, lrnN, lrnAlpha, lrnBeta, lrnK); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyLRNDescriptor(cudnnLRNDescriptor_t lrnDesc) { - using 
FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnLRNDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyLRNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(lrnDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnLRNCrossChannelForward( - cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnLRNCrossChannelForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, normDesc, lrnMode, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnLRNCrossChannelBackward( - cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode, - const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnLRNCrossChannelBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, normDesc, lrnMode, alpha, yDesc, y, dyDesc, dy, xDesc, - x, beta, dxDesc, dx); -} - -cudnnStatus_t CUDNNWINAPI cudnnDivisiveNormalizationForward( - cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, - cudnnDivNormMode_t mode, const void 
*alpha, - const cudnnTensorDescriptor_t xDesc, /* same desc for means, temp, temp2 */ - const void *x, - const void *means, /* if NULL, means are assumed to be zero */ - void *temp, void *temp2, const void *beta, - const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, void *, void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = - LoadSymbol("cudnnDivisiveNormalizationForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, temp, temp2, - beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnDivisiveNormalizationBackward( - cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, - cudnnDivNormMode_t mode, const void *alpha, - const cudnnTensorDescriptor_t - xDesc, /* same desc for x, means, dy, temp, temp2 */ - const void *x, - const void *means, /* if NULL, means are assumed to be zero */ - const void *dy, void *temp, void *temp2, const void *beta, - const cudnnTensorDescriptor_t dXdMeansDesc, /* same desc for dx, dMeans */ - void *dx, /* output x differential */ - void *dMeans) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, const void *, - void *, void *, const void *, const cudnnTensorDescriptor_t, void *, - void *); - static auto func_ptr = - LoadSymbol("cudnnDivisiveNormalizationBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, dy, temp, - temp2, beta, dXdMeansDesc, dx, dMeans); -} - -cudnnStatus_t CUDNNWINAPI cudnnDeriveBNTensorDescriptor( - cudnnTensorDescriptor_t derivedBnDesc, const cudnnTensorDescriptor_t xDesc, - cudnnBatchNormMode_t mode) { - using FuncPtr = 
cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, - cudnnBatchNormMode_t); - static auto func_ptr = LoadSymbol("cudnnDeriveBNTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(derivedBnDesc, xDesc, mode); -} - -cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardTraining( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, - - const void *alpha, /* alpha[0] = result blend factor */ - const void *beta, /* beta[0] = dest layer blend factor */ - - const cudnnTensorDescriptor_t xDesc, const void *x, /* NxCxHxW */ - const cudnnTensorDescriptor_t yDesc, void *y, /* NxCxHxW */ - - /* Shared desc for the next 6 tensors in the argument list. - Data type to be set as follows: - type = (typeOf(x) == double) ? double : float - Dimensions for this descriptor depend on normalization mode - - Spatial Normalization : tensors are expected to have dims 1xCx1x1 - (normalization is performed across NxHxW) - - Per-Activation Normalization : tensors are expected to have dims of - 1xCxHxW (normalization is performed across N) */ - const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, - - /* 'Gamma' and 'Beta' respectively in Ioffe and Szegedy's paper's notation - */ - const void *bnScale, const void *bnBias, - - /* MUST use factor=1 in the very first call of a complete training cycle. - Use a factor=1/(1+n) at N-th call to the function to get - Cumulative Moving Average (CMA) behavior - CMA[n] = (x[1]+...+x[n])/n - Since CMA[n+1] = (n*CMA[n]+x[n+1])/(n+1) = - ((n+1)*CMA[n]-CMA[n])/(n+1) + x[n+1]/(n+1) = - CMA[n]*(1-1/(n+1)) + x[n+1]*1/(n+1) */ - double exponentialAverageFactor, - - /* Used in Training phase only. - runningMean = newMean*factor + runningMean*(1-factor) */ - void *resultRunningMean, - /* Output in training mode, input in inference. 
Is the moving average - of variance[x] (factor is applied in the same way as for runningMean) */ - void *resultRunningVariance, - - /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and - backward functions. */ - double epsilon, - - /* Optionally save intermediate results from the forward pass here - - can be reused to speed up backward pass. NULL if unused */ - void *resultSaveMean, void *resultSaveInvVariance) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, - const void *, const void *, double, void *, void *, double, void *, - void *); - static auto func_ptr = - LoadSymbol("cudnnBatchNormalizationForwardTraining"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr( - handle, mode, alpha, beta, xDesc, x, yDesc, y, bnScaleBiasMeanVarDesc, - bnScale, bnBias, exponentialAverageFactor, resultRunningMean, - resultRunningVariance, epsilon, resultSaveMean, resultSaveInvVariance); -} - -cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardInference( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, - const void *alpha, /* alpha[0] = result blend factor */ - const void *beta, /* beta[0] = dest layer blend factor */ - const cudnnTensorDescriptor_t xDesc, const void *x, /* NxCxHxW */ - const cudnnTensorDescriptor_t yDesc, void *y, /* NxCxHxW */ - const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, const void *bnScale, - const void *bnBias, const void *estimatedMean, - const void *estimatedVariance, double epsilon) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, - const void *, const void *, const void *, const void *, double); - static auto func_ptr = - 
LoadSymbol("cudnnBatchNormalizationForwardInference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, alpha, beta, xDesc, x, yDesc, y, - bnScaleBiasMeanVarDesc, bnScale, bnBias, estimatedMean, - estimatedVariance, epsilon); -} - -cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationBackward( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, const void *alphaDataDiff, - const void *betaDataDiff, const void *alphaParamDiff, - const void *betaParamDiff, - const cudnnTensorDescriptor_t xDesc, /* same desc for x, dx, dy */ - const void *x, const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnTensorDescriptor_t dxDesc, void *dx, - /* Shared tensor desc for the 4 tensors below */ - const cudnnTensorDescriptor_t dBnScaleBiasDesc, - const void *bnScale, /* bnBias doesn't affect backpropagation */ - /* scale and bias diff are not backpropagated below this layer */ - void *dBnScaleResult, void *dBnBiasResult, - /* Same epsilon as forward pass */ - double epsilon, - - /* Optionally cached intermediate results from - forward pass */ - const void *savedMean, const void *savedInvVariance) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, - const void *, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, - const void *, void *, void *, double, const void *, const void *); - static auto func_ptr = LoadSymbol("cudnnBatchNormalizationBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, alphaDataDiff, betaDataDiff, alphaParamDiff, - betaParamDiff, xDesc, x, dyDesc, dy, dxDesc, dx, - dBnScaleBiasDesc, bnScale, dBnScaleResult, dBnBiasResult, - epsilon, savedMean, savedInvVariance); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreateSpatialTransformerDescriptor( - cudnnSpatialTransformerDescriptor_t *stDesc) { - using 
FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t *); - static auto func_ptr = - LoadSymbol("cudnnCreateSpatialTransformerDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetSpatialTransformerNdDescriptor( - cudnnSpatialTransformerDescriptor_t stDesc, cudnnSamplerType_t samplerType, - cudnnDataType_t dataType, const int nbDims, const int dimA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnSpatialTransformerDescriptor_t, cudnnSamplerType_t, cudnnDataType_t, - const int, const int[]); - static auto func_ptr = - LoadSymbol("cudnnSetSpatialTransformerNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stDesc, samplerType, dataType, nbDims, dimA); -} - -cudnnStatus_t CUDNNWINAPI cudnnDestroySpatialTransformerDescriptor( - cudnnSpatialTransformerDescriptor_t stDesc) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t); - static auto func_ptr = - LoadSymbol("cudnnDestroySpatialTransformerDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSpatialTfGridGeneratorForward( - cudnnHandle_t handle, const cudnnSpatialTransformerDescriptor_t stDesc, - const void *theta, void *grid) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *, - void *); - static auto func_ptr = - LoadSymbol("cudnnSpatialTfGridGeneratorForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, stDesc, theta, grid); -} - -cudnnStatus_t CUDNNWINAPI cudnnSpatialTfGridGeneratorBackward( - cudnnHandle_t handle, const cudnnSpatialTransformerDescriptor_t stDesc, - const void *dgrid, void *dtheta) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *, - void *); - static auto func_ptr = - 
LoadSymbol("cudnnSpatialTfGridGeneratorBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, stDesc, dgrid, dtheta); -} - -cudnnStatus_t CUDNNWINAPI cudnnSpatialTfSamplerForward( - cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *grid, const void *beta, cudnnTensorDescriptor_t yDesc, - void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, const void *, - cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnSpatialTfSamplerForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, stDesc, alpha, xDesc, x, grid, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnSpatialTfSamplerBackward( - cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t dxDesc, void *dx, - const void *alphaDgrid, const cudnnTensorDescriptor_t dyDesc, - const void *dy, const void *grid, const void *betaDgrid, void *dgrid) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, const void *, - void *); - static auto func_ptr = LoadSymbol("cudnnSpatialTfSamplerBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, stDesc, alpha, xDesc, x, beta, dxDesc, dx, alphaDgrid, - dyDesc, dy, grid, betaDgrid, dgrid); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t *dropoutDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t 
*); - static auto func_ptr = LoadSymbol("cudnnCreateDropoutDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dropoutDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyDropoutDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dropoutDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnDropoutGetStatesSize(cudnnHandle_t handle, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnDropoutGetStatesSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnDropoutGetReserveSpaceSize( - cudnnTensorDescriptor_t xdesc, size_t *sizeInBytes) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnDropoutGetReserveSpaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(xdesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetDropoutDescriptor( - cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float dropout, - void *states, size_t stateSizeInBytes, unsigned long long seed) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, - float, void *, size_t, unsigned long long); - static auto func_ptr = LoadSymbol("cudnnSetDropoutDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dropoutDesc, handle, dropout, states, stateSizeInBytes, seed); -} - -cudnnStatus_t CUDNNWINAPI cudnnRestoreDropoutDescriptor( - cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float dropout, - void *states, size_t stateSizeInBytes, unsigned long long seed) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI 
*)(cudnnDropoutDescriptor_t, cudnnHandle_t, - float, void *, size_t, unsigned long long); - static auto func_ptr = LoadSymbol("cudnnRestoreDropoutDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dropoutDesc, handle, dropout, states, stateSizeInBytes, seed); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetDropoutDescriptor( - cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float *dropout, - void **states, unsigned long long *seed) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, - float *, void **, unsigned long long *); - static auto func_ptr = LoadSymbol("cudnnGetDropoutDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dropoutDesc, handle, dropout, states, seed); -} - -cudnnStatus_t CUDNNWINAPI cudnnDropoutForward( - cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc, - const cudnnTensorDescriptor_t xdesc, const void *x, - const cudnnTensorDescriptor_t ydesc, void *y, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnDropoutDescriptor_t, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnDropoutForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dropoutDesc, xdesc, x, ydesc, y, reserveSpace, - reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnDropoutBackward( - cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc, - const cudnnTensorDescriptor_t dydesc, const void *dy, - const cudnnTensorDescriptor_t dxdesc, void *dx, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnDropoutDescriptor_t, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, void *, size_t); - static auto func_ptr = 
LoadSymbol("cudnnDropoutBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dropoutDesc, dydesc, dy, dxdesc, dx, reserveSpace, - reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateRNNDescriptor(cudnnRNNDescriptor_t *rnnDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateRNNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyRNNDescriptor(cudnnRNNDescriptor_t rnnDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyRNNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreatePersistentRNNPlan( - cudnnRNNDescriptor_t rnnDesc, const int minibatch, - const cudnnDataType_t dataType, cudnnPersistentRNNPlan_t *plan) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, const int, - const cudnnDataType_t, - cudnnPersistentRNNPlan_t *); - static auto func_ptr = LoadSymbol("cudnnCreatePersistentRNNPlan"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, minibatch, dataType, plan); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetPersistentRNNPlan( - cudnnRNNDescriptor_t rnnDesc, cudnnPersistentRNNPlan_t plan) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, - cudnnPersistentRNNPlan_t); - static auto func_ptr = LoadSymbol("cudnnSetPersistentRNNPlan"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, plan); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyPersistentRNNPlan(cudnnPersistentRNNPlan_t plan) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPersistentRNNPlan_t); - static auto func_ptr = LoadSymbol("cudnnDestroyPersistentRNNPlan"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan); -} - 
-cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor( - cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, const int hiddenSize, - const int numLayers, - cudnnDropoutDescriptor_t - dropoutDesc, /* Between layers, not between recurrent steps. */ - cudnnRNNInputMode_t inputMode, cudnnDirectionMode_t direction, - cudnnRNNMode_t mode, cudnnRNNAlgo_t algo, cudnnDataType_t dataType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int, - cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t, - cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, - inputMode, direction, mode, algo, dataType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNDescriptor( - cudnnHandle_t cudnnHandle, cudnnRNNDescriptor_t rnnDesc, int *hiddenSize, - int *numLayers, cudnnDropoutDescriptor_t *dropoutDesc, - cudnnRNNInputMode_t *inputMode, cudnnDirectionMode_t *direction, - cudnnRNNMode_t *mode, cudnnRNNAlgo_t *algo, cudnnDataType_t *dataType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, int *, int *, - cudnnDropoutDescriptor_t *, cudnnRNNInputMode_t *, cudnnDirectionMode_t *, - cudnnRNNMode_t *, cudnnRNNAlgo_t *, cudnnDataType_t *); - static auto func_ptr = LoadSymbol("cudnnGetRNNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(cudnnHandle, rnnDesc, hiddenSize, numLayers, dropoutDesc, - inputMode, direction, mode, algo, dataType); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetRNNMatrixMathType(cudnnRNNDescriptor_t desc, - cudnnMathType_t math) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnMathType_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNMatrixMathType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(desc, math); -} - 
-cudnnStatus_t CUDNNWINAPI cudnnGetRNNWorkspaceSize( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetRNNWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNTrainingReserveSize( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetRNNTrainingReserveSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetRNNParamsSize(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const cudnnTensorDescriptor_t xDesc, size_t *sizeInBytes, - cudnnDataType_t dataType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnTensorDescriptor_t, - size_t *, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnGetRNNParamsSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, xDesc, sizeInBytes, dataType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerMatrixParams( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int layer, - const cudnnTensorDescriptor_t xDesc, const cudnnFilterDescriptor_t wDesc, - const void *w, const int linLayerID, - cudnnFilterDescriptor_t linLayerMatDesc, void **linLayerMat) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - 
cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, - const void *, const int, cudnnFilterDescriptor_t, void **); - static auto func_ptr = LoadSymbol("cudnnGetRNNLinLayerMatrixParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, layer, xDesc, wDesc, w, linLayerID, - linLayerMatDesc, linLayerMat); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerBiasParams( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int layer, - const cudnnTensorDescriptor_t xDesc, const cudnnFilterDescriptor_t wDesc, - const void *w, const int linLayerID, - cudnnFilterDescriptor_t linLayerBiasDesc, void **linLayerBias) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, - const void *, const int, cudnnFilterDescriptor_t, void **); - static auto func_ptr = LoadSymbol("cudnnGetRNNLinLayerBiasParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, layer, xDesc, wDesc, w, linLayerID, - linLayerBiasDesc, linLayerBias); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNForwardInference( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t *yDesc, void *y, - const cudnnTensorDescriptor_t hyDesc, void *hy, - const cudnnTensorDescriptor_t cyDesc, void *cy, void *workspace, - size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const 
cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnRNNForwardInference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, - wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNForwardTraining( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t *yDesc, void *y, - const cudnnTensorDescriptor_t hyDesc, void *hy, - const cudnnTensorDescriptor_t cyDesc, void *cy, void *workspace, - size_t workSpaceSizeInBytes, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *, - size_t); - static auto func_ptr = LoadSymbol("cudnnRNNForwardTraining"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, - wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, - workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnRNNBackwardData(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t 
*yDesc, - const void *y, const cudnnTensorDescriptor_t *dyDesc, - const void *dy, const cudnnTensorDescriptor_t dhyDesc, - const void *dhy, const cudnnTensorDescriptor_t dcyDesc, - const void *dcy, const cudnnFilterDescriptor_t wDesc, - const void *w, const cudnnTensorDescriptor_t hxDesc, - const void *hx, const cudnnTensorDescriptor_t cxDesc, - const void *cx, const cudnnTensorDescriptor_t *dxDesc, - void *dx, const cudnnTensorDescriptor_t dhxDesc, void *dhx, - const cudnnTensorDescriptor_t dcxDesc, void *dcx, - void *workspace, size_t workSpaceSizeInBytes, - void *reserveSpace, size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *, - size_t); - static auto func_ptr = LoadSymbol("cudnnRNNBackwardData"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc, - dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc, - dx, dhxDesc, dhx, dcxDesc, dcx, workspace, - workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardWeights( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t *yDesc, const void *y, const void *workspace, - size_t workSpaceSizeInBytes, const cudnnFilterDescriptor_t dwDesc, void *dw, - const void 
*reserveSpace, size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, const void *, const void *, size_t, - const cudnnFilterDescriptor_t, void *, const void *, size_t); - static auto func_ptr = LoadSymbol("cudnnRNNBackwardWeights"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y, - workspace, workSpaceSizeInBytes, dwDesc, dw, reserveSpace, - reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateCTCLossDescriptor(cudnnCTCLossDescriptor_t *ctcLossDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateCTCLossDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctcLossDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetCTCLossDescriptor( - cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t compType) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetCTCLossDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctcLossDesc, compType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossDescriptor( - cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t *compType) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t *); - static auto func_ptr = LoadSymbol("cudnnGetCTCLossDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctcLossDesc, compType); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t); - static auto func_ptr = 
LoadSymbol("cudnnDestroyCTCLossDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctcLossDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnCTCLoss( - cudnnHandle_t handle, - const cudnnTensorDescriptor_t - probsDesc, /* Tensor descriptor for probabilities, the dimensions are - T,N,A (T is the timing steps, N is the mini batch size, A - is the alphabet size) */ - const void *probs, /* probabilities after softmax, in GPU memory */ - const int *labels, /* labels, in CPU memory */ - const int *labelLengths, /* the length of each label, in CPU memory */ - const int *inputLengths, /* the lengths of timing steps in each batch, in - CPU memory */ - void *costs, /* the returned costs of CTC, in GPU memory */ - const cudnnTensorDescriptor_t - gradientsDesc, /* Tensor descriptor for gradients, the dimensions are - T,N,A */ - const void *gradients, /* the returned CTC gradients, in GPU memory, to - compute costs only, set it to NULL */ - cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */ - cudnnCTCLossDescriptor_t ctcLossDesc, - void *workspace, /* pointer to the workspace, in GPU memory */ - size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const int *, - const int *, const int *, void *, const cudnnTensorDescriptor_t, - const void *, cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, void *, - size_t); - static auto func_ptr = LoadSymbol("cudnnCTCLoss"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, probsDesc, probs, labels, labelLengths, inputLengths, - costs, gradientsDesc, gradients, algo, ctcLossDesc, workspace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossWorkspaceSize( - cudnnHandle_t handle, - const cudnnTensorDescriptor_t - probsDesc, /* Tensor descriptor for probabilities, the dimensions are - T,N,A (T is the timing steps, N is the mini batch size, A - is the alphabet size) */ - 
const cudnnTensorDescriptor_t - gradientsDesc, /* Tensor descriptor for gradients, the dimensions are - T,N,A. To compute costs only, set it to NULL */ - const int *labels, /* labels, in CPU memory */ - const int *labelLengths, /* the length of each label, in CPU memory */ - const int *inputLengths, /* the lengths of timing steps in each batch, in - CPU memory */ - cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */ - cudnnCTCLossDescriptor_t ctcLossDesc, size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const int *, const int *, const int *, - cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetCTCLossWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, probsDesc, gradientsDesc, labels, labelLengths, - inputLengths, algo, ctcLossDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor_v6( - cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, const int hiddenSize, - const int numLayers, - cudnnDropoutDescriptor_t - dropoutDesc, /* Between layers, not between recurrent steps. 
*/ - cudnnRNNInputMode_t inputMode, cudnnDirectionMode_t direction, - cudnnRNNMode_t mode, cudnnRNNAlgo_t algo, cudnnDataType_t dataType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int, - cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t, - cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNDescriptor_v6"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, - inputMode, direction, mode, algo, dataType); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor_v5( - cudnnRNNDescriptor_t rnnDesc, int hiddenSize, int numLayers, - cudnnDropoutDescriptor_t - dropoutDesc, /* Between layers, not between recurrent steps. */ - cudnnRNNInputMode_t inputMode, cudnnDirectionMode_t direction, - cudnnRNNMode_t mode, cudnnDataType_t dataType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnRNNDescriptor_t, int, int, cudnnDropoutDescriptor_t, - cudnnRNNInputMode_t, cudnnDirectionMode_t, cudnnRNNMode_t, - cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNDescriptor_v5"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode, - direction, mode, dataType); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cudnn_7_1.inc b/third_party/xla/third_party/tsl/tsl/cuda/cudnn_7_1.inc deleted file mode 100644 index 5330e6d0584c2f..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cudnn_7_1.inc +++ /dev/null @@ -1,2361 +0,0 @@ -// Auto-generated, do not edit. 
- -extern "C" { - -size_t CUDNNWINAPI cudnnGetVersion(void) { - using FuncPtr = size_t(CUDNNWINAPI *)(); - static auto func_ptr = LoadSymbol("cudnnGetVersion"); - if (!func_ptr) return 0; - return func_ptr(); -} - -size_t CUDNNWINAPI cudnnGetCudartVersion(void) { - using FuncPtr = size_t(CUDNNWINAPI *)(); - static auto func_ptr = LoadSymbol("cudnnGetCudartVersion"); - if (!func_ptr) return 0; - return func_ptr(); -} - -const char *CUDNNWINAPI cudnnGetErrorString(cudnnStatus_t status) { - using FuncPtr = const char *(CUDNNWINAPI *)(cudnnStatus_t); - static auto func_ptr = LoadSymbol("cudnnGetErrorString"); - if (!func_ptr) return "cudnnGetErrorString symbol not found."; - return func_ptr(status); -} - -cudnnStatus_t CUDNNWINAPI cudnnQueryRuntimeError(cudnnHandle_t handle, - cudnnStatus_t *rstatus, - cudnnErrQueryMode_t mode, - cudnnRuntimeTag_t *tag) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnStatus_t *, cudnnErrQueryMode_t, cudnnRuntimeTag_t *); - static auto func_ptr = LoadSymbol("cudnnQueryRuntimeError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rstatus, mode, tag); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetProperty(libraryPropertyType type, - int *value) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(libraryPropertyType, int *); - static auto func_ptr = LoadSymbol("cudnnGetProperty"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type, value); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreate(cudnnHandle_t *handle) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t *); - static auto func_ptr = LoadSymbol("cudnnCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cudnnStatus_t CUDNNWINAPI cudnnDestroy(cudnnHandle_t handle) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t); - static auto func_ptr = LoadSymbol("cudnnDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cudnnStatus_t 
CUDNNWINAPI cudnnSetStream(cudnnHandle_t handle, - cudaStream_t streamId) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudnnSetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetStream(cudnnHandle_t handle, - cudaStream_t *streamId) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t *); - static auto func_ptr = LoadSymbol("cudnnGetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensor4dDescriptor( - cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format, - cudnnDataType_t dataType, /* image data type */ - int n, /* number of inputs (batch size) */ - int c, /* number of input feature maps */ - int h, /* height of input section */ - int w) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t, - cudnnDataType_t, int, int, int, int); - static auto func_ptr = LoadSymbol("cudnnSetTensor4dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, format, dataType, n, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensor4dDescriptorEx( - cudnnTensorDescriptor_t tensorDesc, - cudnnDataType_t dataType, /* image data type */ - int n, /* number of inputs (batch size) */ - int c, /* number of input feature maps */ - int h, /* height of input section */ - int w, /* width of input section */ - int nStride, int cStride, int hStride, int wStride) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI 
*)(cudnnTensorDescriptor_t, cudnnDataType_t, - int, int, int, int, int, int, int, int); - static auto func_ptr = LoadSymbol("cudnnSetTensor4dDescriptorEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride, - wStride); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetTensor4dDescriptor( - const cudnnTensorDescriptor_t tensorDesc, - cudnnDataType_t *dataType, /* image data type */ - int *n, /* number of inputs (batch size) */ - int *c, /* number of input feature maps */ - int *h, /* height of input section */ - int *w, /* width of input section */ - int *nStride, int *cStride, int *hStride, int *wStride) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnTensorDescriptor_t, cudnnDataType_t *, int *, int *, int *, - int *, int *, int *, int *, int *); - static auto func_ptr = LoadSymbol("cudnnGetTensor4dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride, - wStride); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensorNdDescriptor( - cudnnTensorDescriptor_t tensorDesc, cudnnDataType_t dataType, int nbDims, - const int dimA[], const int strideA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnTensorDescriptor_t, cudnnDataType_t, int, const int[], const int[]); - static auto func_ptr = LoadSymbol("cudnnSetTensorNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, dataType, nbDims, dimA, strideA); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensorNdDescriptorEx( - cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format, - cudnnDataType_t dataType, int nbDims, const int dimA[]) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t, - cudnnDataType_t, int, const int[]); - static auto func_ptr = LoadSymbol("cudnnSetTensorNdDescriptorEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(tensorDesc, format, dataType, nbDims, dimA); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetTensorNdDescriptor( - const cudnnTensorDescriptor_t tensorDesc, int nbDimsRequested, - cudnnDataType_t *dataType, int *nbDims, int dimA[], int strideA[]) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(const cudnnTensorDescriptor_t, int, - cudnnDataType_t *, int *, int[], int[]); - static auto func_ptr = LoadSymbol("cudnnGetTensorNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, nbDimsRequested, dataType, nbDims, dimA, strideA); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetTensorSizeInBytes( - const cudnnTensorDescriptor_t tensorDesc, size_t *size) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(const cudnnTensorDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetTensorSizeInBytes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, size); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyTensorDescriptor(cudnnTensorDescriptor_t tensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnTransformTensor( - cudnnHandle_t handle, const void *alpha, - const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, - const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnTransformTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnAddTensor(cudnnHandle_t handle, - const void *alpha, - const cudnnTensorDescriptor_t aDesc, - const void *A, const void *beta, - 
const cudnnTensorDescriptor_t cDesc, - void *C) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnAddTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, aDesc, A, beta, cDesc, C); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateOpTensorDescriptor(cudnnOpTensorDescriptor_t *opTensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateOpTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(opTensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetOpTensorDescriptor( - cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t opTensorOp, - cudnnDataType_t opTensorCompType, cudnnNanPropagation_t opTensorNanOpt) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t, - cudnnDataType_t, cudnnNanPropagation_t); - static auto func_ptr = LoadSymbol("cudnnSetOpTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(opTensorDesc, opTensorOp, opTensorCompType, opTensorNanOpt); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetOpTensorDescriptor( - const cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t *opTensorOp, - cudnnDataType_t *opTensorCompType, cudnnNanPropagation_t *opTensorNanOpt) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t *, cudnnDataType_t *, - cudnnNanPropagation_t *); - static auto func_ptr = LoadSymbol("cudnnGetOpTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(opTensorDesc, opTensorOp, opTensorCompType, opTensorNanOpt); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI 
*)(cudnnOpTensorDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyOpTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(opTensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnOpTensor( - cudnnHandle_t handle, const cudnnOpTensorDescriptor_t opTensorDesc, - const void *alpha1, const cudnnTensorDescriptor_t aDesc, const void *A, - const void *alpha2, const cudnnTensorDescriptor_t bDesc, const void *B, - const void *beta, const cudnnTensorDescriptor_t cDesc, void *C) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnOpTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnOpTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opTensorDesc, alpha1, aDesc, A, alpha2, bDesc, B, - beta, cDesc, C); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreateReduceTensorDescriptor( - cudnnReduceTensorDescriptor_t *reduceTensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t *); - static auto func_ptr = - LoadSymbol("cudnnCreateReduceTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(reduceTensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetReduceTensorDescriptor( - cudnnReduceTensorDescriptor_t reduceTensorDesc, - cudnnReduceTensorOp_t reduceTensorOp, cudnnDataType_t reduceTensorCompType, - cudnnNanPropagation_t reduceTensorNanOpt, - cudnnReduceTensorIndices_t reduceTensorIndices, - cudnnIndicesType_t reduceTensorIndicesType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t, cudnnDataType_t, - cudnnNanPropagation_t, cudnnReduceTensorIndices_t, cudnnIndicesType_t); - static auto func_ptr = LoadSymbol("cudnnSetReduceTensorDescriptor"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType, - reduceTensorNanOpt, reduceTensorIndices, - reduceTensorIndicesType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetReduceTensorDescriptor( - const cudnnReduceTensorDescriptor_t reduceTensorDesc, - cudnnReduceTensorOp_t *reduceTensorOp, - cudnnDataType_t *reduceTensorCompType, - cudnnNanPropagation_t *reduceTensorNanOpt, - cudnnReduceTensorIndices_t *reduceTensorIndices, - cudnnIndicesType_t *reduceTensorIndicesType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t *, - cudnnDataType_t *, cudnnNanPropagation_t *, cudnnReduceTensorIndices_t *, - cudnnIndicesType_t *); - static auto func_ptr = LoadSymbol("cudnnGetReduceTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType, - reduceTensorNanOpt, reduceTensorIndices, - reduceTensorIndicesType); -} - -cudnnStatus_t CUDNNWINAPI cudnnDestroyReduceTensorDescriptor( - cudnnReduceTensorDescriptor_t reduceTensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t); - static auto func_ptr = - LoadSymbol("cudnnDestroyReduceTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(reduceTensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetReductionIndicesSize( - cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, - const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnReduceTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetReductionIndicesSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes); -} - -cudnnStatus_t 
CUDNNWINAPI cudnnGetReductionWorkspaceSize( - cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, - const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnReduceTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetReductionWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnReduceTensor( - cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, - void *indices, size_t indicesSizeInBytes, void *workspace, - size_t workspaceSizeInBytes, const void *alpha, - const cudnnTensorDescriptor_t aDesc, const void *A, const void *beta, - const cudnnTensorDescriptor_t cDesc, void *C) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnReduceTensorDescriptor_t, void *, size_t, - void *, size_t, const void *, const cudnnTensorDescriptor_t, const void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnReduceTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, reduceTensorDesc, indices, indicesSizeInBytes, - workspace, workspaceSizeInBytes, alpha, aDesc, A, beta, cDesc, - C); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensor(cudnnHandle_t handle, - const cudnnTensorDescriptor_t yDesc, - void *y, const void *valuePtr) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *); - static auto func_ptr = LoadSymbol("cudnnSetTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, yDesc, y, valuePtr); -} - -cudnnStatus_t CUDNNWINAPI cudnnScaleTensor(cudnnHandle_t handle, - const cudnnTensorDescriptor_t yDesc, - void *y, const 
void *alpha) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *); - static auto func_ptr = LoadSymbol("cudnnScaleTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, yDesc, y, alpha); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateFilterDescriptor(cudnnFilterDescriptor_t *filterDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateFilterDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetFilter4dDescriptor( - cudnnFilterDescriptor_t filterDesc, - cudnnDataType_t dataType, /* image data type */ - cudnnTensorFormat_t format, int k, /* number of output feature maps */ - int c, /* number of input feature maps */ - int h, /* height of each input filter */ - int w) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t, - cudnnTensorFormat_t, int, int, int, int); - static auto func_ptr = LoadSymbol("cudnnSetFilter4dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc, dataType, format, k, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetFilter4dDescriptor( - const cudnnFilterDescriptor_t filterDesc, - cudnnDataType_t *dataType, /* image data type */ - cudnnTensorFormat_t *format, int *k, /* number of output feature maps */ - int *c, /* number of input feature maps */ - int *h, /* height of each input filter */ - int *w) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnFilterDescriptor_t, cudnnDataType_t *, cudnnTensorFormat_t *, - int *, int *, int *, int *); - static auto func_ptr = LoadSymbol("cudnnGetFilter4dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc, dataType, format, k, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetFilterNdDescriptor( - cudnnFilterDescriptor_t 
filterDesc, - cudnnDataType_t dataType, /* image data type */ - cudnnTensorFormat_t format, int nbDims, const int filterDimA[]) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t, - cudnnTensorFormat_t, int, const int[]); - static auto func_ptr = LoadSymbol("cudnnSetFilterNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc, dataType, format, nbDims, filterDimA); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetFilterNdDescriptor( - const cudnnFilterDescriptor_t filterDesc, int nbDimsRequested, - cudnnDataType_t *dataType, /* image data type */ - cudnnTensorFormat_t *format, int *nbDims, int filterDimA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnFilterDescriptor_t, int, cudnnDataType_t *, - cudnnTensorFormat_t *, int *, int[]); - static auto func_ptr = LoadSymbol("cudnnGetFilterNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc, nbDimsRequested, dataType, format, nbDims, - filterDimA); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyFilterDescriptor(cudnnFilterDescriptor_t filterDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyFilterDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t *convDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t *); - static auto func_ptr = - LoadSymbol("cudnnCreateConvolutionDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionMathType( - cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t mathType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, - cudnnMathType_t); - static auto func_ptr = LoadSymbol("cudnnSetConvolutionMathType"); 
- if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, mathType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionMathType( - cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t *mathType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, - cudnnMathType_t *); - static auto func_ptr = LoadSymbol("cudnnGetConvolutionMathType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, mathType); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionGroupCount( - cudnnConvolutionDescriptor_t convDesc, int groupCount) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int); - static auto func_ptr = LoadSymbol("cudnnSetConvolutionGroupCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, groupCount); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionGroupCount( - cudnnConvolutionDescriptor_t convDesc, int *groupCount) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int *); - static auto func_ptr = LoadSymbol("cudnnGetConvolutionGroupCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, groupCount); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetConvolution2dDescriptor( - cudnnConvolutionDescriptor_t convDesc, int pad_h, /* zero-padding height */ - int pad_w, /* zero-padding width */ - int u, /* vertical filter stride */ - int v, /* horizontal filter stride */ - int dilation_h, /* filter dilation in the vertical dimension */ - int dilation_w, /* filter dilation in the horizontal dimension */ - cudnnConvolutionMode_t mode, cudnnDataType_t computeType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnConvolutionDescriptor_t, int, int, int, int, int, int, - cudnnConvolutionMode_t, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetConvolution2dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, pad_h, pad_w, u, v, 
dilation_h, dilation_w, mode, - computeType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dDescriptor( - const cudnnConvolutionDescriptor_t convDesc, - int *pad_h, /* zero-padding height */ - int *pad_w, /* zero-padding width */ - int *u, /* vertical filter stride */ - int *v, /* horizontal filter stride */ - int *dilation_h, /* filter dilation in the vertical dimension */ - int *dilation_w, /* filter dilation in the horizontal dimension */ - cudnnConvolutionMode_t *mode, cudnnDataType_t *computeType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnConvolutionDescriptor_t, int *, int *, int *, int *, int *, - int *, cudnnConvolutionMode_t *, cudnnDataType_t *); - static auto func_ptr = LoadSymbol("cudnnGetConvolution2dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode, - computeType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dForwardOutputDim( - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t inputTensorDesc, - const cudnnFilterDescriptor_t filterDesc, int *n, int *c, int *h, int *w) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, int *, int *, int *, int *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolution2dForwardOutputDim"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, inputTensorDesc, filterDesc, n, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionNdDescriptor( - cudnnConvolutionDescriptor_t convDesc, int arrayLength, /* nbDims-2 size */ - const int padA[], const int filterStrideA[], const int dilationA[], - cudnnConvolutionMode_t mode, cudnnDataType_t computeType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnConvolutionDescriptor_t, int, const int[], const int[], const int[], - cudnnConvolutionMode_t, cudnnDataType_t); - static auto func_ptr 
= LoadSymbol("cudnnSetConvolutionNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, arrayLength, padA, filterStrideA, dilationA, mode, - computeType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionNdDescriptor( - const cudnnConvolutionDescriptor_t convDesc, int arrayLengthRequested, - int *arrayLength, int padA[], int strideA[], int dilationA[], - cudnnConvolutionMode_t *mode, cudnnDataType_t *computeType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnConvolutionDescriptor_t, int, int *, int[], int[], int[], - cudnnConvolutionMode_t *, cudnnDataType_t *); - static auto func_ptr = LoadSymbol("cudnnGetConvolutionNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, arrayLengthRequested, arrayLength, padA, strideA, - dilationA, mode, computeType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionNdForwardOutputDim( - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t inputTensorDesc, - const cudnnFilterDescriptor_t filterDesc, int nbDims, - int tensorOutputDimA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, int, int[]); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionNdForwardOutputDim"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, inputTensorDesc, filterDesc, nbDims, - tensorOutputDimA); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyConvolutionDescriptor(cudnnConvolutionDescriptor_t convDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t); - static auto func_ptr = - LoadSymbol("cudnnDestroyConvolutionDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetConvolutionForwardAlgorithmMaxCount(cudnnHandle_t handle, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI 
*)(cudnnHandle_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionForwardAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, count); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionForwardAlgorithm( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnFilterDescriptor_t wDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t yDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, const int, int *, - cudnnConvolutionFwdAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionForwardAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, requestedAlgoCount, - returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionForwardAlgorithmEx( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t yDesc, void *y, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults, - void *workSpace, size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *, - const int, int *, cudnnConvolutionFwdAlgoPerf_t *, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionForwardAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, x, wDesc, w, convDesc, yDesc, y, - 
requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardAlgorithm( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnFilterDescriptor_t wDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t yDesc, - cudnnConvolutionFwdPreference_t preference, size_t memoryLimitInBytes, - cudnnConvolutionFwdAlgo_t *algo) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, cudnnConvolutionFwdPreference_t, size_t, - cudnnConvolutionFwdAlgo_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionForwardAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, preference, - memoryLimitInBytes, algo); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardAlgorithm_v7( - cudnnHandle_t handle, const cudnnTensorDescriptor_t srcDesc, - const cudnnFilterDescriptor_t filterDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t destDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, const int, int *, - cudnnConvolutionFwdAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionForwardAlgorithm_v7"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, srcDesc, filterDesc, convDesc, destDesc, - requestedAlgoCount, returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardWorkspaceSize( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnFilterDescriptor_t wDesc, - 
const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t yDesc, cudnnConvolutionFwdAlgo_t algo, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, cudnnConvolutionFwdAlgo_t, size_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionForwardWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, algo, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionForward( - cudnnHandle_t handle, const void *alpha, - const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo, - void *workSpace, size_t workSpaceSizeInBytes, const void *beta, - const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *, - size_t, const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnConvolutionForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, xDesc, x, wDesc, w, convDesc, algo, workSpace, - workSpaceSizeInBytes, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionBiasActivationForward( - cudnnHandle_t handle, const void *alpha1, - const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo, - void *workSpace, size_t workSpaceSizeInBytes, const void *alpha2, - const cudnnTensorDescriptor_t zDesc, const void *z, - const cudnnTensorDescriptor_t biasDesc, 
const void *bias, - const cudnnActivationDescriptor_t activationDesc, - const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *, - size_t, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnActivationDescriptor_t, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = - LoadSymbol("cudnnConvolutionBiasActivationForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha1, xDesc, x, wDesc, w, convDesc, algo, workSpace, - workSpaceSizeInBytes, alpha2, zDesc, z, biasDesc, bias, - activationDesc, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardBias( - cudnnHandle_t handle, const void *alpha, - const cudnnTensorDescriptor_t dyDesc, const void *dy, const void *beta, - const cudnnTensorDescriptor_t dbDesc, void *db) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnConvolutionBackwardBias"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, dyDesc, dy, beta, dbDesc, db); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithmMaxCount( - cudnnHandle_t handle, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardFilterAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, count); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardFilterAlgorithm( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnTensorDescriptor_t dyDesc, - 
const cudnnConvolutionDescriptor_t convDesc, - const cudnnFilterDescriptor_t dwDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnFilterDescriptor_t, const int, int *, - cudnnConvolutionBwdFilterAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionBackwardFilterAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, requestedAlgoCount, - returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardFilterAlgorithmEx( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnTensorDescriptor_t dyDesc, const void *y, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnFilterDescriptor_t dwDesc, void *dw, - const int requestedAlgoCount, int *returnedAlgoCount, - cudnnConvolutionBwdFilterAlgoPerf_t *perfResults, void *workSpace, - size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, void *, - const int, int *, cudnnConvolutionBwdFilterAlgoPerf_t *, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionBackwardFilterAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, x, dyDesc, y, convDesc, dwDesc, dw, - requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithm( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - 
const cudnnFilterDescriptor_t dwDesc, - cudnnConvolutionBwdFilterPreference_t preference, size_t memoryLimitInBytes, - cudnnConvolutionBwdFilterAlgo_t *algo) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterPreference_t, - size_t, cudnnConvolutionBwdFilterAlgo_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardFilterAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, preference, - memoryLimitInBytes, algo); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithm_v7( - cudnnHandle_t handle, const cudnnTensorDescriptor_t srcDesc, - const cudnnTensorDescriptor_t diffDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnFilterDescriptor_t gradDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnFilterDescriptor_t, const int, int *, - cudnnConvolutionBwdFilterAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardFilterAlgorithm_v7"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, srcDesc, diffDesc, convDesc, gradDesc, - requestedAlgoCount, returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterWorkspaceSize( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnFilterDescriptor_t gradDesc, - cudnnConvolutionBwdFilterAlgo_t algo, size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const 
cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, size_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardFilterWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, dyDesc, convDesc, gradDesc, algo, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardFilter( - cudnnHandle_t handle, const void *alpha, - const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnConvolutionDescriptor_t convDesc, - cudnnConvolutionBwdFilterAlgo_t algo, void *workSpace, - size_t workSpaceSizeInBytes, const void *beta, - const cudnnFilterDescriptor_t dwDesc, void *dw) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, - void *, size_t, const void *, const cudnnFilterDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnConvolutionBackwardFilter"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, xDesc, x, dyDesc, dy, convDesc, algo, - workSpace, workSpaceSizeInBytes, beta, dwDesc, dw); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithmMaxCount( - cudnnHandle_t handle, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardDataAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, count); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardDataAlgorithm( - cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t 
dxDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, const int, int *, - cudnnConvolutionBwdDataAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionBackwardDataAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, requestedAlgoCount, - returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardDataAlgorithmEx( - cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t dxDesc, void *dx, - const int requestedAlgoCount, int *returnedAlgoCount, - cudnnConvolutionBwdDataAlgoPerf_t *perfResults, void *workSpace, - size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *, - const int, int *, cudnnConvolutionBwdDataAlgoPerf_t *, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionBackwardDataAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, wDesc, w, dyDesc, dy, convDesc, dxDesc, dx, - requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithm( - cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t dxDesc, - cudnnConvolutionBwdDataPreference_t preference, 
size_t memoryLimitInBytes, - cudnnConvolutionBwdDataAlgo_t *algo) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataPreference_t, - size_t, cudnnConvolutionBwdDataAlgo_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardDataAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, preference, - memoryLimitInBytes, algo); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithm_v7( - cudnnHandle_t handle, const cudnnFilterDescriptor_t filterDesc, - const cudnnTensorDescriptor_t diffDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t gradDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, const int, int *, - cudnnConvolutionBwdDataAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardDataAlgorithm_v7"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, filterDesc, diffDesc, convDesc, gradDesc, - requestedAlgoCount, returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataWorkspaceSize( - cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t dxDesc, cudnnConvolutionBwdDataAlgo_t algo, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, 
cudnnConvolutionBwdDataAlgo_t, size_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardDataWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, algo, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardData( - cudnnHandle_t handle, const void *alpha, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnConvolutionDescriptor_t convDesc, - cudnnConvolutionBwdDataAlgo_t algo, void *workSpace, - size_t workSpaceSizeInBytes, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdDataAlgo_t, void *, - size_t, const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnConvolutionBackwardData"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, wDesc, w, dyDesc, dy, convDesc, algo, - workSpace, workSpaceSizeInBytes, beta, dxDesc, dx); -} - -cudnnStatus_t CUDNNWINAPI -cudnnIm2Col(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const void *x, const cudnnFilterDescriptor_t wDesc, - const cudnnConvolutionDescriptor_t convDesc, void *colBuffer) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, - const void *, const cudnnFilterDescriptor_t, - const cudnnConvolutionDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnIm2Col"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, x, wDesc, convDesc, colBuffer); -} - -cudnnStatus_t CUDNNWINAPI cudnnSoftmaxForward( - cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode, - const void *alpha, const cudnnTensorDescriptor_t 
xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnSoftmaxForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, mode, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnSoftmaxBackward( - cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode, - const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y, - const cudnnTensorDescriptor_t dyDesc, const void *dy, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnSoftmaxBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, mode, alpha, yDesc, y, dyDesc, dy, beta, dxDesc, - dx); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t *poolingDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPoolingDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreatePoolingDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetPooling2dDescriptor( - cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t mode, - cudnnNanPropagation_t maxpoolingNanOpt, int windowHeight, int windowWidth, - int verticalPadding, int horizontalPadding, int verticalStride, - int horizontalStride) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - 
cudnnPoolingDescriptor_t, cudnnPoolingMode_t, cudnnNanPropagation_t, int, - int, int, int, int, int); - static auto func_ptr = LoadSymbol("cudnnSetPooling2dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight, - windowWidth, verticalPadding, horizontalPadding, - verticalStride, horizontalStride); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetPooling2dDescriptor( - const cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t *mode, - cudnnNanPropagation_t *maxpoolingNanOpt, int *windowHeight, - int *windowWidth, int *verticalPadding, int *horizontalPadding, - int *verticalStride, int *horizontalStride) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnPoolingDescriptor_t, cudnnPoolingMode_t *, - cudnnNanPropagation_t *, int *, int *, int *, int *, int *, int *); - static auto func_ptr = LoadSymbol("cudnnGetPooling2dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight, - windowWidth, verticalPadding, horizontalPadding, - verticalStride, horizontalStride); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetPoolingNdDescriptor( - cudnnPoolingDescriptor_t poolingDesc, const cudnnPoolingMode_t mode, - const cudnnNanPropagation_t maxpoolingNanOpt, int nbDims, - const int windowDimA[], const int paddingA[], const int strideA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnPoolingDescriptor_t, const cudnnPoolingMode_t, - const cudnnNanPropagation_t, int, const int[], const int[], const int[]); - static auto func_ptr = LoadSymbol("cudnnSetPoolingNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, mode, maxpoolingNanOpt, nbDims, windowDimA, - paddingA, strideA); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetPoolingNdDescriptor( - const cudnnPoolingDescriptor_t poolingDesc, int nbDimsRequested, - cudnnPoolingMode_t *mode, cudnnNanPropagation_t *maxpoolingNanOpt, - int 
*nbDims, int windowDimA[], int paddingA[], int strideA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnPoolingDescriptor_t, int, cudnnPoolingMode_t *, - cudnnNanPropagation_t *, int *, int[], int[], int[]); - static auto func_ptr = LoadSymbol("cudnnGetPoolingNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, nbDimsRequested, mode, maxpoolingNanOpt, nbDims, - windowDimA, paddingA, strideA); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetPoolingNdForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc, - const cudnnTensorDescriptor_t inputTensorDesc, - int nbDims, int outputTensorDimA[]) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, - const cudnnTensorDescriptor_t, int, int[]); - static auto func_ptr = - LoadSymbol("cudnnGetPoolingNdForwardOutputDim"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, inputTensorDesc, nbDims, outputTensorDimA); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetPooling2dForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc, - const cudnnTensorDescriptor_t inputTensorDesc, - int *n, int *c, int *h, int *w) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, - const cudnnTensorDescriptor_t, - int *, int *, int *, int *); - static auto func_ptr = - LoadSymbol("cudnnGetPooling2dForwardOutputDim"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, inputTensorDesc, n, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyPoolingDescriptor(cudnnPoolingDescriptor_t poolingDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPoolingDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyPoolingDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnPoolingForward( - cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc, - const void *alpha, const 
cudnnTensorDescriptor_t xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnPoolingForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, poolingDesc, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnPoolingBackward( - cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc, - const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnPoolingBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, poolingDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x, - beta, dxDesc, dx); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateActivationDescriptor(cudnnActivationDescriptor_t *activationDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateActivationDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(activationDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetActivationDescriptor( - cudnnActivationDescriptor_t activationDesc, cudnnActivationMode_t mode, - cudnnNanPropagation_t reluNanOpt, double coef) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI 
*)(cudnnActivationDescriptor_t, - cudnnActivationMode_t, - cudnnNanPropagation_t, double); - static auto func_ptr = LoadSymbol("cudnnSetActivationDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(activationDesc, mode, reluNanOpt, coef); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetActivationDescriptor(const cudnnActivationDescriptor_t activationDesc, - cudnnActivationMode_t *mode, - cudnnNanPropagation_t *reluNanOpt, double *coef) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnActivationDescriptor_t, cudnnActivationMode_t *, - cudnnNanPropagation_t *, double *); - static auto func_ptr = LoadSymbol("cudnnGetActivationDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(activationDesc, mode, reluNanOpt, coef); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyActivationDescriptor(cudnnActivationDescriptor_t activationDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t); - static auto func_ptr = - LoadSymbol("cudnnDestroyActivationDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(activationDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnActivationForward( - cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnActivationDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnActivationForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, activationDesc, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnActivationBackward( - cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc, - const void *alpha, const cudnnTensorDescriptor_t yDesc, const void 
*y, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnActivationDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnActivationBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, activationDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x, - beta, dxDesc, dx); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateLRNDescriptor(cudnnLRNDescriptor_t *normDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnLRNDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateLRNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(normDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetLRNDescriptor(cudnnLRNDescriptor_t normDesc, - unsigned lrnN, double lrnAlpha, - double lrnBeta, double lrnK) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnLRNDescriptor_t, unsigned int, double, double, double); - static auto func_ptr = LoadSymbol("cudnnSetLRNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(normDesc, lrnN, lrnAlpha, lrnBeta, lrnK); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc, - unsigned *lrnN, - double *lrnAlpha, - double *lrnBeta, double *lrnK) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnLRNDescriptor_t, unsigned int *, double *, double *, double *); - static auto func_ptr = LoadSymbol("cudnnGetLRNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(normDesc, lrnN, lrnAlpha, lrnBeta, lrnK); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyLRNDescriptor(cudnnLRNDescriptor_t lrnDesc) { - using 
FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnLRNDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyLRNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(lrnDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnLRNCrossChannelForward( - cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnLRNCrossChannelForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, normDesc, lrnMode, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnLRNCrossChannelBackward( - cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode, - const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnLRNCrossChannelBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, normDesc, lrnMode, alpha, yDesc, y, dyDesc, dy, xDesc, - x, beta, dxDesc, dx); -} - -cudnnStatus_t CUDNNWINAPI cudnnDivisiveNormalizationForward( - cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, - cudnnDivNormMode_t mode, const void 
*alpha, - const cudnnTensorDescriptor_t xDesc, /* same desc for means, temp, temp2 */ - const void *x, - const void *means, /* if NULL, means are assumed to be zero */ - void *temp, void *temp2, const void *beta, - const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, void *, void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = - LoadSymbol("cudnnDivisiveNormalizationForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, temp, temp2, - beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnDivisiveNormalizationBackward( - cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, - cudnnDivNormMode_t mode, const void *alpha, - const cudnnTensorDescriptor_t - xDesc, /* same desc for x, means, dy, temp, temp2 */ - const void *x, - const void *means, /* if NULL, means are assumed to be zero */ - const void *dy, void *temp, void *temp2, const void *beta, - const cudnnTensorDescriptor_t dXdMeansDesc, /* same desc for dx, dMeans */ - void *dx, /* output x differential */ - void *dMeans) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, const void *, - void *, void *, const void *, const cudnnTensorDescriptor_t, void *, - void *); - static auto func_ptr = - LoadSymbol("cudnnDivisiveNormalizationBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, dy, temp, - temp2, beta, dXdMeansDesc, dx, dMeans); -} - -cudnnStatus_t CUDNNWINAPI cudnnDeriveBNTensorDescriptor( - cudnnTensorDescriptor_t derivedBnDesc, const cudnnTensorDescriptor_t xDesc, - cudnnBatchNormMode_t mode) { - using FuncPtr = 
cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, - cudnnBatchNormMode_t); - static auto func_ptr = LoadSymbol("cudnnDeriveBNTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(derivedBnDesc, xDesc, mode); -} - -cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardTraining( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, - - const void *alpha, /* alpha[0] = result blend factor */ - const void *beta, /* beta[0] = dest layer blend factor */ - - const cudnnTensorDescriptor_t xDesc, const void *x, /* NxCxHxW */ - const cudnnTensorDescriptor_t yDesc, void *y, /* NxCxHxW */ - - /* Shared desc for the next 6 tensors in the argument list. - Data type to be set as follows: - type = (typeOf(x) == double) ? double : float - Dimensions for this descriptor depend on normalization mode - - Spatial Normalization : tensors are expected to have dims 1xCx1x1 - (normalization is performed across NxHxW) - - Per-Activation Normalization : tensors are expected to have dims of - 1xCxHxW (normalization is performed across N) */ - const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, - - /* 'Gamma' and 'Beta' respectively in Ioffe and Szegedy's paper's notation - */ - const void *bnScale, const void *bnBias, - - /* MUST use factor=1 in the very first call of a complete training cycle. - Use a factor=1/(1+n) at N-th call to the function to get - Cumulative Moving Average (CMA) behavior - CMA[n] = (x[1]+...+x[n])/n - Since CMA[n+1] = (n*CMA[n]+x[n+1])/(n+1) = - ((n+1)*CMA[n]-CMA[n])/(n+1) + x[n+1]/(n+1) = - CMA[n]*(1-1/(n+1)) + x[n+1]*1/(n+1) */ - double exponentialAverageFactor, - - /* Used in Training phase only. - runningMean = newMean*factor + runningMean*(1-factor) */ - void *resultRunningMean, - /* Output in training mode, input in inference. 
Is the moving average - of variance[x] (factor is applied in the same way as for runningMean) */ - void *resultRunningVariance, - - /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and - backward functions. */ - double epsilon, - - /* Optionally save intermediate results from the forward pass here - - can be reused to speed up backward pass. NULL if unused */ - void *resultSaveMean, void *resultSaveInvVariance) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, - const void *, const void *, double, void *, void *, double, void *, - void *); - static auto func_ptr = - LoadSymbol("cudnnBatchNormalizationForwardTraining"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr( - handle, mode, alpha, beta, xDesc, x, yDesc, y, bnScaleBiasMeanVarDesc, - bnScale, bnBias, exponentialAverageFactor, resultRunningMean, - resultRunningVariance, epsilon, resultSaveMean, resultSaveInvVariance); -} - -cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardInference( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, - const void *alpha, /* alpha[0] = result blend factor */ - const void *beta, /* beta[0] = dest layer blend factor */ - const cudnnTensorDescriptor_t xDesc, const void *x, /* NxCxHxW */ - const cudnnTensorDescriptor_t yDesc, void *y, /* NxCxHxW */ - const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, const void *bnScale, - const void *bnBias, const void *estimatedMean, - const void *estimatedVariance, double epsilon) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, - const void *, const void *, const void *, const void *, double); - static auto func_ptr = - 
LoadSymbol("cudnnBatchNormalizationForwardInference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, alpha, beta, xDesc, x, yDesc, y, - bnScaleBiasMeanVarDesc, bnScale, bnBias, estimatedMean, - estimatedVariance, epsilon); -} - -cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationBackward( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, const void *alphaDataDiff, - const void *betaDataDiff, const void *alphaParamDiff, - const void *betaParamDiff, - const cudnnTensorDescriptor_t xDesc, /* same desc for x, dx, dy */ - const void *x, const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnTensorDescriptor_t dxDesc, void *dx, - /* Shared tensor desc for the 4 tensors below */ - const cudnnTensorDescriptor_t dBnScaleBiasDesc, - const void *bnScale, /* bnBias doesn't affect backpropagation */ - /* scale and bias diff are not backpropagated below this layer */ - void *dBnScaleResult, void *dBnBiasResult, - /* Same epsilon as forward pass */ - double epsilon, - - /* Optionally cached intermediate results from - forward pass */ - const void *savedMean, const void *savedInvVariance) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, - const void *, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, - const void *, void *, void *, double, const void *, const void *); - static auto func_ptr = LoadSymbol("cudnnBatchNormalizationBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, alphaDataDiff, betaDataDiff, alphaParamDiff, - betaParamDiff, xDesc, x, dyDesc, dy, dxDesc, dx, - dBnScaleBiasDesc, bnScale, dBnScaleResult, dBnBiasResult, - epsilon, savedMean, savedInvVariance); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreateSpatialTransformerDescriptor( - cudnnSpatialTransformerDescriptor_t *stDesc) { - using 
FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t *); - static auto func_ptr = - LoadSymbol("cudnnCreateSpatialTransformerDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetSpatialTransformerNdDescriptor( - cudnnSpatialTransformerDescriptor_t stDesc, cudnnSamplerType_t samplerType, - cudnnDataType_t dataType, const int nbDims, const int dimA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnSpatialTransformerDescriptor_t, cudnnSamplerType_t, cudnnDataType_t, - const int, const int[]); - static auto func_ptr = - LoadSymbol("cudnnSetSpatialTransformerNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stDesc, samplerType, dataType, nbDims, dimA); -} - -cudnnStatus_t CUDNNWINAPI cudnnDestroySpatialTransformerDescriptor( - cudnnSpatialTransformerDescriptor_t stDesc) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t); - static auto func_ptr = - LoadSymbol("cudnnDestroySpatialTransformerDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSpatialTfGridGeneratorForward( - cudnnHandle_t handle, const cudnnSpatialTransformerDescriptor_t stDesc, - const void *theta, void *grid) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *, - void *); - static auto func_ptr = - LoadSymbol("cudnnSpatialTfGridGeneratorForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, stDesc, theta, grid); -} - -cudnnStatus_t CUDNNWINAPI cudnnSpatialTfGridGeneratorBackward( - cudnnHandle_t handle, const cudnnSpatialTransformerDescriptor_t stDesc, - const void *dgrid, void *dtheta) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *, - void *); - static auto func_ptr = - 
LoadSymbol("cudnnSpatialTfGridGeneratorBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, stDesc, dgrid, dtheta); -} - -cudnnStatus_t CUDNNWINAPI cudnnSpatialTfSamplerForward( - cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *grid, const void *beta, cudnnTensorDescriptor_t yDesc, - void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, const void *, - cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnSpatialTfSamplerForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, stDesc, alpha, xDesc, x, grid, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnSpatialTfSamplerBackward( - cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t dxDesc, void *dx, - const void *alphaDgrid, const cudnnTensorDescriptor_t dyDesc, - const void *dy, const void *grid, const void *betaDgrid, void *dgrid) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, const void *, - void *); - static auto func_ptr = LoadSymbol("cudnnSpatialTfSamplerBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, stDesc, alpha, xDesc, x, beta, dxDesc, dx, alphaDgrid, - dyDesc, dy, grid, betaDgrid, dgrid); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t *dropoutDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t 
*); - static auto func_ptr = LoadSymbol("cudnnCreateDropoutDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dropoutDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyDropoutDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dropoutDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnDropoutGetStatesSize(cudnnHandle_t handle, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnDropoutGetStatesSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnDropoutGetReserveSpaceSize( - cudnnTensorDescriptor_t xdesc, size_t *sizeInBytes) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnDropoutGetReserveSpaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(xdesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetDropoutDescriptor( - cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float dropout, - void *states, size_t stateSizeInBytes, unsigned long long seed) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, - float, void *, size_t, unsigned long long); - static auto func_ptr = LoadSymbol("cudnnSetDropoutDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dropoutDesc, handle, dropout, states, stateSizeInBytes, seed); -} - -cudnnStatus_t CUDNNWINAPI cudnnRestoreDropoutDescriptor( - cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float dropout, - void *states, size_t stateSizeInBytes, unsigned long long seed) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI 
*)(cudnnDropoutDescriptor_t, cudnnHandle_t, - float, void *, size_t, unsigned long long); - static auto func_ptr = LoadSymbol("cudnnRestoreDropoutDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dropoutDesc, handle, dropout, states, stateSizeInBytes, seed); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetDropoutDescriptor( - cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float *dropout, - void **states, unsigned long long *seed) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, - float *, void **, unsigned long long *); - static auto func_ptr = LoadSymbol("cudnnGetDropoutDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dropoutDesc, handle, dropout, states, seed); -} - -cudnnStatus_t CUDNNWINAPI cudnnDropoutForward( - cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc, - const cudnnTensorDescriptor_t xdesc, const void *x, - const cudnnTensorDescriptor_t ydesc, void *y, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnDropoutDescriptor_t, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnDropoutForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dropoutDesc, xdesc, x, ydesc, y, reserveSpace, - reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnDropoutBackward( - cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc, - const cudnnTensorDescriptor_t dydesc, const void *dy, - const cudnnTensorDescriptor_t dxdesc, void *dx, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnDropoutDescriptor_t, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, void *, size_t); - static auto func_ptr = 
LoadSymbol("cudnnDropoutBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dropoutDesc, dydesc, dy, dxdesc, dx, reserveSpace, - reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateRNNDescriptor(cudnnRNNDescriptor_t *rnnDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateRNNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyRNNDescriptor(cudnnRNNDescriptor_t rnnDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyRNNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNForwardInferenceAlgorithmMaxCount( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetRNNForwardInferenceAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, count); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindRNNForwardInferenceAlgorithmEx( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t *yDesc, void *y, - const cudnnTensorDescriptor_t hyDesc, void *hy, - const cudnnTensorDescriptor_t cyDesc, void *cy, const float findIntensity, - const int requestedAlgoCount, int *returnedAlgoCount, - cudnnAlgorithmPerformance_t *perfResults, void *workspace, - size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - 
cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, const float, const int, - int *, cudnnAlgorithmPerformance_t *, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindRNNForwardInferenceAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, - wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, findIntensity, - requestedAlgoCount, returnedAlgoCount, perfResults, workspace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNForwardTrainingAlgorithmMaxCount( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetRNNForwardTrainingAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, count); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindRNNForwardTrainingAlgorithmEx( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t *yDesc, void *y, - const cudnnTensorDescriptor_t hyDesc, void *hy, - const cudnnTensorDescriptor_t cyDesc, void *cy, const float findIntensity, - const int requestedAlgoCount, int *returnedAlgoCount, - cudnnAlgorithmPerformance_t *perfResults, void *workspace, - size_t workSpaceSizeInBytes, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - 
using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, const float, const int, - int *, cudnnAlgorithmPerformance_t *, void *, size_t, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindRNNForwardTrainingAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, - wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, findIntensity, - requestedAlgoCount, returnedAlgoCount, perfResults, workspace, - workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNBackwardDataAlgorithmMaxCount( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetRNNBackwardDataAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, count); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindRNNBackwardDataAlgorithmEx( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *yDesc, const void *y, - const cudnnTensorDescriptor_t *dyDesc, const void *dy, - const cudnnTensorDescriptor_t dhyDesc, const void *dhy, - const cudnnTensorDescriptor_t dcyDesc, const void *dcy, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnTensorDescriptor_t *dxDesc, void *dx, - const cudnnTensorDescriptor_t dhxDesc, void *dhx, - const 
cudnnTensorDescriptor_t dcxDesc, void *dcx, const float findIntensity, - const int requestedAlgoCount, int *returnedAlgoCount, - cudnnAlgorithmPerformance_t *perfResults, void *workspace, - size_t workSpaceSizeInBytes, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, const float, const int, - int *, cudnnAlgorithmPerformance_t *, void *, size_t, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindRNNBackwardDataAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc, - dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc, - dx, dhxDesc, dhx, dcxDesc, dcx, findIntensity, - requestedAlgoCount, returnedAlgoCount, perfResults, workspace, - workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNBackwardWeightsAlgorithmMaxCount( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetRNNBackwardWeightsAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, count); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindRNNBackwardWeightsAlgorithmEx( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t 
*xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t *yDesc, const void *y, - const float findIntensity, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnAlgorithmPerformance_t *perfResults, - const void *workspace, size_t workSpaceSizeInBytes, - const cudnnFilterDescriptor_t dwDesc, void *dw, const void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, const void *, const float, const int, - int *, cudnnAlgorithmPerformance_t *, const void *, size_t, - const cudnnFilterDescriptor_t, void *, const void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindRNNBackwardWeightsAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y, - findIntensity, requestedAlgoCount, returnedAlgoCount, - perfResults, workspace, workSpaceSizeInBytes, dwDesc, dw, - reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreatePersistentRNNPlan( - cudnnRNNDescriptor_t rnnDesc, const int minibatch, - const cudnnDataType_t dataType, cudnnPersistentRNNPlan_t *plan) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, const int, - const cudnnDataType_t, - cudnnPersistentRNNPlan_t *); - static auto func_ptr = LoadSymbol("cudnnCreatePersistentRNNPlan"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, minibatch, dataType, plan); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetPersistentRNNPlan( - cudnnRNNDescriptor_t rnnDesc, cudnnPersistentRNNPlan_t plan) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, - cudnnPersistentRNNPlan_t); - static auto func_ptr = LoadSymbol("cudnnSetPersistentRNNPlan"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(rnnDesc, plan); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyPersistentRNNPlan(cudnnPersistentRNNPlan_t plan) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPersistentRNNPlan_t); - static auto func_ptr = LoadSymbol("cudnnDestroyPersistentRNNPlan"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor( - cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, const int hiddenSize, - const int numLayers, - cudnnDropoutDescriptor_t - dropoutDesc, /* Between layers, not between recurrent steps. */ - cudnnRNNInputMode_t inputMode, cudnnDirectionMode_t direction, - cudnnRNNMode_t mode, cudnnRNNAlgo_t algo, cudnnDataType_t dataType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int, - cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t, - cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, - inputMode, direction, mode, algo, dataType); -} - -cudnnStatus_t CUDNNWINAPI -cudnnSetRNNProjectionLayers(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, - const int recProjSize, const int outProjSize) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int); - static auto func_ptr = LoadSymbol("cudnnSetRNNProjectionLayers"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, recProjSize, outProjSize); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNProjectionLayers( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *recProjSize, - int *outProjSize) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, int *, int *); - static auto func_ptr = 
LoadSymbol("cudnnGetRNNProjectionLayers"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, recProjSize, outProjSize); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetRNNAlgorithmDescriptor( - cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, - cudnnAlgorithmDescriptor_t algoDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, cudnnAlgorithmDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNAlgorithmDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, algoDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNDescriptor( - cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, int *hiddenSize, - int *numLayers, cudnnDropoutDescriptor_t *dropoutDesc, - cudnnRNNInputMode_t *inputMode, cudnnDirectionMode_t *direction, - cudnnRNNMode_t *mode, cudnnRNNAlgo_t *algo, cudnnDataType_t *dataType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, int *, int *, - cudnnDropoutDescriptor_t *, cudnnRNNInputMode_t *, cudnnDirectionMode_t *, - cudnnRNNMode_t *, cudnnRNNAlgo_t *, cudnnDataType_t *); - static auto func_ptr = LoadSymbol("cudnnGetRNNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, - inputMode, direction, mode, algo, dataType); -} - -cudnnStatus_t CUDNNWINAPI -cudnnSetRNNMatrixMathType(cudnnRNNDescriptor_t rnnDesc, cudnnMathType_t mType) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnMathType_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNMatrixMathType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, mType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNMatrixMathType( - cudnnRNNDescriptor_t rnnDesc, cudnnMathType_t *mType) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnMathType_t *); - static auto func_ptr = 
LoadSymbol("cudnnGetRNNMatrixMathType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, mType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNWorkspaceSize( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetRNNWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNTrainingReserveSize( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetRNNTrainingReserveSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetRNNParamsSize(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const cudnnTensorDescriptor_t xDesc, size_t *sizeInBytes, - cudnnDataType_t dataType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnTensorDescriptor_t, - size_t *, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnGetRNNParamsSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, xDesc, sizeInBytes, dataType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerMatrixParams( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int pseudoLayer, const cudnnTensorDescriptor_t xDesc, - const cudnnFilterDescriptor_t wDesc, const void *w, const int 
linLayerID, - cudnnFilterDescriptor_t linLayerMatDesc, void **linLayerMat) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, - const void *, const int, cudnnFilterDescriptor_t, void **); - static auto func_ptr = LoadSymbol("cudnnGetRNNLinLayerMatrixParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, pseudoLayer, xDesc, wDesc, w, linLayerID, - linLayerMatDesc, linLayerMat); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerBiasParams( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int pseudoLayer, const cudnnTensorDescriptor_t xDesc, - const cudnnFilterDescriptor_t wDesc, const void *w, const int linLayerID, - cudnnFilterDescriptor_t linLayerBiasDesc, void **linLayerBias) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, - const void *, const int, cudnnFilterDescriptor_t, void **); - static auto func_ptr = LoadSymbol("cudnnGetRNNLinLayerBiasParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, pseudoLayer, xDesc, wDesc, w, linLayerID, - linLayerBiasDesc, linLayerBias); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNForwardInference( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t *yDesc, void *y, - const cudnnTensorDescriptor_t hyDesc, void *hy, - const cudnnTensorDescriptor_t cyDesc, void *cy, void *workspace, - size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const 
cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnRNNForwardInference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, - wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNForwardTraining( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t *yDesc, void *y, - const cudnnTensorDescriptor_t hyDesc, void *hy, - const cudnnTensorDescriptor_t cyDesc, void *cy, void *workspace, - size_t workSpaceSizeInBytes, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *, - size_t); - static auto func_ptr = LoadSymbol("cudnnRNNForwardTraining"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, - wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, - workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t 
CUDNNWINAPI -cudnnRNNBackwardData(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *yDesc, - const void *y, const cudnnTensorDescriptor_t *dyDesc, - const void *dy, const cudnnTensorDescriptor_t dhyDesc, - const void *dhy, const cudnnTensorDescriptor_t dcyDesc, - const void *dcy, const cudnnFilterDescriptor_t wDesc, - const void *w, const cudnnTensorDescriptor_t hxDesc, - const void *hx, const cudnnTensorDescriptor_t cxDesc, - const void *cx, const cudnnTensorDescriptor_t *dxDesc, - void *dx, const cudnnTensorDescriptor_t dhxDesc, void *dhx, - const cudnnTensorDescriptor_t dcxDesc, void *dcx, - void *workspace, size_t workSpaceSizeInBytes, - void *reserveSpace, size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *, - size_t); - static auto func_ptr = LoadSymbol("cudnnRNNBackwardData"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc, - dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc, - dx, dhxDesc, dhx, dcxDesc, dcx, workspace, - workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardWeights( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const 
cudnnTensorDescriptor_t *yDesc, const void *y, const void *workspace, - size_t workSpaceSizeInBytes, const cudnnFilterDescriptor_t dwDesc, void *dw, - const void *reserveSpace, size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, const void *, const void *, size_t, - const cudnnFilterDescriptor_t, void *, const void *, size_t); - static auto func_ptr = LoadSymbol("cudnnRNNBackwardWeights"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y, - workspace, workSpaceSizeInBytes, dwDesc, dw, reserveSpace, - reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateCTCLossDescriptor(cudnnCTCLossDescriptor_t *ctcLossDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateCTCLossDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctcLossDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetCTCLossDescriptor( - cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t compType) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetCTCLossDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctcLossDesc, compType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossDescriptor( - cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t *compType) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t *); - static auto func_ptr = LoadSymbol("cudnnGetCTCLossDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctcLossDesc, compType); -} - -cudnnStatus_t CUDNNWINAPI 
-cudnnDestroyCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyCTCLossDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctcLossDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnCTCLoss( - cudnnHandle_t handle, - const cudnnTensorDescriptor_t - probsDesc, /* Tensor descriptor for probabilities, the dimensions are - T,N,A (T is the timing steps, N is the mini batch size, A - is the alphabet size) */ - const void *probs, /* probabilities after softmax, in GPU memory */ - const int *labels, /* labels, in CPU memory */ - const int *labelLengths, /* the length of each label, in CPU memory */ - const int *inputLengths, /* the lengths of timing steps in each batch, in - CPU memory */ - void *costs, /* the returned costs of CTC, in GPU memory */ - const cudnnTensorDescriptor_t - gradientsDesc, /* Tensor descriptor for gradients, the dimensions are - T,N,A */ - const void *gradients, /* the returned CTC gradients, in GPU memory, to - compute costs only, set it to NULL */ - cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */ - cudnnCTCLossDescriptor_t ctcLossDesc, - void *workspace, /* pointer to the workspace, in GPU memory */ - size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const int *, - const int *, const int *, void *, const cudnnTensorDescriptor_t, - const void *, cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, void *, - size_t); - static auto func_ptr = LoadSymbol("cudnnCTCLoss"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, probsDesc, probs, labels, labelLengths, inputLengths, - costs, gradientsDesc, gradients, algo, ctcLossDesc, workspace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossWorkspaceSize( - cudnnHandle_t handle, - const 
cudnnTensorDescriptor_t - probsDesc, /* Tensor descriptor for probabilities, the dimensions are - T,N,A (T is the timing steps, N is the mini batch size, A - is the alphabet size) */ - const cudnnTensorDescriptor_t - gradientsDesc, /* Tensor descriptor for gradients, the dimensions are - T,N,A. To compute costs only, set it to NULL */ - const int *labels, /* labels, in CPU memory */ - const int *labelLengths, /* the length of each label, in CPU memory */ - const int *inputLengths, /* the lengths of timing steps in each batch, in - CPU memory */ - cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */ - cudnnCTCLossDescriptor_t ctcLossDesc, size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const int *, const int *, const int *, - cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetCTCLossWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, probsDesc, gradientsDesc, labels, labelLengths, - inputLengths, algo, ctcLossDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateAlgorithmDescriptor(cudnnAlgorithmDescriptor_t *algoDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateAlgorithmDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetAlgorithmDescriptor( - cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t algorithm) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t, - cudnnAlgorithm_t); - static auto func_ptr = LoadSymbol("cudnnSetAlgorithmDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoDesc, algorithm); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetAlgorithmDescriptor( - const cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t 
*algorithm) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(const cudnnAlgorithmDescriptor_t, - cudnnAlgorithm_t *); - static auto func_ptr = LoadSymbol("cudnnGetAlgorithmDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoDesc, algorithm); -} - -cudnnStatus_t CUDNNWINAPI cudnnCopyAlgorithmDescriptor( - const cudnnAlgorithmDescriptor_t src, cudnnAlgorithmDescriptor_t dest) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(const cudnnAlgorithmDescriptor_t, - cudnnAlgorithmDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnCopyAlgorithmDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(src, dest); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyAlgorithmDescriptor(cudnnAlgorithmDescriptor_t algoDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyAlgorithmDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreateAlgorithmPerformance( - cudnnAlgorithmPerformance_t *algoPerf, int numberToCreate) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmPerformance_t *, int); - static auto func_ptr = LoadSymbol("cudnnCreateAlgorithmPerformance"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoPerf, numberToCreate); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetAlgorithmPerformance( - cudnnAlgorithmPerformance_t algoPerf, cudnnAlgorithmDescriptor_t algoDesc, - cudnnStatus_t status, float time, size_t memory) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmPerformance_t, - cudnnAlgorithmDescriptor_t, - cudnnStatus_t, float, size_t); - static auto func_ptr = LoadSymbol("cudnnSetAlgorithmPerformance"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoPerf, algoDesc, status, time, memory); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetAlgorithmPerformance( - const cudnnAlgorithmPerformance_t 
algoPerf, - cudnnAlgorithmDescriptor_t *algoDesc, cudnnStatus_t *status, float *time, - size_t *memory) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnAlgorithmPerformance_t, cudnnAlgorithmDescriptor_t *, - cudnnStatus_t *, float *, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetAlgorithmPerformance"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoPerf, algoDesc, status, time, memory); -} - -cudnnStatus_t CUDNNWINAPI cudnnDestroyAlgorithmPerformance( - cudnnAlgorithmPerformance_t *algoPerf, int numberToDestroy) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmPerformance_t *, int); - static auto func_ptr = - LoadSymbol("cudnnDestroyAlgorithmPerformance"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoPerf, numberToDestroy); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetAlgorithmSpaceSize( - cudnnHandle_t handle, cudnnAlgorithmDescriptor_t algoDesc, - size_t *algoSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnAlgorithmDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetAlgorithmSpaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algoDesc, algoSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnSaveAlgorithm(cudnnHandle_t handle, cudnnAlgorithmDescriptor_t algoDesc, - void *algoSpace, size_t algoSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnAlgorithmDescriptor_t, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnSaveAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algoDesc, algoSpace, algoSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnRestoreAlgorithm( - cudnnHandle_t handle, void *algoSpace, size_t algoSpaceSizeInBytes, - cudnnAlgorithmDescriptor_t algoDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, void *, size_t, - cudnnAlgorithmDescriptor_t); - static 
auto func_ptr = LoadSymbol("cudnnRestoreAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algoSpace, algoSpaceSizeInBytes, algoDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetCallback(unsigned mask, void *udata, - cudnnCallback_t fptr) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(unsigned int, void *, cudnnCallback_t); - static auto func_ptr = LoadSymbol("cudnnSetCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mask, udata, fptr); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetCallback(unsigned *mask, void **udata, - cudnnCallback_t *fptr) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(unsigned int *, void **, cudnnCallback_t *); - static auto func_ptr = LoadSymbol("cudnnGetCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mask, udata, fptr); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor_v6( - cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, const int hiddenSize, - const int numLayers, cudnnDropoutDescriptor_t dropoutDesc, - cudnnRNNInputMode_t inputMode, cudnnDirectionMode_t direction, - cudnnRNNMode_t mode, cudnnRNNAlgo_t algo, cudnnDataType_t dataType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int, - cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t, - cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNDescriptor_v6"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, - inputMode, direction, mode, algo, dataType); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor_v5( - cudnnRNNDescriptor_t rnnDesc, int hiddenSize, int numLayers, - cudnnDropoutDescriptor_t dropoutDesc, cudnnRNNInputMode_t inputMode, - cudnnDirectionMode_t direction, cudnnRNNMode_t mode, - cudnnDataType_t dataType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - 
cudnnRNNDescriptor_t, int, int, cudnnDropoutDescriptor_t, - cudnnRNNInputMode_t, cudnnDirectionMode_t, cudnnRNNMode_t, - cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNDescriptor_v5"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode, - direction, mode, dataType); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cudnn_7_3.inc b/third_party/xla/third_party/tsl/tsl/cuda/cudnn_7_3.inc deleted file mode 100644 index f1c25c74d0c481..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cudnn_7_3.inc +++ /dev/null @@ -1,2585 +0,0 @@ -// Auto-generated, do not edit. - -extern "C" { - -size_t CUDNNWINAPI cudnnGetVersion(void) { - using FuncPtr = size_t(CUDNNWINAPI *)(); - static auto func_ptr = LoadSymbol("cudnnGetVersion"); - if (!func_ptr) return 0; - return func_ptr(); -} - -size_t CUDNNWINAPI cudnnGetCudartVersion(void) { - using FuncPtr = size_t(CUDNNWINAPI *)(); - static auto func_ptr = LoadSymbol("cudnnGetCudartVersion"); - if (!func_ptr) return 0; - return func_ptr(); -} - -const char *CUDNNWINAPI cudnnGetErrorString(cudnnStatus_t status) { - using FuncPtr = const char *(CUDNNWINAPI *)(cudnnStatus_t); - static auto func_ptr = LoadSymbol("cudnnGetErrorString"); - if (!func_ptr) return "cudnnGetErrorString symbol not found."; - return func_ptr(status); -} - -cudnnStatus_t CUDNNWINAPI cudnnQueryRuntimeError(cudnnHandle_t handle, - cudnnStatus_t *rstatus, - cudnnErrQueryMode_t mode, - cudnnRuntimeTag_t *tag) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnStatus_t *, cudnnErrQueryMode_t, cudnnRuntimeTag_t *); - static auto func_ptr = LoadSymbol("cudnnQueryRuntimeError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rstatus, mode, tag); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetProperty(libraryPropertyType type, - int *value) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI 
*)(libraryPropertyType, int *); - static auto func_ptr = LoadSymbol("cudnnGetProperty"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type, value); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreate(cudnnHandle_t *handle) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t *); - static auto func_ptr = LoadSymbol("cudnnCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cudnnStatus_t CUDNNWINAPI cudnnDestroy(cudnnHandle_t handle) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t); - static auto func_ptr = LoadSymbol("cudnnDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetStream(cudnnHandle_t handle, - cudaStream_t streamId) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudnnSetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetStream(cudnnHandle_t handle, - cudaStream_t *streamId) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t *); - static auto func_ptr = LoadSymbol("cudnnGetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensor4dDescriptor( - cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format, - cudnnDataType_t dataType, /* image data type */ - int n, /* number of inputs (batch size) */ - int c, /* number of input feature maps */ - int h, /* height of input section */ - int w) { - using FuncPtr = - 
cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t, - cudnnDataType_t, int, int, int, int); - static auto func_ptr = LoadSymbol("cudnnSetTensor4dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, format, dataType, n, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensor4dDescriptorEx( - cudnnTensorDescriptor_t tensorDesc, - cudnnDataType_t dataType, /* image data type */ - int n, /* number of inputs (batch size) */ - int c, /* number of input feature maps */ - int h, /* height of input section */ - int w, /* width of input section */ - int nStride, int cStride, int hStride, int wStride) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnDataType_t, - int, int, int, int, int, int, int, int); - static auto func_ptr = LoadSymbol("cudnnSetTensor4dDescriptorEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride, - wStride); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetTensor4dDescriptor( - const cudnnTensorDescriptor_t tensorDesc, - cudnnDataType_t *dataType, /* image data type */ - int *n, /* number of inputs (batch size) */ - int *c, /* number of input feature maps */ - int *h, /* height of input section */ - int *w, /* width of input section */ - int *nStride, int *cStride, int *hStride, int *wStride) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnTensorDescriptor_t, cudnnDataType_t *, int *, int *, int *, - int *, int *, int *, int *, int *); - static auto func_ptr = LoadSymbol("cudnnGetTensor4dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride, - wStride); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensorNdDescriptor( - cudnnTensorDescriptor_t tensorDesc, cudnnDataType_t dataType, int nbDims, - const int dimA[], const int strideA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - 
cudnnTensorDescriptor_t, cudnnDataType_t, int, const int[], const int[]); - static auto func_ptr = LoadSymbol("cudnnSetTensorNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, dataType, nbDims, dimA, strideA); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensorNdDescriptorEx( - cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format, - cudnnDataType_t dataType, int nbDims, const int dimA[]) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t, - cudnnDataType_t, int, const int[]); - static auto func_ptr = LoadSymbol("cudnnSetTensorNdDescriptorEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, format, dataType, nbDims, dimA); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetTensorNdDescriptor( - const cudnnTensorDescriptor_t tensorDesc, int nbDimsRequested, - cudnnDataType_t *dataType, int *nbDims, int dimA[], int strideA[]) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(const cudnnTensorDescriptor_t, int, - cudnnDataType_t *, int *, int[], int[]); - static auto func_ptr = LoadSymbol("cudnnGetTensorNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, nbDimsRequested, dataType, nbDims, dimA, strideA); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetTensorSizeInBytes( - const cudnnTensorDescriptor_t tensorDesc, size_t *size) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(const cudnnTensorDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetTensorSizeInBytes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, size); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyTensorDescriptor(cudnnTensorDescriptor_t tensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc); -} - -cudnnStatus_t CUDNNWINAPI 
cudnnTransformTensor( - cudnnHandle_t handle, const void *alpha, - const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, - const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnTransformTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnAddTensor(cudnnHandle_t handle, - const void *alpha, - const cudnnTensorDescriptor_t aDesc, - const void *A, const void *beta, - const cudnnTensorDescriptor_t cDesc, - void *C) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnAddTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, aDesc, A, beta, cDesc, C); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateOpTensorDescriptor(cudnnOpTensorDescriptor_t *opTensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateOpTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(opTensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetOpTensorDescriptor( - cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t opTensorOp, - cudnnDataType_t opTensorCompType, cudnnNanPropagation_t opTensorNanOpt) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t, - cudnnDataType_t, cudnnNanPropagation_t); - static auto func_ptr = LoadSymbol("cudnnSetOpTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(opTensorDesc, opTensorOp, opTensorCompType, opTensorNanOpt); -} - -cudnnStatus_t 
CUDNNWINAPI cudnnGetOpTensorDescriptor( - const cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t *opTensorOp, - cudnnDataType_t *opTensorCompType, cudnnNanPropagation_t *opTensorNanOpt) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t *, cudnnDataType_t *, - cudnnNanPropagation_t *); - static auto func_ptr = LoadSymbol("cudnnGetOpTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(opTensorDesc, opTensorOp, opTensorCompType, opTensorNanOpt); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyOpTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(opTensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnOpTensor( - cudnnHandle_t handle, const cudnnOpTensorDescriptor_t opTensorDesc, - const void *alpha1, const cudnnTensorDescriptor_t aDesc, const void *A, - const void *alpha2, const cudnnTensorDescriptor_t bDesc, const void *B, - const void *beta, const cudnnTensorDescriptor_t cDesc, void *C) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnOpTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnOpTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opTensorDesc, alpha1, aDesc, A, alpha2, bDesc, B, - beta, cDesc, C); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreateReduceTensorDescriptor( - cudnnReduceTensorDescriptor_t *reduceTensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t *); - static auto func_ptr = - LoadSymbol("cudnnCreateReduceTensorDescriptor"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(reduceTensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetReduceTensorDescriptor( - cudnnReduceTensorDescriptor_t reduceTensorDesc, - cudnnReduceTensorOp_t reduceTensorOp, cudnnDataType_t reduceTensorCompType, - cudnnNanPropagation_t reduceTensorNanOpt, - cudnnReduceTensorIndices_t reduceTensorIndices, - cudnnIndicesType_t reduceTensorIndicesType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t, cudnnDataType_t, - cudnnNanPropagation_t, cudnnReduceTensorIndices_t, cudnnIndicesType_t); - static auto func_ptr = LoadSymbol("cudnnSetReduceTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType, - reduceTensorNanOpt, reduceTensorIndices, - reduceTensorIndicesType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetReduceTensorDescriptor( - const cudnnReduceTensorDescriptor_t reduceTensorDesc, - cudnnReduceTensorOp_t *reduceTensorOp, - cudnnDataType_t *reduceTensorCompType, - cudnnNanPropagation_t *reduceTensorNanOpt, - cudnnReduceTensorIndices_t *reduceTensorIndices, - cudnnIndicesType_t *reduceTensorIndicesType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t *, - cudnnDataType_t *, cudnnNanPropagation_t *, cudnnReduceTensorIndices_t *, - cudnnIndicesType_t *); - static auto func_ptr = LoadSymbol("cudnnGetReduceTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType, - reduceTensorNanOpt, reduceTensorIndices, - reduceTensorIndicesType); -} - -cudnnStatus_t CUDNNWINAPI cudnnDestroyReduceTensorDescriptor( - cudnnReduceTensorDescriptor_t reduceTensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t); - static auto func_ptr = - LoadSymbol("cudnnDestroyReduceTensorDescriptor"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(reduceTensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetReductionIndicesSize( - cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, - const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnReduceTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetReductionIndicesSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetReductionWorkspaceSize( - cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, - const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnReduceTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetReductionWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnReduceTensor( - cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, - void *indices, size_t indicesSizeInBytes, void *workspace, - size_t workspaceSizeInBytes, const void *alpha, - const cudnnTensorDescriptor_t aDesc, const void *A, const void *beta, - const cudnnTensorDescriptor_t cDesc, void *C) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnReduceTensorDescriptor_t, void *, size_t, - void *, size_t, const void *, const cudnnTensorDescriptor_t, const void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnReduceTensor"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, reduceTensorDesc, indices, indicesSizeInBytes, - workspace, workspaceSizeInBytes, alpha, aDesc, A, beta, cDesc, - C); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensor(cudnnHandle_t handle, - const cudnnTensorDescriptor_t yDesc, - void *y, const void *valuePtr) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *); - static auto func_ptr = LoadSymbol("cudnnSetTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, yDesc, y, valuePtr); -} - -cudnnStatus_t CUDNNWINAPI cudnnScaleTensor(cudnnHandle_t handle, - const cudnnTensorDescriptor_t yDesc, - void *y, const void *alpha) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *); - static auto func_ptr = LoadSymbol("cudnnScaleTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, yDesc, y, alpha); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateFilterDescriptor(cudnnFilterDescriptor_t *filterDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateFilterDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetFilter4dDescriptor( - cudnnFilterDescriptor_t filterDesc, - cudnnDataType_t dataType, /* image data type */ - cudnnTensorFormat_t format, int k, /* number of output feature maps */ - int c, /* number of input feature maps */ - int h, /* height of each input filter */ - int w) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t, - cudnnTensorFormat_t, int, int, int, int); - static auto func_ptr = LoadSymbol("cudnnSetFilter4dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc, dataType, format, k, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI 
cudnnGetFilter4dDescriptor( - const cudnnFilterDescriptor_t filterDesc, - cudnnDataType_t *dataType, /* image data type */ - cudnnTensorFormat_t *format, int *k, /* number of output feature maps */ - int *c, /* number of input feature maps */ - int *h, /* height of each input filter */ - int *w) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnFilterDescriptor_t, cudnnDataType_t *, cudnnTensorFormat_t *, - int *, int *, int *, int *); - static auto func_ptr = LoadSymbol("cudnnGetFilter4dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc, dataType, format, k, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetFilterNdDescriptor( - cudnnFilterDescriptor_t filterDesc, - cudnnDataType_t dataType, /* image data type */ - cudnnTensorFormat_t format, int nbDims, const int filterDimA[]) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t, - cudnnTensorFormat_t, int, const int[]); - static auto func_ptr = LoadSymbol("cudnnSetFilterNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc, dataType, format, nbDims, filterDimA); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetFilterNdDescriptor( - const cudnnFilterDescriptor_t filterDesc, int nbDimsRequested, - cudnnDataType_t *dataType, /* image data type */ - cudnnTensorFormat_t *format, int *nbDims, int filterDimA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnFilterDescriptor_t, int, cudnnDataType_t *, - cudnnTensorFormat_t *, int *, int[]); - static auto func_ptr = LoadSymbol("cudnnGetFilterNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc, nbDimsRequested, dataType, format, nbDims, - filterDimA); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyFilterDescriptor(cudnnFilterDescriptor_t filterDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyFilterDescriptor"); - 
if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t *convDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t *); - static auto func_ptr = - LoadSymbol("cudnnCreateConvolutionDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionMathType( - cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t mathType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, - cudnnMathType_t); - static auto func_ptr = LoadSymbol("cudnnSetConvolutionMathType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, mathType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionMathType( - cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t *mathType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, - cudnnMathType_t *); - static auto func_ptr = LoadSymbol("cudnnGetConvolutionMathType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, mathType); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionGroupCount( - cudnnConvolutionDescriptor_t convDesc, int groupCount) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int); - static auto func_ptr = LoadSymbol("cudnnSetConvolutionGroupCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, groupCount); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionGroupCount( - cudnnConvolutionDescriptor_t convDesc, int *groupCount) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int *); - static auto func_ptr = LoadSymbol("cudnnGetConvolutionGroupCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, groupCount); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetConvolution2dDescriptor( - 
cudnnConvolutionDescriptor_t convDesc, int pad_h, /* zero-padding height */ - int pad_w, /* zero-padding width */ - int u, /* vertical filter stride */ - int v, /* horizontal filter stride */ - int dilation_h, /* filter dilation in the vertical dimension */ - int dilation_w, /* filter dilation in the horizontal dimension */ - cudnnConvolutionMode_t mode, cudnnDataType_t computeType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnConvolutionDescriptor_t, int, int, int, int, int, int, - cudnnConvolutionMode_t, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetConvolution2dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode, - computeType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dDescriptor( - const cudnnConvolutionDescriptor_t convDesc, - int *pad_h, /* zero-padding height */ - int *pad_w, /* zero-padding width */ - int *u, /* vertical filter stride */ - int *v, /* horizontal filter stride */ - int *dilation_h, /* filter dilation in the vertical dimension */ - int *dilation_w, /* filter dilation in the horizontal dimension */ - cudnnConvolutionMode_t *mode, cudnnDataType_t *computeType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnConvolutionDescriptor_t, int *, int *, int *, int *, int *, - int *, cudnnConvolutionMode_t *, cudnnDataType_t *); - static auto func_ptr = LoadSymbol("cudnnGetConvolution2dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode, - computeType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dForwardOutputDim( - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t inputTensorDesc, - const cudnnFilterDescriptor_t filterDesc, int *n, int *c, int *h, int *w) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, - const 
cudnnFilterDescriptor_t, int *, int *, int *, int *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolution2dForwardOutputDim"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, inputTensorDesc, filterDesc, n, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionNdDescriptor( - cudnnConvolutionDescriptor_t convDesc, int arrayLength, /* nbDims-2 size */ - const int padA[], const int filterStrideA[], const int dilationA[], - cudnnConvolutionMode_t mode, cudnnDataType_t computeType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnConvolutionDescriptor_t, int, const int[], const int[], const int[], - cudnnConvolutionMode_t, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetConvolutionNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, arrayLength, padA, filterStrideA, dilationA, mode, - computeType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionNdDescriptor( - const cudnnConvolutionDescriptor_t convDesc, int arrayLengthRequested, - int *arrayLength, int padA[], int strideA[], int dilationA[], - cudnnConvolutionMode_t *mode, cudnnDataType_t *computeType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnConvolutionDescriptor_t, int, int *, int[], int[], int[], - cudnnConvolutionMode_t *, cudnnDataType_t *); - static auto func_ptr = LoadSymbol("cudnnGetConvolutionNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, arrayLengthRequested, arrayLength, padA, strideA, - dilationA, mode, computeType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionNdForwardOutputDim( - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t inputTensorDesc, - const cudnnFilterDescriptor_t filterDesc, int nbDims, - int tensorOutputDimA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, int, int[]); - 
static auto func_ptr = - LoadSymbol("cudnnGetConvolutionNdForwardOutputDim"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, inputTensorDesc, filterDesc, nbDims, - tensorOutputDimA); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyConvolutionDescriptor(cudnnConvolutionDescriptor_t convDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t); - static auto func_ptr = - LoadSymbol("cudnnDestroyConvolutionDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetConvolutionForwardAlgorithmMaxCount(cudnnHandle_t handle, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionForwardAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, count); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionForwardAlgorithm( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnFilterDescriptor_t wDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t yDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, const int, int *, - cudnnConvolutionFwdAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionForwardAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, requestedAlgoCount, - returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionForwardAlgorithmEx( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnConvolutionDescriptor_t 
convDesc, - const cudnnTensorDescriptor_t yDesc, void *y, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults, - void *workSpace, size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *, - const int, int *, cudnnConvolutionFwdAlgoPerf_t *, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionForwardAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, x, wDesc, w, convDesc, yDesc, y, - requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardAlgorithm( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnFilterDescriptor_t wDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t yDesc, - cudnnConvolutionFwdPreference_t preference, size_t memoryLimitInBytes, - cudnnConvolutionFwdAlgo_t *algo) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, cudnnConvolutionFwdPreference_t, size_t, - cudnnConvolutionFwdAlgo_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionForwardAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, preference, - memoryLimitInBytes, algo); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardAlgorithm_v7( - cudnnHandle_t handle, const cudnnTensorDescriptor_t srcDesc, - const cudnnFilterDescriptor_t filterDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t destDesc, const int requestedAlgoCount, - int *returnedAlgoCount, 
cudnnConvolutionFwdAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, const int, int *, - cudnnConvolutionFwdAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionForwardAlgorithm_v7"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, srcDesc, filterDesc, convDesc, destDesc, - requestedAlgoCount, returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardWorkspaceSize( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnFilterDescriptor_t wDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t yDesc, cudnnConvolutionFwdAlgo_t algo, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, cudnnConvolutionFwdAlgo_t, size_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionForwardWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, algo, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionForward( - cudnnHandle_t handle, const void *alpha, - const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo, - void *workSpace, size_t workSpaceSizeInBytes, const void *beta, - const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *, - size_t, const void *, const 
cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnConvolutionForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, xDesc, x, wDesc, w, convDesc, algo, workSpace, - workSpaceSizeInBytes, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionBiasActivationForward( - cudnnHandle_t handle, const void *alpha1, - const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo, - void *workSpace, size_t workSpaceSizeInBytes, const void *alpha2, - const cudnnTensorDescriptor_t zDesc, const void *z, - const cudnnTensorDescriptor_t biasDesc, const void *bias, - const cudnnActivationDescriptor_t activationDesc, - const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *, - size_t, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnActivationDescriptor_t, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = - LoadSymbol("cudnnConvolutionBiasActivationForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha1, xDesc, x, wDesc, w, convDesc, algo, workSpace, - workSpaceSizeInBytes, alpha2, zDesc, z, biasDesc, bias, - activationDesc, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardBias( - cudnnHandle_t handle, const void *alpha, - const cudnnTensorDescriptor_t dyDesc, const void *dy, const void *beta, - const cudnnTensorDescriptor_t dbDesc, void *db) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const void *, const cudnnTensorDescriptor_t, void 
*); - static auto func_ptr = LoadSymbol("cudnnConvolutionBackwardBias"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, dyDesc, dy, beta, dbDesc, db); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithmMaxCount( - cudnnHandle_t handle, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardFilterAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, count); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardFilterAlgorithm( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnFilterDescriptor_t dwDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnFilterDescriptor_t, const int, int *, - cudnnConvolutionBwdFilterAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionBackwardFilterAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, requestedAlgoCount, - returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardFilterAlgorithmEx( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnTensorDescriptor_t dyDesc, const void *y, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnFilterDescriptor_t dwDesc, void *dw, - const int requestedAlgoCount, int *returnedAlgoCount, - cudnnConvolutionBwdFilterAlgoPerf_t *perfResults, void *workSpace, - size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, 
const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, void *, - const int, int *, cudnnConvolutionBwdFilterAlgoPerf_t *, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionBackwardFilterAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, x, dyDesc, y, convDesc, dwDesc, dw, - requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithm( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnFilterDescriptor_t dwDesc, - cudnnConvolutionBwdFilterPreference_t preference, size_t memoryLimitInBytes, - cudnnConvolutionBwdFilterAlgo_t *algo) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterPreference_t, - size_t, cudnnConvolutionBwdFilterAlgo_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardFilterAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, preference, - memoryLimitInBytes, algo); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithm_v7( - cudnnHandle_t handle, const cudnnTensorDescriptor_t srcDesc, - const cudnnTensorDescriptor_t diffDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnFilterDescriptor_t gradDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnFilterDescriptor_t, const 
int, int *, - cudnnConvolutionBwdFilterAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardFilterAlgorithm_v7"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, srcDesc, diffDesc, convDesc, gradDesc, - requestedAlgoCount, returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterWorkspaceSize( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnFilterDescriptor_t gradDesc, - cudnnConvolutionBwdFilterAlgo_t algo, size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, size_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardFilterWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, dyDesc, convDesc, gradDesc, algo, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardFilter( - cudnnHandle_t handle, const void *alpha, - const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnConvolutionDescriptor_t convDesc, - cudnnConvolutionBwdFilterAlgo_t algo, void *workSpace, - size_t workSpaceSizeInBytes, const void *beta, - const cudnnFilterDescriptor_t dwDesc, void *dw) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, - void *, size_t, const void *, const cudnnFilterDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnConvolutionBackwardFilter"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, 
xDesc, x, dyDesc, dy, convDesc, algo, - workSpace, workSpaceSizeInBytes, beta, dwDesc, dw); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithmMaxCount( - cudnnHandle_t handle, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardDataAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, count); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardDataAlgorithm( - cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t dxDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, const int, int *, - cudnnConvolutionBwdDataAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionBackwardDataAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, requestedAlgoCount, - returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardDataAlgorithmEx( - cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t dxDesc, void *dx, - const int requestedAlgoCount, int *returnedAlgoCount, - cudnnConvolutionBwdDataAlgoPerf_t *perfResults, void *workSpace, - size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, const 
cudnnTensorDescriptor_t, void *, - const int, int *, cudnnConvolutionBwdDataAlgoPerf_t *, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionBackwardDataAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, wDesc, w, dyDesc, dy, convDesc, dxDesc, dx, - requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithm( - cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t dxDesc, - cudnnConvolutionBwdDataPreference_t preference, size_t memoryLimitInBytes, - cudnnConvolutionBwdDataAlgo_t *algo) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataPreference_t, - size_t, cudnnConvolutionBwdDataAlgo_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardDataAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, preference, - memoryLimitInBytes, algo); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithm_v7( - cudnnHandle_t handle, const cudnnFilterDescriptor_t filterDesc, - const cudnnTensorDescriptor_t diffDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t gradDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, const int, int *, - cudnnConvolutionBwdDataAlgoPerf_t *); - static auto func_ptr = - 
LoadSymbol("cudnnGetConvolutionBackwardDataAlgorithm_v7"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, filterDesc, diffDesc, convDesc, gradDesc, - requestedAlgoCount, returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataWorkspaceSize( - cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t dxDesc, cudnnConvolutionBwdDataAlgo_t algo, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataAlgo_t, size_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardDataWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, algo, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardData( - cudnnHandle_t handle, const void *alpha, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnConvolutionDescriptor_t convDesc, - cudnnConvolutionBwdDataAlgo_t algo, void *workSpace, - size_t workSpaceSizeInBytes, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdDataAlgo_t, void *, - size_t, const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnConvolutionBackwardData"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, wDesc, w, dyDesc, dy, convDesc, algo, - workSpace, workSpaceSizeInBytes, beta, dxDesc, dx); -} - 
-cudnnStatus_t CUDNNWINAPI -cudnnIm2Col(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const void *x, const cudnnFilterDescriptor_t wDesc, - const cudnnConvolutionDescriptor_t convDesc, void *colBuffer) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, - const void *, const cudnnFilterDescriptor_t, - const cudnnConvolutionDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnIm2Col"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, x, wDesc, convDesc, colBuffer); -} - -cudnnStatus_t CUDNNWINAPI cudnnSoftmaxForward( - cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnSoftmaxForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, mode, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnSoftmaxBackward( - cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode, - const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y, - const cudnnTensorDescriptor_t dyDesc, const void *dy, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnSoftmaxBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, 
algo, mode, alpha, yDesc, y, dyDesc, dy, beta, dxDesc, - dx); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t *poolingDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPoolingDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreatePoolingDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetPooling2dDescriptor( - cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t mode, - cudnnNanPropagation_t maxpoolingNanOpt, int windowHeight, int windowWidth, - int verticalPadding, int horizontalPadding, int verticalStride, - int horizontalStride) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnPoolingDescriptor_t, cudnnPoolingMode_t, cudnnNanPropagation_t, int, - int, int, int, int, int); - static auto func_ptr = LoadSymbol("cudnnSetPooling2dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight, - windowWidth, verticalPadding, horizontalPadding, - verticalStride, horizontalStride); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetPooling2dDescriptor( - const cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t *mode, - cudnnNanPropagation_t *maxpoolingNanOpt, int *windowHeight, - int *windowWidth, int *verticalPadding, int *horizontalPadding, - int *verticalStride, int *horizontalStride) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnPoolingDescriptor_t, cudnnPoolingMode_t *, - cudnnNanPropagation_t *, int *, int *, int *, int *, int *, int *); - static auto func_ptr = LoadSymbol("cudnnGetPooling2dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight, - windowWidth, verticalPadding, horizontalPadding, - verticalStride, horizontalStride); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetPoolingNdDescriptor( - cudnnPoolingDescriptor_t poolingDesc, const cudnnPoolingMode_t 
mode, - const cudnnNanPropagation_t maxpoolingNanOpt, int nbDims, - const int windowDimA[], const int paddingA[], const int strideA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnPoolingDescriptor_t, const cudnnPoolingMode_t, - const cudnnNanPropagation_t, int, const int[], const int[], const int[]); - static auto func_ptr = LoadSymbol("cudnnSetPoolingNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, mode, maxpoolingNanOpt, nbDims, windowDimA, - paddingA, strideA); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetPoolingNdDescriptor( - const cudnnPoolingDescriptor_t poolingDesc, int nbDimsRequested, - cudnnPoolingMode_t *mode, cudnnNanPropagation_t *maxpoolingNanOpt, - int *nbDims, int windowDimA[], int paddingA[], int strideA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnPoolingDescriptor_t, int, cudnnPoolingMode_t *, - cudnnNanPropagation_t *, int *, int[], int[], int[]); - static auto func_ptr = LoadSymbol("cudnnGetPoolingNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, nbDimsRequested, mode, maxpoolingNanOpt, nbDims, - windowDimA, paddingA, strideA); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetPoolingNdForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc, - const cudnnTensorDescriptor_t inputTensorDesc, - int nbDims, int outputTensorDimA[]) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, - const cudnnTensorDescriptor_t, int, int[]); - static auto func_ptr = - LoadSymbol("cudnnGetPoolingNdForwardOutputDim"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, inputTensorDesc, nbDims, outputTensorDimA); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetPooling2dForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc, - const cudnnTensorDescriptor_t inputTensorDesc, - int *n, int *c, int *h, int *w) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, - 
const cudnnTensorDescriptor_t, - int *, int *, int *, int *); - static auto func_ptr = - LoadSymbol("cudnnGetPooling2dForwardOutputDim"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, inputTensorDesc, n, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyPoolingDescriptor(cudnnPoolingDescriptor_t poolingDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPoolingDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyPoolingDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnPoolingForward( - cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnPoolingForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, poolingDesc, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnPoolingBackward( - cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc, - const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnPoolingBackward"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(handle, poolingDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x, - beta, dxDesc, dx); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateActivationDescriptor(cudnnActivationDescriptor_t *activationDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateActivationDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(activationDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetActivationDescriptor( - cudnnActivationDescriptor_t activationDesc, cudnnActivationMode_t mode, - cudnnNanPropagation_t reluNanOpt, double coef) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t, - cudnnActivationMode_t, - cudnnNanPropagation_t, double); - static auto func_ptr = LoadSymbol("cudnnSetActivationDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(activationDesc, mode, reluNanOpt, coef); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetActivationDescriptor(const cudnnActivationDescriptor_t activationDesc, - cudnnActivationMode_t *mode, - cudnnNanPropagation_t *reluNanOpt, double *coef) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnActivationDescriptor_t, cudnnActivationMode_t *, - cudnnNanPropagation_t *, double *); - static auto func_ptr = LoadSymbol("cudnnGetActivationDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(activationDesc, mode, reluNanOpt, coef); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyActivationDescriptor(cudnnActivationDescriptor_t activationDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t); - static auto func_ptr = - LoadSymbol("cudnnDestroyActivationDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(activationDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnActivationForward( - cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc, - const void *alpha, 
const cudnnTensorDescriptor_t xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnActivationDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnActivationForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, activationDesc, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnActivationBackward( - cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc, - const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnActivationDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnActivationBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, activationDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x, - beta, dxDesc, dx); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateLRNDescriptor(cudnnLRNDescriptor_t *normDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnLRNDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateLRNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(normDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetLRNDescriptor(cudnnLRNDescriptor_t normDesc, - unsigned lrnN, double lrnAlpha, - double lrnBeta, double lrnK) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnLRNDescriptor_t, unsigned int, double, double, double); - static 
auto func_ptr = LoadSymbol("cudnnSetLRNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(normDesc, lrnN, lrnAlpha, lrnBeta, lrnK); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc, - unsigned *lrnN, - double *lrnAlpha, - double *lrnBeta, double *lrnK) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnLRNDescriptor_t, unsigned int *, double *, double *, double *); - static auto func_ptr = LoadSymbol("cudnnGetLRNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(normDesc, lrnN, lrnAlpha, lrnBeta, lrnK); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyLRNDescriptor(cudnnLRNDescriptor_t lrnDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnLRNDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyLRNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(lrnDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnLRNCrossChannelForward( - cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnLRNCrossChannelForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, normDesc, lrnMode, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnLRNCrossChannelBackward( - cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode, - const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, - const cudnnTensorDescriptor_t dxDesc, 
void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnLRNCrossChannelBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, normDesc, lrnMode, alpha, yDesc, y, dyDesc, dy, xDesc, - x, beta, dxDesc, dx); -} - -cudnnStatus_t CUDNNWINAPI cudnnDivisiveNormalizationForward( - cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, - cudnnDivNormMode_t mode, const void *alpha, - const cudnnTensorDescriptor_t xDesc, /* same desc for means, temp, temp2 */ - const void *x, - const void *means, /* if NULL, means are assumed to be zero */ - void *temp, void *temp2, const void *beta, - const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, void *, void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = - LoadSymbol("cudnnDivisiveNormalizationForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, temp, temp2, - beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnDivisiveNormalizationBackward( - cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, - cudnnDivNormMode_t mode, const void *alpha, - const cudnnTensorDescriptor_t - xDesc, /* same desc for x, means, dy, temp, temp2 */ - const void *x, - const void *means, /* if NULL, means are assumed to be zero */ - const void *dy, void *temp, void *temp2, const void *beta, - const cudnnTensorDescriptor_t dXdMeansDesc, /* same desc for dx, dMeans */ - void *dx, /* output x differential */ - void *dMeans) { - using 
FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, const void *, - void *, void *, const void *, const cudnnTensorDescriptor_t, void *, - void *); - static auto func_ptr = - LoadSymbol("cudnnDivisiveNormalizationBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, dy, temp, - temp2, beta, dXdMeansDesc, dx, dMeans); -} - -cudnnStatus_t CUDNNWINAPI cudnnDeriveBNTensorDescriptor( - cudnnTensorDescriptor_t derivedBnDesc, const cudnnTensorDescriptor_t xDesc, - cudnnBatchNormMode_t mode) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, - cudnnBatchNormMode_t); - static auto func_ptr = LoadSymbol("cudnnDeriveBNTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(derivedBnDesc, xDesc, mode); -} - -cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardTraining( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, - - const void *alpha, /* alpha[0] = result blend factor */ - const void *beta, /* beta[0] = dest layer blend factor */ - - const cudnnTensorDescriptor_t xDesc, const void *x, /* NxCxHxW */ - const cudnnTensorDescriptor_t yDesc, void *y, /* NxCxHxW */ - - /* Shared desc for the next 6 tensors in the argument list. - Data type to be set as follows: - type = (typeOf(x) == double) ? 
double : float - Dimensions for this descriptor depend on normalization mode - - Spatial Normalization : tensors are expected to have dims 1xCx1x1 - (normalization is performed across NxHxW) - - Per-Activation Normalization : tensors are expected to have dims of - 1xCxHxW (normalization is performed across N) */ - const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, - - /* 'Gamma' and 'Beta' respectively in Ioffe and Szegedy's paper's notation - */ - const void *bnScale, const void *bnBias, - - /* MUST use factor=1 in the very first call of a complete training cycle. - Use a factor=1/(1+n) at N-th call to the function to get - Cumulative Moving Average (CMA) behavior - CMA[n] = (x[1]+...+x[n])/n - Since CMA[n+1] = (n*CMA[n]+x[n+1])/(n+1) = - ((n+1)*CMA[n]-CMA[n])/(n+1) + x[n+1]/(n+1) = - CMA[n]*(1-1/(n+1)) + x[n+1]*1/(n+1) */ - double exponentialAverageFactor, - - /* Used in Training phase only. - runningMean = newMean*factor + runningMean*(1-factor) */ - void *resultRunningMean, - /* Output in training mode, input in inference. Is the moving average - of variance[x] (factor is applied in the same way as for runningMean) */ - void *resultRunningVariance, - - /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and - backward functions. */ - double epsilon, - - /* Optionally save intermediate results from the forward pass here - - can be reused to speed up backward pass. 
NULL if unused */ - void *resultSaveMean, void *resultSaveInvVariance) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, - const void *, const void *, double, void *, void *, double, void *, - void *); - static auto func_ptr = - LoadSymbol("cudnnBatchNormalizationForwardTraining"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr( - handle, mode, alpha, beta, xDesc, x, yDesc, y, bnScaleBiasMeanVarDesc, - bnScale, bnBias, exponentialAverageFactor, resultRunningMean, - resultRunningVariance, epsilon, resultSaveMean, resultSaveInvVariance); -} - -cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardInference( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, - const void *alpha, /* alpha[0] = result blend factor */ - const void *beta, /* beta[0] = dest layer blend factor */ - const cudnnTensorDescriptor_t xDesc, const void *x, /* NxCxHxW */ - const cudnnTensorDescriptor_t yDesc, void *y, /* NxCxHxW */ - const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, const void *bnScale, - const void *bnBias, const void *estimatedMean, - const void *estimatedVariance, double epsilon) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, - const void *, const void *, const void *, const void *, double); - static auto func_ptr = - LoadSymbol("cudnnBatchNormalizationForwardInference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, alpha, beta, xDesc, x, yDesc, y, - bnScaleBiasMeanVarDesc, bnScale, bnBias, estimatedMean, - estimatedVariance, epsilon); -} - -cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationBackward( - cudnnHandle_t handle, cudnnBatchNormMode_t 
mode, const void *alphaDataDiff, - const void *betaDataDiff, const void *alphaParamDiff, - const void *betaParamDiff, - const cudnnTensorDescriptor_t xDesc, /* same desc for x, dx, dy */ - const void *x, const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnTensorDescriptor_t dxDesc, void *dx, - /* Shared tensor desc for the 4 tensors below */ - const cudnnTensorDescriptor_t dBnScaleBiasDesc, - const void *bnScale, /* bnBias doesn't affect backpropagation */ - /* scale and bias diff are not backpropagated below this layer */ - void *dBnScaleResult, void *dBnBiasResult, - /* Same epsilon as forward pass */ - double epsilon, - - /* Optionally cached intermediate results from - forward pass */ - const void *savedMean, const void *savedInvVariance) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, - const void *, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, - const void *, void *, void *, double, const void *, const void *); - static auto func_ptr = LoadSymbol("cudnnBatchNormalizationBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, alphaDataDiff, betaDataDiff, alphaParamDiff, - betaParamDiff, xDesc, x, dyDesc, dy, dxDesc, dx, - dBnScaleBiasDesc, bnScale, dBnScaleResult, dBnBiasResult, - epsilon, savedMean, savedInvVariance); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreateSpatialTransformerDescriptor( - cudnnSpatialTransformerDescriptor_t *stDesc) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t *); - static auto func_ptr = - LoadSymbol("cudnnCreateSpatialTransformerDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetSpatialTransformerNdDescriptor( - cudnnSpatialTransformerDescriptor_t stDesc, 
cudnnSamplerType_t samplerType, - cudnnDataType_t dataType, const int nbDims, const int dimA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnSpatialTransformerDescriptor_t, cudnnSamplerType_t, cudnnDataType_t, - const int, const int[]); - static auto func_ptr = - LoadSymbol("cudnnSetSpatialTransformerNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stDesc, samplerType, dataType, nbDims, dimA); -} - -cudnnStatus_t CUDNNWINAPI cudnnDestroySpatialTransformerDescriptor( - cudnnSpatialTransformerDescriptor_t stDesc) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t); - static auto func_ptr = - LoadSymbol("cudnnDestroySpatialTransformerDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSpatialTfGridGeneratorForward( - cudnnHandle_t handle, const cudnnSpatialTransformerDescriptor_t stDesc, - const void *theta, void *grid) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *, - void *); - static auto func_ptr = - LoadSymbol("cudnnSpatialTfGridGeneratorForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, stDesc, theta, grid); -} - -cudnnStatus_t CUDNNWINAPI cudnnSpatialTfGridGeneratorBackward( - cudnnHandle_t handle, const cudnnSpatialTransformerDescriptor_t stDesc, - const void *dgrid, void *dtheta) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *, - void *); - static auto func_ptr = - LoadSymbol("cudnnSpatialTfGridGeneratorBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, stDesc, dgrid, dtheta); -} - -cudnnStatus_t CUDNNWINAPI cudnnSpatialTfSamplerForward( - cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const 
void *grid, const void *beta, cudnnTensorDescriptor_t yDesc, - void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, const void *, - cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnSpatialTfSamplerForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, stDesc, alpha, xDesc, x, grid, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnSpatialTfSamplerBackward( - cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t dxDesc, void *dx, - const void *alphaDgrid, const cudnnTensorDescriptor_t dyDesc, - const void *dy, const void *grid, const void *betaDgrid, void *dgrid) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, const void *, - void *); - static auto func_ptr = LoadSymbol("cudnnSpatialTfSamplerBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, stDesc, alpha, xDesc, x, beta, dxDesc, dx, alphaDgrid, - dyDesc, dy, grid, betaDgrid, dgrid); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t *dropoutDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateDropoutDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dropoutDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t); - static auto func_ptr = 
LoadSymbol("cudnnDestroyDropoutDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dropoutDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnDropoutGetStatesSize(cudnnHandle_t handle, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnDropoutGetStatesSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnDropoutGetReserveSpaceSize( - cudnnTensorDescriptor_t xdesc, size_t *sizeInBytes) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnDropoutGetReserveSpaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(xdesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetDropoutDescriptor( - cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float dropout, - void *states, size_t stateSizeInBytes, unsigned long long seed) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, - float, void *, size_t, unsigned long long); - static auto func_ptr = LoadSymbol("cudnnSetDropoutDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dropoutDesc, handle, dropout, states, stateSizeInBytes, seed); -} - -cudnnStatus_t CUDNNWINAPI cudnnRestoreDropoutDescriptor( - cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float dropout, - void *states, size_t stateSizeInBytes, unsigned long long seed) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, - float, void *, size_t, unsigned long long); - static auto func_ptr = LoadSymbol("cudnnRestoreDropoutDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dropoutDesc, handle, dropout, states, stateSizeInBytes, seed); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetDropoutDescriptor( - cudnnDropoutDescriptor_t 
dropoutDesc, cudnnHandle_t handle, float *dropout, - void **states, unsigned long long *seed) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, - float *, void **, unsigned long long *); - static auto func_ptr = LoadSymbol("cudnnGetDropoutDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dropoutDesc, handle, dropout, states, seed); -} - -cudnnStatus_t CUDNNWINAPI cudnnDropoutForward( - cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc, - const cudnnTensorDescriptor_t xdesc, const void *x, - const cudnnTensorDescriptor_t ydesc, void *y, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnDropoutDescriptor_t, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnDropoutForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dropoutDesc, xdesc, x, ydesc, y, reserveSpace, - reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnDropoutBackward( - cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc, - const cudnnTensorDescriptor_t dydesc, const void *dy, - const cudnnTensorDescriptor_t dxdesc, void *dx, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnDropoutDescriptor_t, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnDropoutBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dropoutDesc, dydesc, dy, dxdesc, dx, reserveSpace, - reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateRNNDescriptor(cudnnRNNDescriptor_t *rnnDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t *); - static auto func_ptr = 
LoadSymbol("cudnnCreateRNNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyRNNDescriptor(cudnnRNNDescriptor_t rnnDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyRNNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNForwardInferenceAlgorithmMaxCount( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetRNNForwardInferenceAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, count); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindRNNForwardInferenceAlgorithmEx( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t *yDesc, void *y, - const cudnnTensorDescriptor_t hyDesc, void *hy, - const cudnnTensorDescriptor_t cyDesc, void *cy, const float findIntensity, - const int requestedAlgoCount, int *returnedAlgoCount, - cudnnAlgorithmPerformance_t *perfResults, void *workspace, - size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, const float, const int, - 
int *, cudnnAlgorithmPerformance_t *, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindRNNForwardInferenceAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, - wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, findIntensity, - requestedAlgoCount, returnedAlgoCount, perfResults, workspace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNForwardTrainingAlgorithmMaxCount( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetRNNForwardTrainingAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, count); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindRNNForwardTrainingAlgorithmEx( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t *yDesc, void *y, - const cudnnTensorDescriptor_t hyDesc, void *hy, - const cudnnTensorDescriptor_t cyDesc, void *cy, const float findIntensity, - const int requestedAlgoCount, int *returnedAlgoCount, - cudnnAlgorithmPerformance_t *perfResults, void *workspace, - size_t workSpaceSizeInBytes, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, - void *, const 
cudnnTensorDescriptor_t, void *, const float, const int, - int *, cudnnAlgorithmPerformance_t *, void *, size_t, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindRNNForwardTrainingAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, - wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, findIntensity, - requestedAlgoCount, returnedAlgoCount, perfResults, workspace, - workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNBackwardDataAlgorithmMaxCount( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetRNNBackwardDataAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, count); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindRNNBackwardDataAlgorithmEx( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *yDesc, const void *y, - const cudnnTensorDescriptor_t *dyDesc, const void *dy, - const cudnnTensorDescriptor_t dhyDesc, const void *dhy, - const cudnnTensorDescriptor_t dcyDesc, const void *dcy, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnTensorDescriptor_t *dxDesc, void *dx, - const cudnnTensorDescriptor_t dhxDesc, void *dhx, - const cudnnTensorDescriptor_t dcxDesc, void *dcx, const float findIntensity, - const int requestedAlgoCount, int *returnedAlgoCount, - cudnnAlgorithmPerformance_t *perfResults, void *workspace, - size_t workSpaceSizeInBytes, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const 
cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, const float, const int, - int *, cudnnAlgorithmPerformance_t *, void *, size_t, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindRNNBackwardDataAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc, - dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc, - dx, dhxDesc, dhx, dcxDesc, dcx, findIntensity, - requestedAlgoCount, returnedAlgoCount, perfResults, workspace, - workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNBackwardWeightsAlgorithmMaxCount( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetRNNBackwardWeightsAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, count); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindRNNBackwardWeightsAlgorithmEx( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t *yDesc, const void *y, - const float findIntensity, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnAlgorithmPerformance_t *perfResults, - const void *workspace, size_t workSpaceSizeInBytes, - const cudnnFilterDescriptor_t dwDesc, void *dw, const void *reserveSpace, - 
size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, const void *, const float, const int, - int *, cudnnAlgorithmPerformance_t *, const void *, size_t, - const cudnnFilterDescriptor_t, void *, const void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindRNNBackwardWeightsAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y, - findIntensity, requestedAlgoCount, returnedAlgoCount, - perfResults, workspace, workSpaceSizeInBytes, dwDesc, dw, - reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreatePersistentRNNPlan( - cudnnRNNDescriptor_t rnnDesc, const int minibatch, - const cudnnDataType_t dataType, cudnnPersistentRNNPlan_t *plan) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, const int, - const cudnnDataType_t, - cudnnPersistentRNNPlan_t *); - static auto func_ptr = LoadSymbol("cudnnCreatePersistentRNNPlan"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, minibatch, dataType, plan); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetPersistentRNNPlan( - cudnnRNNDescriptor_t rnnDesc, cudnnPersistentRNNPlan_t plan) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, - cudnnPersistentRNNPlan_t); - static auto func_ptr = LoadSymbol("cudnnSetPersistentRNNPlan"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, plan); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyPersistentRNNPlan(cudnnPersistentRNNPlan_t plan) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPersistentRNNPlan_t); - static auto func_ptr = LoadSymbol("cudnnDestroyPersistentRNNPlan"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan); -} - -cudnnStatus_t 
CUDNNWINAPI cudnnSetRNNDescriptor( - cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, const int hiddenSize, - const int numLayers, - cudnnDropoutDescriptor_t - dropoutDesc, /* Between layers, not between recurrent steps. */ - cudnnRNNInputMode_t inputMode, cudnnDirectionMode_t direction, - cudnnRNNMode_t mode, cudnnRNNAlgo_t algo, cudnnDataType_t dataType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int, - cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t, - cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, - inputMode, direction, mode, algo, dataType); -} - -cudnnStatus_t CUDNNWINAPI -cudnnSetRNNProjectionLayers(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, - const int recProjSize, const int outProjSize) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int); - static auto func_ptr = LoadSymbol("cudnnSetRNNProjectionLayers"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, recProjSize, outProjSize); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNProjectionLayers( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *recProjSize, - int *outProjSize) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, int *, int *); - static auto func_ptr = LoadSymbol("cudnnGetRNNProjectionLayers"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, recProjSize, outProjSize); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetRNNAlgorithmDescriptor( - cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, - cudnnAlgorithmDescriptor_t algoDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, 
cudnnAlgorithmDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNAlgorithmDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, algoDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNDescriptor( - cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, int *hiddenSize, - int *numLayers, cudnnDropoutDescriptor_t *dropoutDesc, - cudnnRNNInputMode_t *inputMode, cudnnDirectionMode_t *direction, - cudnnRNNMode_t *mode, cudnnRNNAlgo_t *algo, cudnnDataType_t *dataType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, int *, int *, - cudnnDropoutDescriptor_t *, cudnnRNNInputMode_t *, cudnnDirectionMode_t *, - cudnnRNNMode_t *, cudnnRNNAlgo_t *, cudnnDataType_t *); - static auto func_ptr = LoadSymbol("cudnnGetRNNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, - inputMode, direction, mode, algo, dataType); -} - -cudnnStatus_t CUDNNWINAPI -cudnnSetRNNMatrixMathType(cudnnRNNDescriptor_t rnnDesc, cudnnMathType_t mType) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnMathType_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNMatrixMathType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, mType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNMatrixMathType( - cudnnRNNDescriptor_t rnnDesc, cudnnMathType_t *mType) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnMathType_t *); - static auto func_ptr = LoadSymbol("cudnnGetRNNMatrixMathType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, mType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNWorkspaceSize( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, 
const int, - const cudnnTensorDescriptor_t *, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetRNNWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNTrainingReserveSize( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetRNNTrainingReserveSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetRNNParamsSize(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const cudnnTensorDescriptor_t xDesc, size_t *sizeInBytes, - cudnnDataType_t dataType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnTensorDescriptor_t, - size_t *, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnGetRNNParamsSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, xDesc, sizeInBytes, dataType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerMatrixParams( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int pseudoLayer, const cudnnTensorDescriptor_t xDesc, - const cudnnFilterDescriptor_t wDesc, const void *w, const int linLayerID, - cudnnFilterDescriptor_t linLayerMatDesc, void **linLayerMat) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, - const void *, const int, cudnnFilterDescriptor_t, void **); - static auto func_ptr = LoadSymbol("cudnnGetRNNLinLayerMatrixParams"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, pseudoLayer, xDesc, wDesc, w, linLayerID, - linLayerMatDesc, linLayerMat); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerBiasParams( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int pseudoLayer, const cudnnTensorDescriptor_t xDesc, - const cudnnFilterDescriptor_t wDesc, const void *w, const int linLayerID, - cudnnFilterDescriptor_t linLayerBiasDesc, void **linLayerBias) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, - const void *, const int, cudnnFilterDescriptor_t, void **); - static auto func_ptr = LoadSymbol("cudnnGetRNNLinLayerBiasParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, pseudoLayer, xDesc, wDesc, w, linLayerID, - linLayerBiasDesc, linLayerBias); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNForwardInference( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t *yDesc, void *y, - const cudnnTensorDescriptor_t hyDesc, void *hy, - const cudnnTensorDescriptor_t cyDesc, void *cy, void *workspace, - size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnRNNForwardInference"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, - wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNForwardTraining( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t *yDesc, void *y, - const cudnnTensorDescriptor_t hyDesc, void *hy, - const cudnnTensorDescriptor_t cyDesc, void *cy, void *workspace, - size_t workSpaceSizeInBytes, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *, - size_t); - static auto func_ptr = LoadSymbol("cudnnRNNForwardTraining"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, - wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, - workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnRNNBackwardData(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *yDesc, - const void *y, const cudnnTensorDescriptor_t *dyDesc, - const void *dy, const cudnnTensorDescriptor_t dhyDesc, - const void *dhy, const cudnnTensorDescriptor_t dcyDesc, - const void *dcy, const cudnnFilterDescriptor_t wDesc, - const void 
*w, const cudnnTensorDescriptor_t hxDesc, - const void *hx, const cudnnTensorDescriptor_t cxDesc, - const void *cx, const cudnnTensorDescriptor_t *dxDesc, - void *dx, const cudnnTensorDescriptor_t dhxDesc, void *dhx, - const cudnnTensorDescriptor_t dcxDesc, void *dcx, - void *workspace, size_t workSpaceSizeInBytes, - void *reserveSpace, size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *, - size_t); - static auto func_ptr = LoadSymbol("cudnnRNNBackwardData"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc, - dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc, - dx, dhxDesc, dhx, dcxDesc, dcx, workspace, - workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardWeights( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t *yDesc, const void *y, const void *workspace, - size_t workSpaceSizeInBytes, const cudnnFilterDescriptor_t dwDesc, void *dw, - const void *reserveSpace, size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - 
const cudnnTensorDescriptor_t *, const void *, const void *, size_t, - const cudnnFilterDescriptor_t, void *, const void *, size_t); - static auto func_ptr = LoadSymbol("cudnnRNNBackwardWeights"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y, - workspace, workSpaceSizeInBytes, dwDesc, dw, reserveSpace, - reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateCTCLossDescriptor(cudnnCTCLossDescriptor_t *ctcLossDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateCTCLossDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctcLossDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetCTCLossDescriptor( - cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t compType) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetCTCLossDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctcLossDesc, compType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossDescriptor( - cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t *compType) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t *); - static auto func_ptr = LoadSymbol("cudnnGetCTCLossDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctcLossDesc, compType); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyCTCLossDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctcLossDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnCTCLoss( - cudnnHandle_t handle, - const cudnnTensorDescriptor_t - probsDesc, /* Tensor descriptor for probabilities, the 
dimensions are - T,N,A (T is the timing steps, N is the - mini batch size, A is the alphabet size) */ - const void *probs, /* probabilities after softmax, in GPU memory */ - const int *labels, /* labels, in CPU memory */ - const int *labelLengths, /* the length of each label, in CPU memory */ - const int *inputLengths, /* the lengths of timing steps in each batch, in - CPU memory */ - void *costs, /* the returned costs of CTC, in GPU memory */ - const cudnnTensorDescriptor_t - gradientsDesc, /* Tensor descriptor for gradients, the dimensions are - T,N,A */ - const void *gradients, /* the returned CTC gradients, in GPU memory, to - compute costs only, set it to NULL */ - cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */ - cudnnCTCLossDescriptor_t ctcLossDesc, - void *workspace, /* pointer to the workspace, in GPU memory */ - size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const int *, - const int *, const int *, void *, const cudnnTensorDescriptor_t, - const void *, cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, void *, - size_t); - static auto func_ptr = LoadSymbol("cudnnCTCLoss"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, probsDesc, probs, labels, labelLengths, inputLengths, - costs, gradientsDesc, gradients, algo, ctcLossDesc, workspace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossWorkspaceSize( - cudnnHandle_t handle, - const cudnnTensorDescriptor_t - probsDesc, /* Tensor descriptor for probabilities, the dimensions are - T,N,A (T is the - timing steps, N is the mini batch size, A is the alphabet - size) */ - const cudnnTensorDescriptor_t - gradientsDesc, /* Tensor descriptor for gradients, the - dimensions are T,N,A. 
To compute costs - only, set it to NULL */ - const int *labels, /* labels, in CPU memory */ - const int *labelLengths, /* the length of each label, in CPU memory */ - const int *inputLengths, /* the lengths of timing steps in each batch, in - CPU memory */ - cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */ - cudnnCTCLossDescriptor_t ctcLossDesc, size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const int *, const int *, const int *, - cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetCTCLossWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, probsDesc, gradientsDesc, labels, labelLengths, - inputLengths, algo, ctcLossDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateAlgorithmDescriptor(cudnnAlgorithmDescriptor_t *algoDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateAlgorithmDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetAlgorithmDescriptor( - cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t algorithm) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t, - cudnnAlgorithm_t); - static auto func_ptr = LoadSymbol("cudnnSetAlgorithmDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoDesc, algorithm); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetAlgorithmDescriptor( - const cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t *algorithm) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(const cudnnAlgorithmDescriptor_t, - cudnnAlgorithm_t *); - static auto func_ptr = LoadSymbol("cudnnGetAlgorithmDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoDesc, algorithm); -} - -cudnnStatus_t 
CUDNNWINAPI cudnnCopyAlgorithmDescriptor( - const cudnnAlgorithmDescriptor_t src, cudnnAlgorithmDescriptor_t dest) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(const cudnnAlgorithmDescriptor_t, - cudnnAlgorithmDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnCopyAlgorithmDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(src, dest); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyAlgorithmDescriptor(cudnnAlgorithmDescriptor_t algoDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyAlgorithmDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreateAlgorithmPerformance( - cudnnAlgorithmPerformance_t *algoPerf, int numberToCreate) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmPerformance_t *, int); - static auto func_ptr = LoadSymbol("cudnnCreateAlgorithmPerformance"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoPerf, numberToCreate); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetAlgorithmPerformance( - cudnnAlgorithmPerformance_t algoPerf, cudnnAlgorithmDescriptor_t algoDesc, - cudnnStatus_t status, float time, size_t memory) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmPerformance_t, - cudnnAlgorithmDescriptor_t, - cudnnStatus_t, float, size_t); - static auto func_ptr = LoadSymbol("cudnnSetAlgorithmPerformance"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoPerf, algoDesc, status, time, memory); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetAlgorithmPerformance( - const cudnnAlgorithmPerformance_t algoPerf, - cudnnAlgorithmDescriptor_t *algoDesc, cudnnStatus_t *status, float *time, - size_t *memory) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnAlgorithmPerformance_t, cudnnAlgorithmDescriptor_t *, - cudnnStatus_t *, float *, size_t *); - static auto func_ptr = 
LoadSymbol("cudnnGetAlgorithmPerformance"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoPerf, algoDesc, status, time, memory); -} - -cudnnStatus_t CUDNNWINAPI cudnnDestroyAlgorithmPerformance( - cudnnAlgorithmPerformance_t *algoPerf, int numberToDestroy) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmPerformance_t *, int); - static auto func_ptr = - LoadSymbol("cudnnDestroyAlgorithmPerformance"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoPerf, numberToDestroy); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetAlgorithmSpaceSize( - cudnnHandle_t handle, cudnnAlgorithmDescriptor_t algoDesc, - size_t *algoSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnAlgorithmDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetAlgorithmSpaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algoDesc, algoSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnSaveAlgorithm(cudnnHandle_t handle, cudnnAlgorithmDescriptor_t algoDesc, - void *algoSpace, size_t algoSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnAlgorithmDescriptor_t, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnSaveAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algoDesc, algoSpace, algoSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnRestoreAlgorithm( - cudnnHandle_t handle, void *algoSpace, size_t algoSpaceSizeInBytes, - cudnnAlgorithmDescriptor_t algoDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, void *, size_t, - cudnnAlgorithmDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnRestoreAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algoSpace, algoSpaceSizeInBytes, algoDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNSetClip(cudnnHandle_t handle, - cudnnRNNDescriptor_t rnnDesc, - 
cudnnRNNClipMode_t clipMode, - cudnnNanPropagation_t clipNanOpt, - double lclip, double rclip) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, cudnnRNNClipMode_t, - cudnnNanPropagation_t, double, double); - static auto func_ptr = LoadSymbol("cudnnRNNSetClip"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, clipMode, clipNanOpt, lclip, rclip); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNGetClip(cudnnHandle_t handle, - cudnnRNNDescriptor_t rnnDesc, - cudnnRNNClipMode_t *clipMode, - cudnnNanPropagation_t *clipNanOpt, - double *lclip, double *rclip) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, cudnnRNNClipMode_t *, - cudnnNanPropagation_t *, double *, double *); - static auto func_ptr = LoadSymbol("cudnnRNNGetClip"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, clipMode, clipNanOpt, lclip, rclip); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetCallback(unsigned mask, void *udata, - cudnnCallback_t fptr) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(unsigned int, void *, cudnnCallback_t); - static auto func_ptr = LoadSymbol("cudnnSetCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mask, udata, fptr); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetCallback(unsigned *mask, void **udata, - cudnnCallback_t *fptr) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(unsigned int *, void **, cudnnCallback_t *); - static auto func_ptr = LoadSymbol("cudnnGetCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mask, udata, fptr); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetRNNPaddingMode( - cudnnRNNDescriptor_t rnnDesc, cudnnRNNPaddingMode_t paddingMode) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnRNNPaddingMode_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNPaddingMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(rnnDesc, paddingMode); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNPaddingMode( - cudnnRNNDescriptor_t rnnDesc, cudnnRNNPaddingMode_t *paddingMode) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, - cudnnRNNPaddingMode_t *); - static auto func_ptr = LoadSymbol("cudnnGetRNNPaddingMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, paddingMode); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateRNNDataDescriptor(cudnnRNNDataDescriptor_t *RNNDataDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDataDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateRNNDataDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(RNNDataDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyRNNDataDescriptor(cudnnRNNDataDescriptor_t RNNDataDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDataDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyRNNDataDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(RNNDataDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetRNNDataDescriptor( - cudnnRNNDataDescriptor_t RNNDataDesc, cudnnDataType_t dataType, - cudnnRNNDataLayout_t layout, int maxSeqLength, int batchSize, - int vectorSize, - const int seqLengthArray[], /* length of each sequence in the batch */ - void *paddingFill) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnRNNDataDescriptor_t, cudnnDataType_t, cudnnRNNDataLayout_t, int, int, - int, const int[], void *); - static auto func_ptr = LoadSymbol("cudnnSetRNNDataDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(RNNDataDesc, dataType, layout, maxSeqLength, batchSize, - vectorSize, seqLengthArray, paddingFill); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNDataDescriptor( - cudnnRNNDataDescriptor_t RNNDataDesc, cudnnDataType_t *dataType, - cudnnRNNDataLayout_t *layout, int *maxSeqLength, int *batchSize, - int *vectorSize, int arrayLengthRequested, 
int seqLengthArray[], - void *paddingFill) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnRNNDataDescriptor_t, cudnnDataType_t *, cudnnRNNDataLayout_t *, - int *, int *, int *, int, int[], void *); - static auto func_ptr = LoadSymbol("cudnnGetRNNDataDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(RNNDataDesc, dataType, layout, maxSeqLength, batchSize, - vectorSize, arrayLengthRequested, seqLengthArray, - paddingFill); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNForwardTrainingEx( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const cudnnRNNDataDescriptor_t xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnRNNDataDescriptor_t yDesc, void *y, - const cudnnTensorDescriptor_t hyDesc, void *hy, - const cudnnTensorDescriptor_t cyDesc, void *cy, - const cudnnRNNDataDescriptor_t kDesc, /* reserved, should pass NULL */ - const void *keys, /* reserved, should pass NULL */ - const cudnnRNNDataDescriptor_t cDesc, /* reserved, should pass NULL */ - void *cAttn, /* reserved, should pass NULL */ - const cudnnRNNDataDescriptor_t iDesc, /* reserved, should pass NULL */ - void *iAttn, /* reserved, should pass NULL */ - const cudnnRNNDataDescriptor_t qDesc, /* reserved, should pass NULL */ - void *queries, /* reserved, should pass NULL */ - void *workSpace, size_t workSpaceSizeInBytes, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, - const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, - const cudnnRNNDataDescriptor_t, const void *, 
- const cudnnRNNDataDescriptor_t, void *, const cudnnRNNDataDescriptor_t, - void *, const cudnnRNNDataDescriptor_t, void *, void *, size_t, void *, - size_t); - static auto func_ptr = LoadSymbol("cudnnRNNForwardTrainingEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, - yDesc, y, hyDesc, hy, cyDesc, cy, kDesc, keys, cDesc, cAttn, - iDesc, iAttn, qDesc, queries, workSpace, workSpaceSizeInBytes, - reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNForwardInferenceEx( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const cudnnRNNDataDescriptor_t xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnRNNDataDescriptor_t yDesc, void *y, - const cudnnTensorDescriptor_t hyDesc, void *hy, - const cudnnTensorDescriptor_t cyDesc, void *cy, - const cudnnRNNDataDescriptor_t kDesc, /* reserved, should pass NULL */ - const void *keys, /* reserved, should pass NULL */ - const cudnnRNNDataDescriptor_t cDesc, /* reserved, should pass NULL */ - void *cAttn, /* reserved, should pass NULL */ - const cudnnRNNDataDescriptor_t iDesc, /* reserved, should pass NULL */ - void *iAttn, /* reserved, should pass NULL */ - const cudnnRNNDataDescriptor_t qDesc, /* reserved, should pass NULL */ - void *queries, /* reserved, should pass NULL */ - void *workSpace, size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, - const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, - const cudnnRNNDataDescriptor_t, const 
void *, - const cudnnRNNDataDescriptor_t, void *, const cudnnRNNDataDescriptor_t, - void *, const cudnnRNNDataDescriptor_t, void *, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnRNNForwardInferenceEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, - yDesc, y, hyDesc, hy, cyDesc, cy, kDesc, keys, cDesc, cAttn, - iDesc, iAttn, qDesc, queries, workSpace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardDataEx( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const cudnnRNNDataDescriptor_t yDesc, const void *y, - const cudnnRNNDataDescriptor_t dyDesc, const void *dy, - const cudnnRNNDataDescriptor_t dcDesc, /* reserved, should pass NULL */ - const void *dcAttn, /* reserved, should pass NULL */ - const cudnnTensorDescriptor_t dhyDesc, const void *dhy, - const cudnnTensorDescriptor_t dcyDesc, const void *dcy, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnRNNDataDescriptor_t dxDesc, void *dx, - const cudnnTensorDescriptor_t dhxDesc, void *dhx, - const cudnnTensorDescriptor_t dcxDesc, void *dcx, - const cudnnRNNDataDescriptor_t dkDesc, /* reserved, should pass NULL */ - void *dkeys, /* reserved, should pass NULL */ - void *workSpace, size_t workSpaceSizeInBytes, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, - const void *, const cudnnRNNDataDescriptor_t, const void *, - const cudnnRNNDataDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnRNNDataDescriptor_t, 
void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, - const cudnnRNNDataDescriptor_t, void *, void *, size_t, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnRNNBackwardDataEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, yDesc, y, dyDesc, dy, dcDesc, dcAttn, - dhyDesc, dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, - dxDesc, dx, dhxDesc, dhx, dcxDesc, dcx, dkDesc, dkeys, - workSpace, workSpaceSizeInBytes, reserveSpace, - reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardWeightsEx( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const cudnnRNNDataDescriptor_t xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnRNNDataDescriptor_t yDesc, const void *y, void *workSpace, - size_t workSpaceSizeInBytes, const cudnnFilterDescriptor_t dwDesc, void *dw, - void *reserveSpace, size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, - const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnRNNDataDescriptor_t, const void *, void *, size_t, - const cudnnFilterDescriptor_t, void *, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnRNNBackwardWeightsEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, yDesc, y, workSpace, - workSpaceSizeInBytes, dwDesc, dw, reserveSpace, - reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor_v6( - cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, const int hiddenSize, - const int numLayers, cudnnDropoutDescriptor_t dropoutDesc, - cudnnRNNInputMode_t inputMode, cudnnDirectionMode_t direction, - cudnnRNNMode_t mode, cudnnRNNAlgo_t algo, cudnnDataType_t dataType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, const int, const 
int, - cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t, - cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNDescriptor_v6"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, - inputMode, direction, mode, algo, dataType); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor_v5( - cudnnRNNDescriptor_t rnnDesc, int hiddenSize, int numLayers, - cudnnDropoutDescriptor_t dropoutDesc, cudnnRNNInputMode_t inputMode, - cudnnDirectionMode_t direction, cudnnRNNMode_t mode, - cudnnDataType_t dataType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnRNNDescriptor_t, int, int, cudnnDropoutDescriptor_t, - cudnnRNNInputMode_t, cudnnDirectionMode_t, cudnnRNNMode_t, - cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNDescriptor_v5"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode, - direction, mode, dataType); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cudnn_7_4.inc b/third_party/xla/third_party/tsl/tsl/cuda/cudnn_7_4.inc deleted file mode 100644 index 883c8ba8812ceb..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cudnn_7_4.inc +++ /dev/null @@ -1,2726 +0,0 @@ -// Auto-generated, do not edit. 
- -extern "C" { - -size_t CUDNNWINAPI cudnnGetVersion(void) { - using FuncPtr = size_t(CUDNNWINAPI *)(); - static auto func_ptr = LoadSymbol("cudnnGetVersion"); - if (!func_ptr) return 0; - return func_ptr(); -} - -size_t CUDNNWINAPI cudnnGetCudartVersion(void) { - using FuncPtr = size_t(CUDNNWINAPI *)(); - static auto func_ptr = LoadSymbol("cudnnGetCudartVersion"); - if (!func_ptr) return 0; - return func_ptr(); -} - -const char *CUDNNWINAPI cudnnGetErrorString(cudnnStatus_t status) { - using FuncPtr = const char *(CUDNNWINAPI *)(cudnnStatus_t); - static auto func_ptr = LoadSymbol("cudnnGetErrorString"); - if (!func_ptr) return "cudnnGetErrorString symbol not found."; - return func_ptr(status); -} - -cudnnStatus_t CUDNNWINAPI cudnnQueryRuntimeError(cudnnHandle_t handle, - cudnnStatus_t *rstatus, - cudnnErrQueryMode_t mode, - cudnnRuntimeTag_t *tag) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnStatus_t *, cudnnErrQueryMode_t, cudnnRuntimeTag_t *); - static auto func_ptr = LoadSymbol("cudnnQueryRuntimeError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rstatus, mode, tag); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetProperty(libraryPropertyType type, - int *value) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(libraryPropertyType, int *); - static auto func_ptr = LoadSymbol("cudnnGetProperty"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type, value); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreate(cudnnHandle_t *handle) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t *); - static auto func_ptr = LoadSymbol("cudnnCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cudnnStatus_t CUDNNWINAPI cudnnDestroy(cudnnHandle_t handle) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t); - static auto func_ptr = LoadSymbol("cudnnDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cudnnStatus_t 
CUDNNWINAPI cudnnSetStream(cudnnHandle_t handle, - cudaStream_t streamId) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudnnSetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetStream(cudnnHandle_t handle, - cudaStream_t *streamId) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t *); - static auto func_ptr = LoadSymbol("cudnnGetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensor4dDescriptor( - cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format, - cudnnDataType_t dataType, /* image data type */ - int n, /* number of inputs (batch size) */ - int c, /* number of input feature maps */ - int h, /* height of input section */ - int w) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t, - cudnnDataType_t, int, int, int, int); - static auto func_ptr = LoadSymbol("cudnnSetTensor4dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, format, dataType, n, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensor4dDescriptorEx( - cudnnTensorDescriptor_t tensorDesc, - cudnnDataType_t dataType, /* image data type */ - int n, /* number of inputs (batch size) */ - int c, /* number of input feature maps */ - int h, /* height of input section */ - int w, /* width of input section */ - int nStride, int cStride, int hStride, int wStride) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI 
*)(cudnnTensorDescriptor_t, cudnnDataType_t, - int, int, int, int, int, int, int, int); - static auto func_ptr = LoadSymbol("cudnnSetTensor4dDescriptorEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride, - wStride); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetTensor4dDescriptor( - const cudnnTensorDescriptor_t tensorDesc, - cudnnDataType_t *dataType, /* image data type */ - int *n, /* number of inputs (batch size) */ - int *c, /* number of input feature maps */ - int *h, /* height of input section */ - int *w, /* width of input section */ - int *nStride, int *cStride, int *hStride, int *wStride) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnTensorDescriptor_t, cudnnDataType_t *, int *, int *, int *, - int *, int *, int *, int *, int *); - static auto func_ptr = LoadSymbol("cudnnGetTensor4dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride, - wStride); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensorNdDescriptor( - cudnnTensorDescriptor_t tensorDesc, cudnnDataType_t dataType, int nbDims, - const int dimA[], const int strideA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnTensorDescriptor_t, cudnnDataType_t, int, const int[], const int[]); - static auto func_ptr = LoadSymbol("cudnnSetTensorNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, dataType, nbDims, dimA, strideA); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensorNdDescriptorEx( - cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format, - cudnnDataType_t dataType, int nbDims, const int dimA[]) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t, - cudnnDataType_t, int, const int[]); - static auto func_ptr = LoadSymbol("cudnnSetTensorNdDescriptorEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(tensorDesc, format, dataType, nbDims, dimA); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetTensorNdDescriptor( - const cudnnTensorDescriptor_t tensorDesc, int nbDimsRequested, - cudnnDataType_t *dataType, int *nbDims, int dimA[], int strideA[]) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(const cudnnTensorDescriptor_t, int, - cudnnDataType_t *, int *, int[], int[]); - static auto func_ptr = LoadSymbol("cudnnGetTensorNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, nbDimsRequested, dataType, nbDims, dimA, strideA); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetTensorSizeInBytes( - const cudnnTensorDescriptor_t tensorDesc, size_t *size) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(const cudnnTensorDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetTensorSizeInBytes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, size); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyTensorDescriptor(cudnnTensorDescriptor_t tensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnTransformTensor( - cudnnHandle_t handle, const void *alpha, - const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, - const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnTransformTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnAddTensor(cudnnHandle_t handle, - const void *alpha, - const cudnnTensorDescriptor_t aDesc, - const void *A, const void *beta, - 
const cudnnTensorDescriptor_t cDesc, - void *C) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnAddTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, aDesc, A, beta, cDesc, C); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateOpTensorDescriptor(cudnnOpTensorDescriptor_t *opTensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateOpTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(opTensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetOpTensorDescriptor( - cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t opTensorOp, - cudnnDataType_t opTensorCompType, cudnnNanPropagation_t opTensorNanOpt) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t, - cudnnDataType_t, cudnnNanPropagation_t); - static auto func_ptr = LoadSymbol("cudnnSetOpTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(opTensorDesc, opTensorOp, opTensorCompType, opTensorNanOpt); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetOpTensorDescriptor( - const cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t *opTensorOp, - cudnnDataType_t *opTensorCompType, cudnnNanPropagation_t *opTensorNanOpt) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t *, cudnnDataType_t *, - cudnnNanPropagation_t *); - static auto func_ptr = LoadSymbol("cudnnGetOpTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(opTensorDesc, opTensorOp, opTensorCompType, opTensorNanOpt); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI 
*)(cudnnOpTensorDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyOpTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(opTensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnOpTensor( - cudnnHandle_t handle, const cudnnOpTensorDescriptor_t opTensorDesc, - const void *alpha1, const cudnnTensorDescriptor_t aDesc, const void *A, - const void *alpha2, const cudnnTensorDescriptor_t bDesc, const void *B, - const void *beta, const cudnnTensorDescriptor_t cDesc, void *C) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnOpTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnOpTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opTensorDesc, alpha1, aDesc, A, alpha2, bDesc, B, - beta, cDesc, C); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreateReduceTensorDescriptor( - cudnnReduceTensorDescriptor_t *reduceTensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t *); - static auto func_ptr = - LoadSymbol("cudnnCreateReduceTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(reduceTensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetReduceTensorDescriptor( - cudnnReduceTensorDescriptor_t reduceTensorDesc, - cudnnReduceTensorOp_t reduceTensorOp, cudnnDataType_t reduceTensorCompType, - cudnnNanPropagation_t reduceTensorNanOpt, - cudnnReduceTensorIndices_t reduceTensorIndices, - cudnnIndicesType_t reduceTensorIndicesType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t, cudnnDataType_t, - cudnnNanPropagation_t, cudnnReduceTensorIndices_t, cudnnIndicesType_t); - static auto func_ptr = LoadSymbol("cudnnSetReduceTensorDescriptor"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType, - reduceTensorNanOpt, reduceTensorIndices, - reduceTensorIndicesType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetReduceTensorDescriptor( - const cudnnReduceTensorDescriptor_t reduceTensorDesc, - cudnnReduceTensorOp_t *reduceTensorOp, - cudnnDataType_t *reduceTensorCompType, - cudnnNanPropagation_t *reduceTensorNanOpt, - cudnnReduceTensorIndices_t *reduceTensorIndices, - cudnnIndicesType_t *reduceTensorIndicesType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t *, - cudnnDataType_t *, cudnnNanPropagation_t *, cudnnReduceTensorIndices_t *, - cudnnIndicesType_t *); - static auto func_ptr = LoadSymbol("cudnnGetReduceTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType, - reduceTensorNanOpt, reduceTensorIndices, - reduceTensorIndicesType); -} - -cudnnStatus_t CUDNNWINAPI cudnnDestroyReduceTensorDescriptor( - cudnnReduceTensorDescriptor_t reduceTensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t); - static auto func_ptr = - LoadSymbol("cudnnDestroyReduceTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(reduceTensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetReductionIndicesSize( - cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, - const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnReduceTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetReductionIndicesSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes); -} - -cudnnStatus_t 
CUDNNWINAPI cudnnGetReductionWorkspaceSize( - cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, - const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnReduceTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetReductionWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnReduceTensor( - cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, - void *indices, size_t indicesSizeInBytes, void *workspace, - size_t workspaceSizeInBytes, const void *alpha, - const cudnnTensorDescriptor_t aDesc, const void *A, const void *beta, - const cudnnTensorDescriptor_t cDesc, void *C) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnReduceTensorDescriptor_t, void *, size_t, - void *, size_t, const void *, const cudnnTensorDescriptor_t, const void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnReduceTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, reduceTensorDesc, indices, indicesSizeInBytes, - workspace, workspaceSizeInBytes, alpha, aDesc, A, beta, cDesc, - C); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensor(cudnnHandle_t handle, - const cudnnTensorDescriptor_t yDesc, - void *y, const void *valuePtr) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *); - static auto func_ptr = LoadSymbol("cudnnSetTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, yDesc, y, valuePtr); -} - -cudnnStatus_t CUDNNWINAPI cudnnScaleTensor(cudnnHandle_t handle, - const cudnnTensorDescriptor_t yDesc, - void *y, const 
void *alpha) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *); - static auto func_ptr = LoadSymbol("cudnnScaleTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, yDesc, y, alpha); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateFilterDescriptor(cudnnFilterDescriptor_t *filterDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateFilterDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetFilter4dDescriptor( - cudnnFilterDescriptor_t filterDesc, - cudnnDataType_t dataType, /* image data type */ - cudnnTensorFormat_t format, int k, /* number of output feature maps */ - int c, /* number of input feature maps */ - int h, /* height of each input filter */ - int w) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t, - cudnnTensorFormat_t, int, int, int, int); - static auto func_ptr = LoadSymbol("cudnnSetFilter4dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc, dataType, format, k, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetFilter4dDescriptor( - const cudnnFilterDescriptor_t filterDesc, - cudnnDataType_t *dataType, /* image data type */ - cudnnTensorFormat_t *format, int *k, /* number of output feature maps */ - int *c, /* number of input feature maps */ - int *h, /* height of each input filter */ - int *w) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnFilterDescriptor_t, cudnnDataType_t *, cudnnTensorFormat_t *, - int *, int *, int *, int *); - static auto func_ptr = LoadSymbol("cudnnGetFilter4dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc, dataType, format, k, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetFilterNdDescriptor( - cudnnFilterDescriptor_t 
filterDesc, - cudnnDataType_t dataType, /* image data type */ - cudnnTensorFormat_t format, int nbDims, const int filterDimA[]) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t, - cudnnTensorFormat_t, int, const int[]); - static auto func_ptr = LoadSymbol("cudnnSetFilterNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc, dataType, format, nbDims, filterDimA); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetFilterNdDescriptor( - const cudnnFilterDescriptor_t filterDesc, int nbDimsRequested, - cudnnDataType_t *dataType, /* image data type */ - cudnnTensorFormat_t *format, int *nbDims, int filterDimA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnFilterDescriptor_t, int, cudnnDataType_t *, - cudnnTensorFormat_t *, int *, int[]); - static auto func_ptr = LoadSymbol("cudnnGetFilterNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc, nbDimsRequested, dataType, format, nbDims, - filterDimA); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyFilterDescriptor(cudnnFilterDescriptor_t filterDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyFilterDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t *convDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t *); - static auto func_ptr = - LoadSymbol("cudnnCreateConvolutionDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionMathType( - cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t mathType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, - cudnnMathType_t); - static auto func_ptr = LoadSymbol("cudnnSetConvolutionMathType"); 
- if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, mathType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionMathType( - cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t *mathType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, - cudnnMathType_t *); - static auto func_ptr = LoadSymbol("cudnnGetConvolutionMathType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, mathType); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionGroupCount( - cudnnConvolutionDescriptor_t convDesc, int groupCount) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int); - static auto func_ptr = LoadSymbol("cudnnSetConvolutionGroupCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, groupCount); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionGroupCount( - cudnnConvolutionDescriptor_t convDesc, int *groupCount) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int *); - static auto func_ptr = LoadSymbol("cudnnGetConvolutionGroupCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, groupCount); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetConvolution2dDescriptor( - cudnnConvolutionDescriptor_t convDesc, int pad_h, /* zero-padding height */ - int pad_w, /* zero-padding width */ - int u, /* vertical filter stride */ - int v, /* horizontal filter stride */ - int dilation_h, /* filter dilation in the vertical dimension */ - int dilation_w, /* filter dilation in the horizontal dimension */ - cudnnConvolutionMode_t mode, cudnnDataType_t computeType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnConvolutionDescriptor_t, int, int, int, int, int, int, - cudnnConvolutionMode_t, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetConvolution2dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, pad_h, pad_w, u, v, 
dilation_h, dilation_w, mode, - computeType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dDescriptor( - const cudnnConvolutionDescriptor_t convDesc, - int *pad_h, /* zero-padding height */ - int *pad_w, /* zero-padding width */ - int *u, /* vertical filter stride */ - int *v, /* horizontal filter stride */ - int *dilation_h, /* filter dilation in the vertical dimension */ - int *dilation_w, /* filter dilation in the horizontal dimension */ - cudnnConvolutionMode_t *mode, cudnnDataType_t *computeType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnConvolutionDescriptor_t, int *, int *, int *, int *, int *, - int *, cudnnConvolutionMode_t *, cudnnDataType_t *); - static auto func_ptr = LoadSymbol("cudnnGetConvolution2dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode, - computeType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dForwardOutputDim( - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t inputTensorDesc, - const cudnnFilterDescriptor_t filterDesc, int *n, int *c, int *h, int *w) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, int *, int *, int *, int *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolution2dForwardOutputDim"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, inputTensorDesc, filterDesc, n, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionNdDescriptor( - cudnnConvolutionDescriptor_t convDesc, int arrayLength, /* nbDims-2 size */ - const int padA[], const int filterStrideA[], const int dilationA[], - cudnnConvolutionMode_t mode, cudnnDataType_t computeType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnConvolutionDescriptor_t, int, const int[], const int[], const int[], - cudnnConvolutionMode_t, cudnnDataType_t); - static auto func_ptr 
= LoadSymbol("cudnnSetConvolutionNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, arrayLength, padA, filterStrideA, dilationA, mode, - computeType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionNdDescriptor( - const cudnnConvolutionDescriptor_t convDesc, int arrayLengthRequested, - int *arrayLength, int padA[], int strideA[], int dilationA[], - cudnnConvolutionMode_t *mode, cudnnDataType_t *computeType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnConvolutionDescriptor_t, int, int *, int[], int[], int[], - cudnnConvolutionMode_t *, cudnnDataType_t *); - static auto func_ptr = LoadSymbol("cudnnGetConvolutionNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, arrayLengthRequested, arrayLength, padA, strideA, - dilationA, mode, computeType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionNdForwardOutputDim( - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t inputTensorDesc, - const cudnnFilterDescriptor_t filterDesc, int nbDims, - int tensorOutputDimA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, int, int[]); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionNdForwardOutputDim"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, inputTensorDesc, filterDesc, nbDims, - tensorOutputDimA); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyConvolutionDescriptor(cudnnConvolutionDescriptor_t convDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t); - static auto func_ptr = - LoadSymbol("cudnnDestroyConvolutionDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetConvolutionForwardAlgorithmMaxCount(cudnnHandle_t handle, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI 
*)(cudnnHandle_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionForwardAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, count); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionForwardAlgorithm( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnFilterDescriptor_t wDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t yDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, const int, int *, - cudnnConvolutionFwdAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionForwardAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, requestedAlgoCount, - returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionForwardAlgorithmEx( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t yDesc, void *y, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults, - void *workSpace, size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *, - const int, int *, cudnnConvolutionFwdAlgoPerf_t *, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionForwardAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, x, wDesc, w, convDesc, yDesc, y, - 
requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardAlgorithm( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnFilterDescriptor_t wDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t yDesc, - cudnnConvolutionFwdPreference_t preference, size_t memoryLimitInBytes, - cudnnConvolutionFwdAlgo_t *algo) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, cudnnConvolutionFwdPreference_t, size_t, - cudnnConvolutionFwdAlgo_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionForwardAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, preference, - memoryLimitInBytes, algo); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardAlgorithm_v7( - cudnnHandle_t handle, const cudnnTensorDescriptor_t srcDesc, - const cudnnFilterDescriptor_t filterDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t destDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, const int, int *, - cudnnConvolutionFwdAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionForwardAlgorithm_v7"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, srcDesc, filterDesc, convDesc, destDesc, - requestedAlgoCount, returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardWorkspaceSize( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnFilterDescriptor_t wDesc, - 
const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t yDesc, cudnnConvolutionFwdAlgo_t algo, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, cudnnConvolutionFwdAlgo_t, size_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionForwardWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, algo, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionForward( - cudnnHandle_t handle, const void *alpha, - const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo, - void *workSpace, size_t workSpaceSizeInBytes, const void *beta, - const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *, - size_t, const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnConvolutionForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, xDesc, x, wDesc, w, convDesc, algo, workSpace, - workSpaceSizeInBytes, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionBiasActivationForward( - cudnnHandle_t handle, const void *alpha1, - const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo, - void *workSpace, size_t workSpaceSizeInBytes, const void *alpha2, - const cudnnTensorDescriptor_t zDesc, const void *z, - const cudnnTensorDescriptor_t biasDesc, 
const void *bias, - const cudnnActivationDescriptor_t activationDesc, - const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *, - size_t, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnActivationDescriptor_t, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = - LoadSymbol("cudnnConvolutionBiasActivationForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha1, xDesc, x, wDesc, w, convDesc, algo, workSpace, - workSpaceSizeInBytes, alpha2, zDesc, z, biasDesc, bias, - activationDesc, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardBias( - cudnnHandle_t handle, const void *alpha, - const cudnnTensorDescriptor_t dyDesc, const void *dy, const void *beta, - const cudnnTensorDescriptor_t dbDesc, void *db) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnConvolutionBackwardBias"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, dyDesc, dy, beta, dbDesc, db); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithmMaxCount( - cudnnHandle_t handle, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardFilterAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, count); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardFilterAlgorithm( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnTensorDescriptor_t dyDesc, - 
const cudnnConvolutionDescriptor_t convDesc, - const cudnnFilterDescriptor_t dwDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnFilterDescriptor_t, const int, int *, - cudnnConvolutionBwdFilterAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionBackwardFilterAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, requestedAlgoCount, - returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardFilterAlgorithmEx( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnTensorDescriptor_t dyDesc, const void *y, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnFilterDescriptor_t dwDesc, void *dw, - const int requestedAlgoCount, int *returnedAlgoCount, - cudnnConvolutionBwdFilterAlgoPerf_t *perfResults, void *workSpace, - size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, void *, - const int, int *, cudnnConvolutionBwdFilterAlgoPerf_t *, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionBackwardFilterAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, x, dyDesc, y, convDesc, dwDesc, dw, - requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithm( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - 
const cudnnFilterDescriptor_t dwDesc, - cudnnConvolutionBwdFilterPreference_t preference, size_t memoryLimitInBytes, - cudnnConvolutionBwdFilterAlgo_t *algo) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterPreference_t, - size_t, cudnnConvolutionBwdFilterAlgo_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardFilterAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, preference, - memoryLimitInBytes, algo); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithm_v7( - cudnnHandle_t handle, const cudnnTensorDescriptor_t srcDesc, - const cudnnTensorDescriptor_t diffDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnFilterDescriptor_t gradDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnFilterDescriptor_t, const int, int *, - cudnnConvolutionBwdFilterAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardFilterAlgorithm_v7"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, srcDesc, diffDesc, convDesc, gradDesc, - requestedAlgoCount, returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterWorkspaceSize( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnFilterDescriptor_t gradDesc, - cudnnConvolutionBwdFilterAlgo_t algo, size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const 
cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, size_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardFilterWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, dyDesc, convDesc, gradDesc, algo, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardFilter( - cudnnHandle_t handle, const void *alpha, - const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnConvolutionDescriptor_t convDesc, - cudnnConvolutionBwdFilterAlgo_t algo, void *workSpace, - size_t workSpaceSizeInBytes, const void *beta, - const cudnnFilterDescriptor_t dwDesc, void *dw) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, - void *, size_t, const void *, const cudnnFilterDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnConvolutionBackwardFilter"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, xDesc, x, dyDesc, dy, convDesc, algo, - workSpace, workSpaceSizeInBytes, beta, dwDesc, dw); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithmMaxCount( - cudnnHandle_t handle, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardDataAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, count); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardDataAlgorithm( - cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t 
dxDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, const int, int *, - cudnnConvolutionBwdDataAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionBackwardDataAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, requestedAlgoCount, - returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardDataAlgorithmEx( - cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t dxDesc, void *dx, - const int requestedAlgoCount, int *returnedAlgoCount, - cudnnConvolutionBwdDataAlgoPerf_t *perfResults, void *workSpace, - size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *, - const int, int *, cudnnConvolutionBwdDataAlgoPerf_t *, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionBackwardDataAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, wDesc, w, dyDesc, dy, convDesc, dxDesc, dx, - requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithm( - cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t dxDesc, - cudnnConvolutionBwdDataPreference_t preference, 
size_t memoryLimitInBytes, - cudnnConvolutionBwdDataAlgo_t *algo) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataPreference_t, - size_t, cudnnConvolutionBwdDataAlgo_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardDataAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, preference, - memoryLimitInBytes, algo); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithm_v7( - cudnnHandle_t handle, const cudnnFilterDescriptor_t filterDesc, - const cudnnTensorDescriptor_t diffDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t gradDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, const int, int *, - cudnnConvolutionBwdDataAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardDataAlgorithm_v7"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, filterDesc, diffDesc, convDesc, gradDesc, - requestedAlgoCount, returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataWorkspaceSize( - cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t dxDesc, cudnnConvolutionBwdDataAlgo_t algo, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, 
cudnnConvolutionBwdDataAlgo_t, size_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardDataWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, algo, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardData( - cudnnHandle_t handle, const void *alpha, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnConvolutionDescriptor_t convDesc, - cudnnConvolutionBwdDataAlgo_t algo, void *workSpace, - size_t workSpaceSizeInBytes, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdDataAlgo_t, void *, - size_t, const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnConvolutionBackwardData"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, wDesc, w, dyDesc, dy, convDesc, algo, - workSpace, workSpaceSizeInBytes, beta, dxDesc, dx); -} - -cudnnStatus_t CUDNNWINAPI -cudnnIm2Col(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const void *x, const cudnnFilterDescriptor_t wDesc, - const cudnnConvolutionDescriptor_t convDesc, void *colBuffer) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, - const void *, const cudnnFilterDescriptor_t, - const cudnnConvolutionDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnIm2Col"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, x, wDesc, convDesc, colBuffer); -} - -cudnnStatus_t CUDNNWINAPI cudnnSoftmaxForward( - cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode, - const void *alpha, const cudnnTensorDescriptor_t 
xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnSoftmaxForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, mode, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnSoftmaxBackward( - cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode, - const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y, - const cudnnTensorDescriptor_t dyDesc, const void *dy, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnSoftmaxBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, mode, alpha, yDesc, y, dyDesc, dy, beta, dxDesc, - dx); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t *poolingDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPoolingDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreatePoolingDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetPooling2dDescriptor( - cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t mode, - cudnnNanPropagation_t maxpoolingNanOpt, int windowHeight, int windowWidth, - int verticalPadding, int horizontalPadding, int verticalStride, - int horizontalStride) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - 
cudnnPoolingDescriptor_t, cudnnPoolingMode_t, cudnnNanPropagation_t, int, - int, int, int, int, int); - static auto func_ptr = LoadSymbol("cudnnSetPooling2dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight, - windowWidth, verticalPadding, horizontalPadding, - verticalStride, horizontalStride); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetPooling2dDescriptor( - const cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t *mode, - cudnnNanPropagation_t *maxpoolingNanOpt, int *windowHeight, - int *windowWidth, int *verticalPadding, int *horizontalPadding, - int *verticalStride, int *horizontalStride) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnPoolingDescriptor_t, cudnnPoolingMode_t *, - cudnnNanPropagation_t *, int *, int *, int *, int *, int *, int *); - static auto func_ptr = LoadSymbol("cudnnGetPooling2dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight, - windowWidth, verticalPadding, horizontalPadding, - verticalStride, horizontalStride); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetPoolingNdDescriptor( - cudnnPoolingDescriptor_t poolingDesc, const cudnnPoolingMode_t mode, - const cudnnNanPropagation_t maxpoolingNanOpt, int nbDims, - const int windowDimA[], const int paddingA[], const int strideA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnPoolingDescriptor_t, const cudnnPoolingMode_t, - const cudnnNanPropagation_t, int, const int[], const int[], const int[]); - static auto func_ptr = LoadSymbol("cudnnSetPoolingNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, mode, maxpoolingNanOpt, nbDims, windowDimA, - paddingA, strideA); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetPoolingNdDescriptor( - const cudnnPoolingDescriptor_t poolingDesc, int nbDimsRequested, - cudnnPoolingMode_t *mode, cudnnNanPropagation_t *maxpoolingNanOpt, - int 
*nbDims, int windowDimA[], int paddingA[], int strideA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnPoolingDescriptor_t, int, cudnnPoolingMode_t *, - cudnnNanPropagation_t *, int *, int[], int[], int[]); - static auto func_ptr = LoadSymbol("cudnnGetPoolingNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, nbDimsRequested, mode, maxpoolingNanOpt, nbDims, - windowDimA, paddingA, strideA); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetPoolingNdForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc, - const cudnnTensorDescriptor_t inputTensorDesc, - int nbDims, int outputTensorDimA[]) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, - const cudnnTensorDescriptor_t, int, int[]); - static auto func_ptr = - LoadSymbol("cudnnGetPoolingNdForwardOutputDim"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, inputTensorDesc, nbDims, outputTensorDimA); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetPooling2dForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc, - const cudnnTensorDescriptor_t inputTensorDesc, - int *n, int *c, int *h, int *w) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, - const cudnnTensorDescriptor_t, - int *, int *, int *, int *); - static auto func_ptr = - LoadSymbol("cudnnGetPooling2dForwardOutputDim"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, inputTensorDesc, n, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyPoolingDescriptor(cudnnPoolingDescriptor_t poolingDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPoolingDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyPoolingDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnPoolingForward( - cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc, - const void *alpha, const 
cudnnTensorDescriptor_t xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnPoolingForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, poolingDesc, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnPoolingBackward( - cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc, - const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnPoolingBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, poolingDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x, - beta, dxDesc, dx); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateActivationDescriptor(cudnnActivationDescriptor_t *activationDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateActivationDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(activationDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetActivationDescriptor( - cudnnActivationDescriptor_t activationDesc, cudnnActivationMode_t mode, - cudnnNanPropagation_t reluNanOpt, double coef) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI 
*)(cudnnActivationDescriptor_t, - cudnnActivationMode_t, - cudnnNanPropagation_t, double); - static auto func_ptr = LoadSymbol("cudnnSetActivationDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(activationDesc, mode, reluNanOpt, coef); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetActivationDescriptor(const cudnnActivationDescriptor_t activationDesc, - cudnnActivationMode_t *mode, - cudnnNanPropagation_t *reluNanOpt, double *coef) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnActivationDescriptor_t, cudnnActivationMode_t *, - cudnnNanPropagation_t *, double *); - static auto func_ptr = LoadSymbol("cudnnGetActivationDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(activationDesc, mode, reluNanOpt, coef); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyActivationDescriptor(cudnnActivationDescriptor_t activationDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t); - static auto func_ptr = - LoadSymbol("cudnnDestroyActivationDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(activationDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnActivationForward( - cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnActivationDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnActivationForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, activationDesc, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnActivationBackward( - cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc, - const void *alpha, const cudnnTensorDescriptor_t yDesc, const void 
*y, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnActivationDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnActivationBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, activationDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x, - beta, dxDesc, dx); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateLRNDescriptor(cudnnLRNDescriptor_t *normDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnLRNDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateLRNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(normDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetLRNDescriptor(cudnnLRNDescriptor_t normDesc, - unsigned lrnN, double lrnAlpha, - double lrnBeta, double lrnK) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnLRNDescriptor_t, unsigned int, double, double, double); - static auto func_ptr = LoadSymbol("cudnnSetLRNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(normDesc, lrnN, lrnAlpha, lrnBeta, lrnK); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc, - unsigned *lrnN, - double *lrnAlpha, - double *lrnBeta, double *lrnK) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnLRNDescriptor_t, unsigned int *, double *, double *, double *); - static auto func_ptr = LoadSymbol("cudnnGetLRNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(normDesc, lrnN, lrnAlpha, lrnBeta, lrnK); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyLRNDescriptor(cudnnLRNDescriptor_t lrnDesc) { - using 
FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnLRNDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyLRNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(lrnDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnLRNCrossChannelForward( - cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnLRNCrossChannelForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, normDesc, lrnMode, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnLRNCrossChannelBackward( - cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode, - const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnLRNCrossChannelBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, normDesc, lrnMode, alpha, yDesc, y, dyDesc, dy, xDesc, - x, beta, dxDesc, dx); -} - -cudnnStatus_t CUDNNWINAPI cudnnDivisiveNormalizationForward( - cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, - cudnnDivNormMode_t mode, const void 
*alpha, - const cudnnTensorDescriptor_t xDesc, /* same desc for means, temp, temp2 */ - const void *x, - const void *means, /* if NULL, means are assumed to be zero */ - void *temp, void *temp2, const void *beta, - const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, void *, void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = - LoadSymbol("cudnnDivisiveNormalizationForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, temp, temp2, - beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnDivisiveNormalizationBackward( - cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, - cudnnDivNormMode_t mode, const void *alpha, - const cudnnTensorDescriptor_t - xDesc, /* same desc for x, means, dy, temp, temp2 */ - const void *x, - const void *means, /* if NULL, means are assumed to be zero */ - const void *dy, void *temp, void *temp2, const void *beta, - const cudnnTensorDescriptor_t dXdMeansDesc, /* same desc for dx, dMeans */ - void *dx, /* output x differential */ - void *dMeans) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, const void *, - void *, void *, const void *, const cudnnTensorDescriptor_t, void *, - void *); - static auto func_ptr = - LoadSymbol("cudnnDivisiveNormalizationBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, dy, temp, - temp2, beta, dXdMeansDesc, dx, dMeans); -} - -cudnnStatus_t CUDNNWINAPI cudnnDeriveBNTensorDescriptor( - cudnnTensorDescriptor_t derivedBnDesc, const cudnnTensorDescriptor_t xDesc, - cudnnBatchNormMode_t mode) { - using FuncPtr = 
cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, - cudnnBatchNormMode_t); - static auto func_ptr = LoadSymbol("cudnnDeriveBNTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(derivedBnDesc, xDesc, mode); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, - const cudnnTensorDescriptor_t xDesc, const cudnnTensorDescriptor_t zDesc, - const cudnnTensorDescriptor_t yDesc, - const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, - const cudnnActivationDescriptor_t activationDesc, size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, - const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, - const cudnnActivationDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol( - "cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, bnOps, xDesc, zDesc, yDesc, - bnScaleBiasMeanVarDesc, activationDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetBatchNormalizationBackwardExWorkspaceSize( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, - const cudnnTensorDescriptor_t xDesc, const cudnnTensorDescriptor_t yDesc, - const cudnnTensorDescriptor_t dyDesc, const cudnnTensorDescriptor_t dzDesc, - const cudnnTensorDescriptor_t dxDesc, - const cudnnTensorDescriptor_t dBnScaleBiasDesc, - const cudnnActivationDescriptor_t activationDesc, size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, - const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const 
cudnnTensorDescriptor_t, - const cudnnActivationDescriptor_t, size_t *); - static auto func_ptr = - LoadSymbol("cudnnGetBatchNormalizationBackwardExWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, bnOps, xDesc, yDesc, dyDesc, dzDesc, dxDesc, - dBnScaleBiasDesc, activationDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetBatchNormalizationTrainingExReserveSpaceSize( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, - const cudnnActivationDescriptor_t activationDesc, - const cudnnTensorDescriptor_t xDesc, size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, - const cudnnActivationDescriptor_t, const cudnnTensorDescriptor_t, - size_t *); - static auto func_ptr = LoadSymbol( - "cudnnGetBatchNormalizationTrainingExReserveSpaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, bnOps, activationDesc, xDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardTraining( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, - - const void *alpha, /* alpha[0] = result blend factor */ - const void *beta, /* beta[0] = dest layer blend factor */ - - const cudnnTensorDescriptor_t xDesc, const void *x, /* NxCxHxW */ - const cudnnTensorDescriptor_t yDesc, void *y, /* NxCxHxW */ - - /* Shared desc for the next 6 tensors in the argument list. - Data type to be set as follows: - type = (typeOf(x) == double) ? 
double : float - Dimensions for this descriptor depend on normalization mode - - Spatial Normalization : tensors are expected to have dims 1xCx1x1 - (normalization is performed across NxHxW) - - Per-Activation Normalization : tensors are expected to have dims of - 1xCxHxW (normalization is performed across N) */ - const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, - - /* 'Gamma' and 'Beta' respectively in Ioffe and Szegedy's paper's notation - */ - const void *bnScale, const void *bnBias, - - /* MUST use factor=1 in the very first call of a complete training cycle. - Use a factor=1/(1+n) at N-th call to the function to get - Cumulative Moving Average (CMA) behavior - CMA[n] = (x[1]+...+x[n])/n - Since CMA[n+1] = (n*CMA[n]+x[n+1])/(n+1) = - ((n+1)*CMA[n]-CMA[n])/(n+1) + x[n+1]/(n+1) = - CMA[n]*(1-1/(n+1)) + x[n+1]*1/(n+1) */ - double exponentialAverageFactor, - - /* Used in Training phase only. - runningMean = newMean*factor + runningMean*(1-factor) */ - void *resultRunningMean, - /* Output in training mode, input in inference. Is the moving average - of variance[x] (factor is applied in the same way as for runningMean) */ - void *resultRunningVariance, - - /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and - backward functions. */ - double epsilon, - - /* Optionally save intermediate results from the forward pass here - - can be reused to speed up backward pass. 
NULL if unused */ - void *resultSaveMean, void *resultSaveInvVariance) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, - const void *, const void *, double, void *, void *, double, void *, - void *); - static auto func_ptr = - LoadSymbol("cudnnBatchNormalizationForwardTraining"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr( - handle, mode, alpha, beta, xDesc, x, yDesc, y, bnScaleBiasMeanVarDesc, - bnScale, bnBias, exponentialAverageFactor, resultRunningMean, - resultRunningVariance, epsilon, resultSaveMean, resultSaveInvVariance); -} - -cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardTrainingEx( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, - - const void *alpha, /* alpha[0] = result blend factor */ - const void *beta, /* beta[0] = dest layer blend factor */ - - const cudnnTensorDescriptor_t xDesc, const void *xData, - const cudnnTensorDescriptor_t zDesc, const void *zData, - const cudnnTensorDescriptor_t yDesc, void *yData, - - const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, const void *bnScale, - const void *bnBias, - - double exponentialAverageFactor, void *resultRunningMean, - void *resultRunningVariance, - - /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and - backward functions. */ - double epsilon, - - /* Optionally save intermediate results from the forward pass here - - can be reused to speed up backward pass. 
NULL if unused */ - void *resultSaveMean, void *resultSaveInvVariance, - - cudnnActivationDescriptor_t activationDesc, void *workspace, - size_t workSpaceSizeInBytes, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, const void *, - const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, - const void *, const void *, double, void *, void *, double, void *, - void *, cudnnActivationDescriptor_t, void *, size_t, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnBatchNormalizationForwardTrainingEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, bnOps, alpha, beta, xDesc, xData, zDesc, zData, - yDesc, yData, bnScaleBiasMeanVarDesc, bnScale, bnBias, - exponentialAverageFactor, resultRunningMean, - resultRunningVariance, epsilon, resultSaveMean, - resultSaveInvVariance, activationDesc, workspace, - workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardInference( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, - const void *alpha, /* alpha[0] = result blend factor */ - const void *beta, /* beta[0] = dest layer blend factor */ - const cudnnTensorDescriptor_t xDesc, const void *x, /* NxCxHxW */ - const cudnnTensorDescriptor_t yDesc, void *y, /* NxCxHxW */ - const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, const void *bnScale, - const void *bnBias, const void *estimatedMean, - const void *estimatedVariance, double epsilon) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, - const void *, const void *, const void *, const void *, double); 
- static auto func_ptr = - LoadSymbol("cudnnBatchNormalizationForwardInference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, alpha, beta, xDesc, x, yDesc, y, - bnScaleBiasMeanVarDesc, bnScale, bnBias, estimatedMean, - estimatedVariance, epsilon); -} - -cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationBackward( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, const void *alphaDataDiff, - const void *betaDataDiff, const void *alphaParamDiff, - const void *betaParamDiff, - const cudnnTensorDescriptor_t xDesc, /* same desc for x, dx, dy */ - const void *x, const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnTensorDescriptor_t dxDesc, void *dx, - /* Shared tensor desc for the 4 tensors below */ - const cudnnTensorDescriptor_t dBnScaleBiasDesc, - const void *bnScale, /* bnBias doesn't affect backpropagation */ - /* scale and bias diff are not backpropagated below this layer */ - void *dBnScaleResult, void *dBnBiasResult, - /* Same epsilon as forward pass */ - double epsilon, - - /* Optionally cached intermediate results from - forward pass */ - const void *savedMean, const void *savedInvVariance) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, - const void *, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, - const void *, void *, void *, double, const void *, const void *); - static auto func_ptr = LoadSymbol("cudnnBatchNormalizationBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, alphaDataDiff, betaDataDiff, alphaParamDiff, - betaParamDiff, xDesc, x, dyDesc, dy, dxDesc, dx, - dBnScaleBiasDesc, bnScale, dBnScaleResult, dBnBiasResult, - epsilon, savedMean, savedInvVariance); -} - -cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationBackwardEx( - cudnnHandle_t handle, 
cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, - - const void *alphaDataDiff, const void *betaDataDiff, - const void *alphaParamDiff, const void *betaParamDiff, - const cudnnTensorDescriptor_t xDesc, const void *xData, - const cudnnTensorDescriptor_t yDesc, const void *yData, - const cudnnTensorDescriptor_t dyDesc, const void *dyData, - const cudnnTensorDescriptor_t dzDesc, void *dzData, - const cudnnTensorDescriptor_t dxDesc, void *dxData, - - /* Shared tensor desc for the 4 tensors below */ - const cudnnTensorDescriptor_t dBnScaleBiasDesc, const void *bnScaleData, - const void *bnBiasData, /* needed if there is activation */ - void *dBnScaleData, void *dBnBiasData, - double epsilon, /* Same epsilon as forward pass */ - - /* Optionally cached intermediate results from - forward pass */ - const void *savedMean, const void *savedInvVariance, - cudnnActivationDescriptor_t activationDesc, void *workSpace, - size_t workSpaceSizeInBytes, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, const void *, - const void *, const void *, const void *, const cudnnTensorDescriptor_t, - const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, const void *, const void *, void *, - void *, double, const void *, const void *, cudnnActivationDescriptor_t, - void *, size_t, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnBatchNormalizationBackwardEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr( - handle, mode, bnOps, alphaDataDiff, betaDataDiff, alphaParamDiff, - betaParamDiff, xDesc, xData, yDesc, yData, dyDesc, dyData, dzDesc, dzData, - dxDesc, dxData, dBnScaleBiasDesc, bnScaleData, bnBiasData, dBnScaleData, - dBnBiasData, epsilon, savedMean, savedInvVariance, 
activationDesc, - workSpace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreateSpatialTransformerDescriptor( - cudnnSpatialTransformerDescriptor_t *stDesc) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t *); - static auto func_ptr = - LoadSymbol("cudnnCreateSpatialTransformerDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetSpatialTransformerNdDescriptor( - cudnnSpatialTransformerDescriptor_t stDesc, cudnnSamplerType_t samplerType, - cudnnDataType_t dataType, const int nbDims, const int dimA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnSpatialTransformerDescriptor_t, cudnnSamplerType_t, cudnnDataType_t, - const int, const int[]); - static auto func_ptr = - LoadSymbol("cudnnSetSpatialTransformerNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stDesc, samplerType, dataType, nbDims, dimA); -} - -cudnnStatus_t CUDNNWINAPI cudnnDestroySpatialTransformerDescriptor( - cudnnSpatialTransformerDescriptor_t stDesc) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t); - static auto func_ptr = - LoadSymbol("cudnnDestroySpatialTransformerDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSpatialTfGridGeneratorForward( - cudnnHandle_t handle, const cudnnSpatialTransformerDescriptor_t stDesc, - const void *theta, void *grid) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *, - void *); - static auto func_ptr = - LoadSymbol("cudnnSpatialTfGridGeneratorForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, stDesc, theta, grid); -} - -cudnnStatus_t CUDNNWINAPI cudnnSpatialTfGridGeneratorBackward( - cudnnHandle_t handle, const 
cudnnSpatialTransformerDescriptor_t stDesc, - const void *dgrid, void *dtheta) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *, - void *); - static auto func_ptr = - LoadSymbol("cudnnSpatialTfGridGeneratorBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, stDesc, dgrid, dtheta); -} - -cudnnStatus_t CUDNNWINAPI cudnnSpatialTfSamplerForward( - cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *grid, const void *beta, cudnnTensorDescriptor_t yDesc, - void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, const void *, - cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnSpatialTfSamplerForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, stDesc, alpha, xDesc, x, grid, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnSpatialTfSamplerBackward( - cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t dxDesc, void *dx, - const void *alphaDgrid, const cudnnTensorDescriptor_t dyDesc, - const void *dy, const void *grid, const void *betaDgrid, void *dgrid) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, const void *, - void *); - static auto func_ptr = LoadSymbol("cudnnSpatialTfSamplerBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, stDesc, alpha, xDesc, x, 
beta, dxDesc, dx, alphaDgrid, - dyDesc, dy, grid, betaDgrid, dgrid); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t *dropoutDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateDropoutDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dropoutDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyDropoutDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dropoutDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnDropoutGetStatesSize(cudnnHandle_t handle, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnDropoutGetStatesSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnDropoutGetReserveSpaceSize( - cudnnTensorDescriptor_t xdesc, size_t *sizeInBytes) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnDropoutGetReserveSpaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(xdesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetDropoutDescriptor( - cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float dropout, - void *states, size_t stateSizeInBytes, unsigned long long seed) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, - float, void *, size_t, unsigned long long); - static auto func_ptr = LoadSymbol("cudnnSetDropoutDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dropoutDesc, handle, dropout, states, stateSizeInBytes, seed); -} - -cudnnStatus_t CUDNNWINAPI 
cudnnRestoreDropoutDescriptor( - cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float dropout, - void *states, size_t stateSizeInBytes, unsigned long long seed) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, - float, void *, size_t, unsigned long long); - static auto func_ptr = LoadSymbol("cudnnRestoreDropoutDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dropoutDesc, handle, dropout, states, stateSizeInBytes, seed); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetDropoutDescriptor( - cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float *dropout, - void **states, unsigned long long *seed) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, - float *, void **, unsigned long long *); - static auto func_ptr = LoadSymbol("cudnnGetDropoutDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dropoutDesc, handle, dropout, states, seed); -} - -cudnnStatus_t CUDNNWINAPI cudnnDropoutForward( - cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc, - const cudnnTensorDescriptor_t xdesc, const void *x, - const cudnnTensorDescriptor_t ydesc, void *y, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnDropoutDescriptor_t, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnDropoutForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dropoutDesc, xdesc, x, ydesc, y, reserveSpace, - reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnDropoutBackward( - cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc, - const cudnnTensorDescriptor_t dydesc, const void *dy, - const cudnnTensorDescriptor_t dxdesc, void *dx, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr 
= cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnDropoutDescriptor_t, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnDropoutBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dropoutDesc, dydesc, dy, dxdesc, dx, reserveSpace, - reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateRNNDescriptor(cudnnRNNDescriptor_t *rnnDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateRNNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyRNNDescriptor(cudnnRNNDescriptor_t rnnDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyRNNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNForwardInferenceAlgorithmMaxCount( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetRNNForwardInferenceAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, count); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindRNNForwardInferenceAlgorithmEx( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t *yDesc, void *y, - const cudnnTensorDescriptor_t hyDesc, void *hy, - const cudnnTensorDescriptor_t cyDesc, void *cy, const float findIntensity, - 
const int requestedAlgoCount, int *returnedAlgoCount, - cudnnAlgorithmPerformance_t *perfResults, void *workspace, - size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, const float, const int, - int *, cudnnAlgorithmPerformance_t *, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindRNNForwardInferenceAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, - wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, findIntensity, - requestedAlgoCount, returnedAlgoCount, perfResults, workspace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNForwardTrainingAlgorithmMaxCount( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetRNNForwardTrainingAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, count); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindRNNForwardTrainingAlgorithmEx( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t *yDesc, void *y, - const cudnnTensorDescriptor_t hyDesc, void *hy, - const cudnnTensorDescriptor_t cyDesc, void *cy, const float findIntensity, - const 
int requestedAlgoCount, int *returnedAlgoCount, - cudnnAlgorithmPerformance_t *perfResults, void *workspace, - size_t workSpaceSizeInBytes, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, const float, const int, - int *, cudnnAlgorithmPerformance_t *, void *, size_t, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindRNNForwardTrainingAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, - wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, findIntensity, - requestedAlgoCount, returnedAlgoCount, perfResults, workspace, - workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNBackwardDataAlgorithmMaxCount( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetRNNBackwardDataAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, count); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindRNNBackwardDataAlgorithmEx( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *yDesc, const void *y, - const cudnnTensorDescriptor_t *dyDesc, const void *dy, - const cudnnTensorDescriptor_t dhyDesc, const void *dhy, - const cudnnTensorDescriptor_t dcyDesc, const void *dcy, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t 
hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnTensorDescriptor_t *dxDesc, void *dx, - const cudnnTensorDescriptor_t dhxDesc, void *dhx, - const cudnnTensorDescriptor_t dcxDesc, void *dcx, const float findIntensity, - const int requestedAlgoCount, int *returnedAlgoCount, - cudnnAlgorithmPerformance_t *perfResults, void *workspace, - size_t workSpaceSizeInBytes, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, const float, const int, - int *, cudnnAlgorithmPerformance_t *, void *, size_t, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindRNNBackwardDataAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc, - dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc, - dx, dhxDesc, dhx, dcxDesc, dcx, findIntensity, - requestedAlgoCount, returnedAlgoCount, perfResults, workspace, - workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNBackwardWeightsAlgorithmMaxCount( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetRNNBackwardWeightsAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, count); 
-} - -cudnnStatus_t CUDNNWINAPI cudnnFindRNNBackwardWeightsAlgorithmEx( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t *yDesc, const void *y, - const float findIntensity, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnAlgorithmPerformance_t *perfResults, - const void *workspace, size_t workSpaceSizeInBytes, - const cudnnFilterDescriptor_t dwDesc, void *dw, const void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, const void *, const float, const int, - int *, cudnnAlgorithmPerformance_t *, const void *, size_t, - const cudnnFilterDescriptor_t, void *, const void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindRNNBackwardWeightsAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y, - findIntensity, requestedAlgoCount, returnedAlgoCount, - perfResults, workspace, workSpaceSizeInBytes, dwDesc, dw, - reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreatePersistentRNNPlan( - cudnnRNNDescriptor_t rnnDesc, const int minibatch, - const cudnnDataType_t dataType, cudnnPersistentRNNPlan_t *plan) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, const int, - const cudnnDataType_t, - cudnnPersistentRNNPlan_t *); - static auto func_ptr = LoadSymbol("cudnnCreatePersistentRNNPlan"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, minibatch, dataType, plan); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetPersistentRNNPlan( - cudnnRNNDescriptor_t rnnDesc, cudnnPersistentRNNPlan_t plan) { - 
using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, - cudnnPersistentRNNPlan_t); - static auto func_ptr = LoadSymbol("cudnnSetPersistentRNNPlan"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, plan); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyPersistentRNNPlan(cudnnPersistentRNNPlan_t plan) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPersistentRNNPlan_t); - static auto func_ptr = LoadSymbol("cudnnDestroyPersistentRNNPlan"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor( - cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, const int hiddenSize, - const int numLayers, - cudnnDropoutDescriptor_t - dropoutDesc, /* Between layers, not between recurrent steps. */ - cudnnRNNInputMode_t inputMode, cudnnDirectionMode_t direction, - cudnnRNNMode_t mode, cudnnRNNAlgo_t algo, cudnnDataType_t dataType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int, - cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t, - cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, - inputMode, direction, mode, algo, dataType); -} - -cudnnStatus_t CUDNNWINAPI -cudnnSetRNNProjectionLayers(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, - const int recProjSize, const int outProjSize) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int); - static auto func_ptr = LoadSymbol("cudnnSetRNNProjectionLayers"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, recProjSize, outProjSize); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNProjectionLayers( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *recProjSize, 
- int *outProjSize) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, int *, int *); - static auto func_ptr = LoadSymbol("cudnnGetRNNProjectionLayers"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, recProjSize, outProjSize); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetRNNAlgorithmDescriptor( - cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, - cudnnAlgorithmDescriptor_t algoDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, cudnnAlgorithmDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNAlgorithmDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, algoDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNDescriptor( - cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, int *hiddenSize, - int *numLayers, cudnnDropoutDescriptor_t *dropoutDesc, - cudnnRNNInputMode_t *inputMode, cudnnDirectionMode_t *direction, - cudnnRNNMode_t *mode, cudnnRNNAlgo_t *algo, cudnnDataType_t *dataType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, int *, int *, - cudnnDropoutDescriptor_t *, cudnnRNNInputMode_t *, cudnnDirectionMode_t *, - cudnnRNNMode_t *, cudnnRNNAlgo_t *, cudnnDataType_t *); - static auto func_ptr = LoadSymbol("cudnnGetRNNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, - inputMode, direction, mode, algo, dataType); -} - -cudnnStatus_t CUDNNWINAPI -cudnnSetRNNMatrixMathType(cudnnRNNDescriptor_t rnnDesc, cudnnMathType_t mType) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnMathType_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNMatrixMathType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, mType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNMatrixMathType( - cudnnRNNDescriptor_t rnnDesc, 
cudnnMathType_t *mType) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnMathType_t *); - static auto func_ptr = LoadSymbol("cudnnGetRNNMatrixMathType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, mType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNWorkspaceSize( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetRNNWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNTrainingReserveSize( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetRNNTrainingReserveSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetRNNParamsSize(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const cudnnTensorDescriptor_t xDesc, size_t *sizeInBytes, - cudnnDataType_t dataType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnTensorDescriptor_t, - size_t *, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnGetRNNParamsSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, xDesc, sizeInBytes, dataType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerMatrixParams( - cudnnHandle_t handle, const 
cudnnRNNDescriptor_t rnnDesc, - const int pseudoLayer, const cudnnTensorDescriptor_t xDesc, - const cudnnFilterDescriptor_t wDesc, const void *w, const int linLayerID, - cudnnFilterDescriptor_t linLayerMatDesc, void **linLayerMat) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, - const void *, const int, cudnnFilterDescriptor_t, void **); - static auto func_ptr = LoadSymbol("cudnnGetRNNLinLayerMatrixParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, pseudoLayer, xDesc, wDesc, w, linLayerID, - linLayerMatDesc, linLayerMat); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerBiasParams( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int pseudoLayer, const cudnnTensorDescriptor_t xDesc, - const cudnnFilterDescriptor_t wDesc, const void *w, const int linLayerID, - cudnnFilterDescriptor_t linLayerBiasDesc, void **linLayerBias) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, - const void *, const int, cudnnFilterDescriptor_t, void **); - static auto func_ptr = LoadSymbol("cudnnGetRNNLinLayerBiasParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, pseudoLayer, xDesc, wDesc, w, linLayerID, - linLayerBiasDesc, linLayerBias); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNForwardInference( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t *yDesc, void *y, - const cudnnTensorDescriptor_t hyDesc, void *hy, - const cudnnTensorDescriptor_t cyDesc, void *cy, void 
*workspace, - size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnRNNForwardInference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, - wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNForwardTraining( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t *yDesc, void *y, - const cudnnTensorDescriptor_t hyDesc, void *hy, - const cudnnTensorDescriptor_t cyDesc, void *cy, void *workspace, - size_t workSpaceSizeInBytes, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *, - size_t); - static auto func_ptr = LoadSymbol("cudnnRNNForwardTraining"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, 
hx, cxDesc, cx, - wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, - workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnRNNBackwardData(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *yDesc, - const void *y, const cudnnTensorDescriptor_t *dyDesc, - const void *dy, const cudnnTensorDescriptor_t dhyDesc, - const void *dhy, const cudnnTensorDescriptor_t dcyDesc, - const void *dcy, const cudnnFilterDescriptor_t wDesc, - const void *w, const cudnnTensorDescriptor_t hxDesc, - const void *hx, const cudnnTensorDescriptor_t cxDesc, - const void *cx, const cudnnTensorDescriptor_t *dxDesc, - void *dx, const cudnnTensorDescriptor_t dhxDesc, void *dhx, - const cudnnTensorDescriptor_t dcxDesc, void *dcx, - void *workspace, size_t workSpaceSizeInBytes, - void *reserveSpace, size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *, - size_t); - static auto func_ptr = LoadSymbol("cudnnRNNBackwardData"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc, - dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc, - dx, dhxDesc, dhx, dcxDesc, dcx, workspace, - workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardWeights( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - 
const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t *yDesc, const void *y, const void *workspace, - size_t workSpaceSizeInBytes, const cudnnFilterDescriptor_t dwDesc, void *dw, - const void *reserveSpace, size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, const void *, const void *, size_t, - const cudnnFilterDescriptor_t, void *, const void *, size_t); - static auto func_ptr = LoadSymbol("cudnnRNNBackwardWeights"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y, - workspace, workSpaceSizeInBytes, dwDesc, dw, reserveSpace, - reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateCTCLossDescriptor(cudnnCTCLossDescriptor_t *ctcLossDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateCTCLossDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctcLossDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetCTCLossDescriptor( - cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t compType) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetCTCLossDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctcLossDesc, compType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossDescriptor( - cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t *compType) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t *); - static auto func_ptr = LoadSymbol("cudnnGetCTCLossDescriptor"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(ctcLossDesc, compType); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyCTCLossDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctcLossDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnCTCLoss( - cudnnHandle_t handle, - const cudnnTensorDescriptor_t - probsDesc, /* Tensor descriptor for probabilities, the dimensions are - T,N,A (T is the timing steps, N is the - mini batch size, A is the alphabet size) */ - const void *probs, /* probabilities after softmax, in GPU memory */ - const int *labels, /* labels, in CPU memory */ - const int *labelLengths, /* the length of each label, in CPU memory */ - const int *inputLengths, /* the lengths of timing steps in each batch, in - CPU memory */ - void *costs, /* the returned costs of CTC, in GPU memory */ - const cudnnTensorDescriptor_t - gradientsDesc, /* Tensor descriptor for gradients, the dimensions are - T,N,A */ - const void *gradients, /* the returned CTC gradients, in GPU memory, to - compute costs only, set it to NULL */ - cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */ - cudnnCTCLossDescriptor_t ctcLossDesc, - void *workspace, /* pointer to the workspace, in GPU memory */ - size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const int *, - const int *, const int *, void *, const cudnnTensorDescriptor_t, - const void *, cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, void *, - size_t); - static auto func_ptr = LoadSymbol("cudnnCTCLoss"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, probsDesc, probs, labels, labelLengths, inputLengths, - costs, gradientsDesc, gradients, algo, ctcLossDesc, workspace, - workSpaceSizeInBytes); -} - 
-cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossWorkspaceSize( - cudnnHandle_t handle, - const cudnnTensorDescriptor_t - probsDesc, /* Tensor descriptor for probabilities, the dimensions are - T,N,A (T is the - timing steps, N is the mini batch size, A is the alphabet - size) */ - const cudnnTensorDescriptor_t - gradientsDesc, /* Tensor descriptor for gradients, the - dimensions are T,N,A. To compute costs - only, set it to NULL */ - const int *labels, /* labels, in CPU memory */ - const int *labelLengths, /* the length of each label, in CPU memory */ - const int *inputLengths, /* the lengths of timing steps in each batch, in - CPU memory */ - cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */ - cudnnCTCLossDescriptor_t ctcLossDesc, size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const int *, const int *, const int *, - cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetCTCLossWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, probsDesc, gradientsDesc, labels, labelLengths, - inputLengths, algo, ctcLossDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateAlgorithmDescriptor(cudnnAlgorithmDescriptor_t *algoDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateAlgorithmDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetAlgorithmDescriptor( - cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t algorithm) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t, - cudnnAlgorithm_t); - static auto func_ptr = LoadSymbol("cudnnSetAlgorithmDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoDesc, algorithm); -} - -cudnnStatus_t CUDNNWINAPI 
cudnnGetAlgorithmDescriptor( - const cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t *algorithm) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(const cudnnAlgorithmDescriptor_t, - cudnnAlgorithm_t *); - static auto func_ptr = LoadSymbol("cudnnGetAlgorithmDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoDesc, algorithm); -} - -cudnnStatus_t CUDNNWINAPI cudnnCopyAlgorithmDescriptor( - const cudnnAlgorithmDescriptor_t src, cudnnAlgorithmDescriptor_t dest) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(const cudnnAlgorithmDescriptor_t, - cudnnAlgorithmDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnCopyAlgorithmDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(src, dest); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyAlgorithmDescriptor(cudnnAlgorithmDescriptor_t algoDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyAlgorithmDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreateAlgorithmPerformance( - cudnnAlgorithmPerformance_t *algoPerf, int numberToCreate) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmPerformance_t *, int); - static auto func_ptr = LoadSymbol("cudnnCreateAlgorithmPerformance"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoPerf, numberToCreate); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetAlgorithmPerformance( - cudnnAlgorithmPerformance_t algoPerf, cudnnAlgorithmDescriptor_t algoDesc, - cudnnStatus_t status, float time, size_t memory) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmPerformance_t, - cudnnAlgorithmDescriptor_t, - cudnnStatus_t, float, size_t); - static auto func_ptr = LoadSymbol("cudnnSetAlgorithmPerformance"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoPerf, algoDesc, status, time, memory); -} - 
-cudnnStatus_t CUDNNWINAPI cudnnGetAlgorithmPerformance( - const cudnnAlgorithmPerformance_t algoPerf, - cudnnAlgorithmDescriptor_t *algoDesc, cudnnStatus_t *status, float *time, - size_t *memory) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnAlgorithmPerformance_t, cudnnAlgorithmDescriptor_t *, - cudnnStatus_t *, float *, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetAlgorithmPerformance"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoPerf, algoDesc, status, time, memory); -} - -cudnnStatus_t CUDNNWINAPI cudnnDestroyAlgorithmPerformance( - cudnnAlgorithmPerformance_t *algoPerf, int numberToDestroy) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmPerformance_t *, int); - static auto func_ptr = - LoadSymbol("cudnnDestroyAlgorithmPerformance"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoPerf, numberToDestroy); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetAlgorithmSpaceSize( - cudnnHandle_t handle, cudnnAlgorithmDescriptor_t algoDesc, - size_t *algoSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnAlgorithmDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetAlgorithmSpaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algoDesc, algoSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnSaveAlgorithm(cudnnHandle_t handle, cudnnAlgorithmDescriptor_t algoDesc, - void *algoSpace, size_t algoSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnAlgorithmDescriptor_t, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnSaveAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algoDesc, algoSpace, algoSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnRestoreAlgorithm( - cudnnHandle_t handle, void *algoSpace, size_t algoSpaceSizeInBytes, - cudnnAlgorithmDescriptor_t algoDesc) { - using FuncPtr = 
cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, void *, size_t, - cudnnAlgorithmDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnRestoreAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algoSpace, algoSpaceSizeInBytes, algoDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNSetClip(cudnnHandle_t handle, - cudnnRNNDescriptor_t rnnDesc, - cudnnRNNClipMode_t clipMode, - cudnnNanPropagation_t clipNanOpt, - double lclip, double rclip) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, cudnnRNNClipMode_t, - cudnnNanPropagation_t, double, double); - static auto func_ptr = LoadSymbol("cudnnRNNSetClip"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, clipMode, clipNanOpt, lclip, rclip); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNGetClip(cudnnHandle_t handle, - cudnnRNNDescriptor_t rnnDesc, - cudnnRNNClipMode_t *clipMode, - cudnnNanPropagation_t *clipNanOpt, - double *lclip, double *rclip) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, cudnnRNNClipMode_t *, - cudnnNanPropagation_t *, double *, double *); - static auto func_ptr = LoadSymbol("cudnnRNNGetClip"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, clipMode, clipNanOpt, lclip, rclip); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetCallback(unsigned mask, void *udata, - cudnnCallback_t fptr) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(unsigned int, void *, cudnnCallback_t); - static auto func_ptr = LoadSymbol("cudnnSetCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mask, udata, fptr); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetCallback(unsigned *mask, void **udata, - cudnnCallback_t *fptr) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(unsigned int *, void **, cudnnCallback_t *); - static auto func_ptr = LoadSymbol("cudnnGetCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(mask, udata, fptr); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetRNNPaddingMode( - cudnnRNNDescriptor_t rnnDesc, cudnnRNNPaddingMode_t paddingMode) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnRNNPaddingMode_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNPaddingMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, paddingMode); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNPaddingMode( - cudnnRNNDescriptor_t rnnDesc, cudnnRNNPaddingMode_t *paddingMode) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, - cudnnRNNPaddingMode_t *); - static auto func_ptr = LoadSymbol("cudnnGetRNNPaddingMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, paddingMode); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateRNNDataDescriptor(cudnnRNNDataDescriptor_t *RNNDataDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDataDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateRNNDataDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(RNNDataDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyRNNDataDescriptor(cudnnRNNDataDescriptor_t RNNDataDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDataDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyRNNDataDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(RNNDataDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetRNNDataDescriptor( - cudnnRNNDataDescriptor_t RNNDataDesc, cudnnDataType_t dataType, - cudnnRNNDataLayout_t layout, int maxSeqLength, int batchSize, - int vectorSize, - const int seqLengthArray[], /* length of each sequence in the batch */ - void *paddingFill) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnRNNDataDescriptor_t, cudnnDataType_t, cudnnRNNDataLayout_t, int, int, - int, const int[], void *); - static auto func_ptr = LoadSymbol("cudnnSetRNNDataDescriptor"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(RNNDataDesc, dataType, layout, maxSeqLength, batchSize, - vectorSize, seqLengthArray, paddingFill); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNDataDescriptor( - cudnnRNNDataDescriptor_t RNNDataDesc, cudnnDataType_t *dataType, - cudnnRNNDataLayout_t *layout, int *maxSeqLength, int *batchSize, - int *vectorSize, int arrayLengthRequested, int seqLengthArray[], - void *paddingFill) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnRNNDataDescriptor_t, cudnnDataType_t *, cudnnRNNDataLayout_t *, - int *, int *, int *, int, int[], void *); - static auto func_ptr = LoadSymbol("cudnnGetRNNDataDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(RNNDataDesc, dataType, layout, maxSeqLength, batchSize, - vectorSize, arrayLengthRequested, seqLengthArray, - paddingFill); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNForwardTrainingEx( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const cudnnRNNDataDescriptor_t xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnRNNDataDescriptor_t yDesc, void *y, - const cudnnTensorDescriptor_t hyDesc, void *hy, - const cudnnTensorDescriptor_t cyDesc, void *cy, - const cudnnRNNDataDescriptor_t kDesc, /* reserved, should pass NULL */ - const void *keys, /* reserved, should pass NULL */ - const cudnnRNNDataDescriptor_t cDesc, /* reserved, should pass NULL */ - void *cAttn, /* reserved, should pass NULL */ - const cudnnRNNDataDescriptor_t iDesc, /* reserved, should pass NULL */ - void *iAttn, /* reserved, should pass NULL */ - const cudnnRNNDataDescriptor_t qDesc, /* reserved, should pass NULL */ - void *queries, /* reserved, should pass NULL */ - void *workSpace, size_t workSpaceSizeInBytes, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, 
const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, - const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, - const cudnnRNNDataDescriptor_t, const void *, - const cudnnRNNDataDescriptor_t, void *, const cudnnRNNDataDescriptor_t, - void *, const cudnnRNNDataDescriptor_t, void *, void *, size_t, void *, - size_t); - static auto func_ptr = LoadSymbol("cudnnRNNForwardTrainingEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, - yDesc, y, hyDesc, hy, cyDesc, cy, kDesc, keys, cDesc, cAttn, - iDesc, iAttn, qDesc, queries, workSpace, workSpaceSizeInBytes, - reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNForwardInferenceEx( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const cudnnRNNDataDescriptor_t xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnRNNDataDescriptor_t yDesc, void *y, - const cudnnTensorDescriptor_t hyDesc, void *hy, - const cudnnTensorDescriptor_t cyDesc, void *cy, - const cudnnRNNDataDescriptor_t kDesc, /* reserved, should pass NULL */ - const void *keys, /* reserved, should pass NULL */ - const cudnnRNNDataDescriptor_t cDesc, /* reserved, should pass NULL */ - void *cAttn, /* reserved, should pass NULL */ - const cudnnRNNDataDescriptor_t iDesc, /* reserved, should pass NULL */ - void *iAttn, /* reserved, should pass NULL */ - const cudnnRNNDataDescriptor_t qDesc, /* reserved, should pass NULL */ - void *queries, /* reserved, should pass NULL */ - void *workSpace, size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - 
cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, - const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, - const cudnnRNNDataDescriptor_t, const void *, - const cudnnRNNDataDescriptor_t, void *, const cudnnRNNDataDescriptor_t, - void *, const cudnnRNNDataDescriptor_t, void *, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnRNNForwardInferenceEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, - yDesc, y, hyDesc, hy, cyDesc, cy, kDesc, keys, cDesc, cAttn, - iDesc, iAttn, qDesc, queries, workSpace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardDataEx( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const cudnnRNNDataDescriptor_t yDesc, const void *y, - const cudnnRNNDataDescriptor_t dyDesc, const void *dy, - const cudnnRNNDataDescriptor_t dcDesc, /* reserved, should pass NULL */ - const void *dcAttn, /* reserved, should pass NULL */ - const cudnnTensorDescriptor_t dhyDesc, const void *dhy, - const cudnnTensorDescriptor_t dcyDesc, const void *dcy, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnRNNDataDescriptor_t dxDesc, void *dx, - const cudnnTensorDescriptor_t dhxDesc, void *dhx, - const cudnnTensorDescriptor_t dcxDesc, void *dcx, - const cudnnRNNDataDescriptor_t dkDesc, /* reserved, should pass NULL */ - void *dkeys, /* reserved, should pass NULL */ - void *workSpace, size_t workSpaceSizeInBytes, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const 
cudnnRNNDataDescriptor_t, - const void *, const cudnnRNNDataDescriptor_t, const void *, - const cudnnRNNDataDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, - const cudnnRNNDataDescriptor_t, void *, void *, size_t, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnRNNBackwardDataEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, yDesc, y, dyDesc, dy, dcDesc, dcAttn, - dhyDesc, dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, - dxDesc, dx, dhxDesc, dhx, dcxDesc, dcx, dkDesc, dkeys, - workSpace, workSpaceSizeInBytes, reserveSpace, - reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardWeightsEx( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const cudnnRNNDataDescriptor_t xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnRNNDataDescriptor_t yDesc, const void *y, void *workSpace, - size_t workSpaceSizeInBytes, const cudnnFilterDescriptor_t dwDesc, void *dw, - void *reserveSpace, size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, - const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnRNNDataDescriptor_t, const void *, void *, size_t, - const cudnnFilterDescriptor_t, void *, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnRNNBackwardWeightsEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, yDesc, y, workSpace, - workSpaceSizeInBytes, dwDesc, dw, reserveSpace, - reserveSpaceSizeInBytes); -} - -cudnnStatus_t 
CUDNNWINAPI cudnnSetRNNDescriptor_v6( - cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, const int hiddenSize, - const int numLayers, cudnnDropoutDescriptor_t dropoutDesc, - cudnnRNNInputMode_t inputMode, cudnnDirectionMode_t direction, - cudnnRNNMode_t mode, cudnnRNNAlgo_t algo, cudnnDataType_t dataType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int, - cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t, - cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNDescriptor_v6"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, - inputMode, direction, mode, algo, dataType); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor_v5( - cudnnRNNDescriptor_t rnnDesc, int hiddenSize, int numLayers, - cudnnDropoutDescriptor_t dropoutDesc, cudnnRNNInputMode_t inputMode, - cudnnDirectionMode_t direction, cudnnRNNMode_t mode, - cudnnDataType_t dataType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnRNNDescriptor_t, int, int, cudnnDropoutDescriptor_t, - cudnnRNNInputMode_t, cudnnDirectionMode_t, cudnnRNNMode_t, - cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNDescriptor_v5"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode, - direction, mode, dataType); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cudnn_7_6.inc b/third_party/xla/third_party/tsl/tsl/cuda/cudnn_7_6.inc deleted file mode 100644 index 9dd420a9022d57..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cudnn_7_6.inc +++ /dev/null @@ -1,3257 +0,0 @@ -// Auto-generated, do not edit. 
- -extern "C" { - -size_t CUDNNWINAPI cudnnGetVersion(void) { - using FuncPtr = size_t(CUDNNWINAPI *)(); - static auto func_ptr = LoadSymbol("cudnnGetVersion"); - if (!func_ptr) return 0; - return func_ptr(); -} - -size_t CUDNNWINAPI cudnnGetCudartVersion(void) { - using FuncPtr = size_t(CUDNNWINAPI *)(); - static auto func_ptr = LoadSymbol("cudnnGetCudartVersion"); - if (!func_ptr) return 0; - return func_ptr(); -} - -const char *CUDNNWINAPI cudnnGetErrorString(cudnnStatus_t status) { - using FuncPtr = const char *(CUDNNWINAPI *)(cudnnStatus_t); - static auto func_ptr = LoadSymbol("cudnnGetErrorString"); - if (!func_ptr) return "cudnnGetErrorString symbol not found."; - return func_ptr(status); -} - -cudnnStatus_t CUDNNWINAPI cudnnQueryRuntimeError(cudnnHandle_t handle, - cudnnStatus_t *rstatus, - cudnnErrQueryMode_t mode, - cudnnRuntimeTag_t *tag) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnStatus_t *, cudnnErrQueryMode_t, cudnnRuntimeTag_t *); - static auto func_ptr = LoadSymbol("cudnnQueryRuntimeError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rstatus, mode, tag); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetProperty(libraryPropertyType type, - int *value) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(libraryPropertyType, int *); - static auto func_ptr = LoadSymbol("cudnnGetProperty"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type, value); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreate(cudnnHandle_t *handle) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t *); - static auto func_ptr = LoadSymbol("cudnnCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cudnnStatus_t CUDNNWINAPI cudnnDestroy(cudnnHandle_t handle) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t); - static auto func_ptr = LoadSymbol("cudnnDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cudnnStatus_t 
CUDNNWINAPI cudnnSetStream(cudnnHandle_t handle, - cudaStream_t streamId) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudnnSetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetStream(cudnnHandle_t handle, - cudaStream_t *streamId) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t *); - static auto func_ptr = LoadSymbol("cudnnGetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensor4dDescriptor( - cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format, - cudnnDataType_t dataType, /* image data type */ - int n, /* number of inputs (batch size) */ - int c, /* number of input feature maps */ - int h, /* height of input section */ - int w) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t, - cudnnDataType_t, int, int, int, int); - static auto func_ptr = LoadSymbol("cudnnSetTensor4dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, format, dataType, n, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensor4dDescriptorEx( - cudnnTensorDescriptor_t tensorDesc, - cudnnDataType_t dataType, /* image data type */ - int n, /* number of inputs (batch size) */ - int c, /* number of input feature maps */ - int h, /* height of input section */ - int w, /* width of input section */ - int nStride, int cStride, int hStride, int wStride) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI 
*)(cudnnTensorDescriptor_t, cudnnDataType_t, - int, int, int, int, int, int, int, int); - static auto func_ptr = LoadSymbol("cudnnSetTensor4dDescriptorEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride, - wStride); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetTensor4dDescriptor( - const cudnnTensorDescriptor_t tensorDesc, - cudnnDataType_t *dataType, /* image data type */ - int *n, /* number of inputs (batch size) */ - int *c, /* number of input feature maps */ - int *h, /* height of input section */ - int *w, /* width of input section */ - int *nStride, int *cStride, int *hStride, int *wStride) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnTensorDescriptor_t, cudnnDataType_t *, int *, int *, int *, - int *, int *, int *, int *, int *); - static auto func_ptr = LoadSymbol("cudnnGetTensor4dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride, - wStride); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensorNdDescriptor( - cudnnTensorDescriptor_t tensorDesc, cudnnDataType_t dataType, int nbDims, - const int dimA[], const int strideA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnTensorDescriptor_t, cudnnDataType_t, int, const int[], const int[]); - static auto func_ptr = LoadSymbol("cudnnSetTensorNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, dataType, nbDims, dimA, strideA); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensorNdDescriptorEx( - cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format, - cudnnDataType_t dataType, int nbDims, const int dimA[]) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t, - cudnnDataType_t, int, const int[]); - static auto func_ptr = LoadSymbol("cudnnSetTensorNdDescriptorEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(tensorDesc, format, dataType, nbDims, dimA); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetTensorNdDescriptor( - const cudnnTensorDescriptor_t tensorDesc, int nbDimsRequested, - cudnnDataType_t *dataType, int *nbDims, int dimA[], int strideA[]) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(const cudnnTensorDescriptor_t, int, - cudnnDataType_t *, int *, int[], int[]); - static auto func_ptr = LoadSymbol("cudnnGetTensorNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, nbDimsRequested, dataType, nbDims, dimA, strideA); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetTensorSizeInBytes( - const cudnnTensorDescriptor_t tensorDesc, size_t *size) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(const cudnnTensorDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetTensorSizeInBytes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, size); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyTensorDescriptor(cudnnTensorDescriptor_t tensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnInitTransformDest( - const cudnnTensorTransformDescriptor_t transformDesc, - const cudnnTensorDescriptor_t srcDesc, cudnnTensorDescriptor_t destDesc, - size_t *destSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnTensorTransformDescriptor_t, const cudnnTensorDescriptor_t, - cudnnTensorDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnInitTransformDest"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(transformDesc, srcDesc, destDesc, destSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreateTensorTransformDescriptor( - cudnnTensorTransformDescriptor_t *transformDesc) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI 
*)(cudnnTensorTransformDescriptor_t *); - static auto func_ptr = - LoadSymbol("cudnnCreateTensorTransformDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(transformDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensorTransformDescriptor( - cudnnTensorTransformDescriptor_t transformDesc, const uint32_t nbDims, - const cudnnTensorFormat_t destFormat, const int32_t padBeforeA[], - const int32_t padAfterA[], const uint32_t foldA[], - const cudnnFoldingDirection_t direction) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnTensorTransformDescriptor_t, const uint32_t, - const cudnnTensorFormat_t, const int32_t[], const int32_t[], - const uint32_t[], const cudnnFoldingDirection_t); - static auto func_ptr = - LoadSymbol("cudnnSetTensorTransformDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(transformDesc, nbDims, destFormat, padBeforeA, padAfterA, - foldA, direction); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetTensorTransformDescriptor( - cudnnTensorTransformDescriptor_t transformDesc, uint32_t nbDimsRequested, - cudnnTensorFormat_t *destFormat, int32_t padBeforeA[], int32_t padAfterA[], - uint32_t foldA[], cudnnFoldingDirection_t *direction) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnTensorTransformDescriptor_t, uint32_t, cudnnTensorFormat_t *, - int32_t[], int32_t[], uint32_t[], cudnnFoldingDirection_t *); - static auto func_ptr = - LoadSymbol("cudnnGetTensorTransformDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(transformDesc, nbDimsRequested, destFormat, padBeforeA, - padAfterA, foldA, direction); -} - -cudnnStatus_t CUDNNWINAPI cudnnDestroyTensorTransformDescriptor( - cudnnTensorTransformDescriptor_t transformDesc) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorTransformDescriptor_t); - static auto func_ptr = - LoadSymbol("cudnnDestroyTensorTransformDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(transformDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnTransformTensor( - cudnnHandle_t handle, const void *alpha, - const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, - const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnTransformTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnTransformTensorEx( - cudnnHandle_t handle, const cudnnTensorTransformDescriptor_t transDesc, - const void *alpha, const cudnnTensorDescriptor_t srcDesc, - const void *srcData, const void *beta, - const cudnnTensorDescriptor_t destDesc, void *destData) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorTransformDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnTransformTensorEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transDesc, alpha, srcDesc, srcData, beta, destDesc, - destData); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetFoldedConvBackwardDataDescriptors( - const cudnnHandle_t handle, const cudnnFilterDescriptor_t filterDesc, - const cudnnTensorDescriptor_t diffDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t gradDesc, - const cudnnTensorFormat_t transformFormat, - cudnnFilterDescriptor_t foldedFilterDesc, - cudnnTensorDescriptor_t paddedDiffDesc, - cudnnConvolutionDescriptor_t foldedConvDesc, - cudnnTensorDescriptor_t foldedGradDesc, - cudnnTensorTransformDescriptor_t filterFoldTransDesc, - cudnnTensorTransformDescriptor_t diffPadTransDesc, - cudnnTensorTransformDescriptor_t gradFoldTransDesc, - 
cudnnTensorTransformDescriptor_t gradUnfoldTransDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnHandle_t, const cudnnFilterDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnTensorFormat_t, - cudnnFilterDescriptor_t, cudnnTensorDescriptor_t, - cudnnConvolutionDescriptor_t, cudnnTensorDescriptor_t, - cudnnTensorTransformDescriptor_t, cudnnTensorTransformDescriptor_t, - cudnnTensorTransformDescriptor_t, cudnnTensorTransformDescriptor_t); - static auto func_ptr = - LoadSymbol("cudnnGetFoldedConvBackwardDataDescriptors"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, filterDesc, diffDesc, convDesc, gradDesc, - transformFormat, foldedFilterDesc, paddedDiffDesc, - foldedConvDesc, foldedGradDesc, filterFoldTransDesc, - diffPadTransDesc, gradFoldTransDesc, gradUnfoldTransDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnAddTensor(cudnnHandle_t handle, - const void *alpha, - const cudnnTensorDescriptor_t aDesc, - const void *A, const void *beta, - const cudnnTensorDescriptor_t cDesc, - void *C) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnAddTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, aDesc, A, beta, cDesc, C); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateOpTensorDescriptor(cudnnOpTensorDescriptor_t *opTensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateOpTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(opTensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetOpTensorDescriptor( - cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t opTensorOp, - cudnnDataType_t opTensorCompType, cudnnNanPropagation_t 
opTensorNanOpt) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t, - cudnnDataType_t, cudnnNanPropagation_t); - static auto func_ptr = LoadSymbol("cudnnSetOpTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(opTensorDesc, opTensorOp, opTensorCompType, opTensorNanOpt); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetOpTensorDescriptor( - const cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t *opTensorOp, - cudnnDataType_t *opTensorCompType, cudnnNanPropagation_t *opTensorNanOpt) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t *, cudnnDataType_t *, - cudnnNanPropagation_t *); - static auto func_ptr = LoadSymbol("cudnnGetOpTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(opTensorDesc, opTensorOp, opTensorCompType, opTensorNanOpt); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyOpTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(opTensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnOpTensor( - cudnnHandle_t handle, const cudnnOpTensorDescriptor_t opTensorDesc, - const void *alpha1, const cudnnTensorDescriptor_t aDesc, const void *A, - const void *alpha2, const cudnnTensorDescriptor_t bDesc, const void *B, - const void *beta, const cudnnTensorDescriptor_t cDesc, void *C) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnOpTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnOpTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, 
opTensorDesc, alpha1, aDesc, A, alpha2, bDesc, B, - beta, cDesc, C); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreateReduceTensorDescriptor( - cudnnReduceTensorDescriptor_t *reduceTensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t *); - static auto func_ptr = - LoadSymbol("cudnnCreateReduceTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(reduceTensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetReduceTensorDescriptor( - cudnnReduceTensorDescriptor_t reduceTensorDesc, - cudnnReduceTensorOp_t reduceTensorOp, cudnnDataType_t reduceTensorCompType, - cudnnNanPropagation_t reduceTensorNanOpt, - cudnnReduceTensorIndices_t reduceTensorIndices, - cudnnIndicesType_t reduceTensorIndicesType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t, cudnnDataType_t, - cudnnNanPropagation_t, cudnnReduceTensorIndices_t, cudnnIndicesType_t); - static auto func_ptr = LoadSymbol("cudnnSetReduceTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType, - reduceTensorNanOpt, reduceTensorIndices, - reduceTensorIndicesType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetReduceTensorDescriptor( - const cudnnReduceTensorDescriptor_t reduceTensorDesc, - cudnnReduceTensorOp_t *reduceTensorOp, - cudnnDataType_t *reduceTensorCompType, - cudnnNanPropagation_t *reduceTensorNanOpt, - cudnnReduceTensorIndices_t *reduceTensorIndices, - cudnnIndicesType_t *reduceTensorIndicesType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t *, - cudnnDataType_t *, cudnnNanPropagation_t *, cudnnReduceTensorIndices_t *, - cudnnIndicesType_t *); - static auto func_ptr = LoadSymbol("cudnnGetReduceTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType, - 
reduceTensorNanOpt, reduceTensorIndices, - reduceTensorIndicesType); -} - -cudnnStatus_t CUDNNWINAPI cudnnDestroyReduceTensorDescriptor( - cudnnReduceTensorDescriptor_t reduceTensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t); - static auto func_ptr = - LoadSymbol("cudnnDestroyReduceTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(reduceTensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetReductionIndicesSize( - cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, - const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnReduceTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetReductionIndicesSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetReductionWorkspaceSize( - cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, - const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnReduceTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetReductionWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnReduceTensor( - cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, - void *indices, size_t indicesSizeInBytes, void *workspace, - size_t workspaceSizeInBytes, const void *alpha, - const cudnnTensorDescriptor_t aDesc, const void *A, const void *beta, - const cudnnTensorDescriptor_t 
cDesc, void *C) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnReduceTensorDescriptor_t, void *, size_t, - void *, size_t, const void *, const cudnnTensorDescriptor_t, const void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnReduceTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, reduceTensorDesc, indices, indicesSizeInBytes, - workspace, workspaceSizeInBytes, alpha, aDesc, A, beta, cDesc, - C); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensor(cudnnHandle_t handle, - const cudnnTensorDescriptor_t yDesc, - void *y, const void *valuePtr) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *); - static auto func_ptr = LoadSymbol("cudnnSetTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, yDesc, y, valuePtr); -} - -cudnnStatus_t CUDNNWINAPI cudnnScaleTensor(cudnnHandle_t handle, - const cudnnTensorDescriptor_t yDesc, - void *y, const void *alpha) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *); - static auto func_ptr = LoadSymbol("cudnnScaleTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, yDesc, y, alpha); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateFilterDescriptor(cudnnFilterDescriptor_t *filterDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateFilterDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetFilter4dDescriptor( - cudnnFilterDescriptor_t filterDesc, - cudnnDataType_t dataType, /* image data type */ - cudnnTensorFormat_t format, int k, /* number of output feature maps */ - int c, /* number of input feature maps */ - int h, /* height of each input filter */ - int w) { - using 
FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t, - cudnnTensorFormat_t, int, int, int, int); - static auto func_ptr = LoadSymbol("cudnnSetFilter4dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc, dataType, format, k, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetFilter4dDescriptor( - const cudnnFilterDescriptor_t filterDesc, - cudnnDataType_t *dataType, /* image data type */ - cudnnTensorFormat_t *format, int *k, /* number of output feature maps */ - int *c, /* number of input feature maps */ - int *h, /* height of each input filter */ - int *w) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnFilterDescriptor_t, cudnnDataType_t *, cudnnTensorFormat_t *, - int *, int *, int *, int *); - static auto func_ptr = LoadSymbol("cudnnGetFilter4dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc, dataType, format, k, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetFilterNdDescriptor( - cudnnFilterDescriptor_t filterDesc, - cudnnDataType_t dataType, /* image data type */ - cudnnTensorFormat_t format, int nbDims, const int filterDimA[]) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t, - cudnnTensorFormat_t, int, const int[]); - static auto func_ptr = LoadSymbol("cudnnSetFilterNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc, dataType, format, nbDims, filterDimA); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetFilterNdDescriptor( - const cudnnFilterDescriptor_t filterDesc, int nbDimsRequested, - cudnnDataType_t *dataType, /* image data type */ - cudnnTensorFormat_t *format, int *nbDims, int filterDimA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnFilterDescriptor_t, int, cudnnDataType_t *, - cudnnTensorFormat_t *, int *, int[]); - static auto func_ptr = LoadSymbol("cudnnGetFilterNdDescriptor"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(filterDesc, nbDimsRequested, dataType, format, nbDims, - filterDimA); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetFilterSizeInBytes( - const cudnnFilterDescriptor_t filterDesc, size_t *size) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(const cudnnFilterDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetFilterSizeInBytes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc, size); -} - -cudnnStatus_t CUDNNWINAPI cudnnTransformFilter( - cudnnHandle_t handle, const cudnnTensorTransformDescriptor_t transDesc, - const void *alpha, const cudnnFilterDescriptor_t srcDesc, - const void *srcData, const void *beta, - const cudnnFilterDescriptor_t destDesc, void *destData) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorTransformDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, const void *, - const cudnnFilterDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnTransformFilter"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transDesc, alpha, srcDesc, srcData, beta, destDesc, - destData); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyFilterDescriptor(cudnnFilterDescriptor_t filterDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyFilterDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnReorderFilterAndBias( - cudnnHandle_t handle, const cudnnFilterDescriptor_t filterDesc, - cudnnReorderType_t reorderType, const void *filterData, - void *reorderedFilterData, int reorderBias, const void *biasData, - void *reorderedBiasData) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, cudnnReorderType_t, - const void *, void *, int, const void *, void *); - static auto func_ptr = 
LoadSymbol("cudnnReorderFilterAndBias"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, filterDesc, reorderType, filterData, - reorderedFilterData, reorderBias, biasData, - reorderedBiasData); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t *convDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t *); - static auto func_ptr = - LoadSymbol("cudnnCreateConvolutionDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionMathType( - cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t mathType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, - cudnnMathType_t); - static auto func_ptr = LoadSymbol("cudnnSetConvolutionMathType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, mathType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionMathType( - cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t *mathType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, - cudnnMathType_t *); - static auto func_ptr = LoadSymbol("cudnnGetConvolutionMathType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, mathType); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionGroupCount( - cudnnConvolutionDescriptor_t convDesc, int groupCount) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int); - static auto func_ptr = LoadSymbol("cudnnSetConvolutionGroupCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, groupCount); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionGroupCount( - cudnnConvolutionDescriptor_t convDesc, int *groupCount) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int *); - static auto func_ptr = LoadSymbol("cudnnGetConvolutionGroupCount"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(convDesc, groupCount); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionReorderType( - cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t reorderType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, - cudnnReorderType_t); - static auto func_ptr = LoadSymbol("cudnnSetConvolutionReorderType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, reorderType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionReorderType( - cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t *reorderType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, - cudnnReorderType_t *); - static auto func_ptr = LoadSymbol("cudnnGetConvolutionReorderType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, reorderType); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetConvolution2dDescriptor( - cudnnConvolutionDescriptor_t convDesc, int pad_h, /* zero-padding height */ - int pad_w, /* zero-padding width */ - int u, /* vertical filter stride */ - int v, /* horizontal filter stride */ - int dilation_h, /* filter dilation in the vertical dimension */ - int dilation_w, /* filter dilation in the horizontal dimension */ - cudnnConvolutionMode_t mode, cudnnDataType_t computeType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnConvolutionDescriptor_t, int, int, int, int, int, int, - cudnnConvolutionMode_t, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetConvolution2dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode, - computeType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dDescriptor( - const cudnnConvolutionDescriptor_t convDesc, - int *pad_h, /* zero-padding height */ - int *pad_w, /* zero-padding width */ - int *u, /* vertical filter stride */ - int *v, /* horizontal filter stride */ - int *dilation_h, /* 
filter dilation in the vertical dimension */ - int *dilation_w, /* filter dilation in the horizontal dimension */ - cudnnConvolutionMode_t *mode, cudnnDataType_t *computeType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnConvolutionDescriptor_t, int *, int *, int *, int *, int *, - int *, cudnnConvolutionMode_t *, cudnnDataType_t *); - static auto func_ptr = LoadSymbol("cudnnGetConvolution2dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode, - computeType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dForwardOutputDim( - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t inputTensorDesc, - const cudnnFilterDescriptor_t filterDesc, int *n, int *c, int *h, int *w) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, int *, int *, int *, int *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolution2dForwardOutputDim"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, inputTensorDesc, filterDesc, n, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionNdDescriptor( - cudnnConvolutionDescriptor_t convDesc, int arrayLength, /* nbDims-2 size */ - const int padA[], const int filterStrideA[], const int dilationA[], - cudnnConvolutionMode_t mode, cudnnDataType_t computeType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnConvolutionDescriptor_t, int, const int[], const int[], const int[], - cudnnConvolutionMode_t, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetConvolutionNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, arrayLength, padA, filterStrideA, dilationA, mode, - computeType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionNdDescriptor( - const cudnnConvolutionDescriptor_t convDesc, int arrayLengthRequested, - int 
*arrayLength, int padA[], int strideA[], int dilationA[], - cudnnConvolutionMode_t *mode, cudnnDataType_t *computeType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnConvolutionDescriptor_t, int, int *, int[], int[], int[], - cudnnConvolutionMode_t *, cudnnDataType_t *); - static auto func_ptr = LoadSymbol("cudnnGetConvolutionNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, arrayLengthRequested, arrayLength, padA, strideA, - dilationA, mode, computeType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionNdForwardOutputDim( - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t inputTensorDesc, - const cudnnFilterDescriptor_t filterDesc, int nbDims, - int tensorOutputDimA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, int, int[]); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionNdForwardOutputDim"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, inputTensorDesc, filterDesc, nbDims, - tensorOutputDimA); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyConvolutionDescriptor(cudnnConvolutionDescriptor_t convDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t); - static auto func_ptr = - LoadSymbol("cudnnDestroyConvolutionDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetConvolutionForwardAlgorithmMaxCount(cudnnHandle_t handle, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionForwardAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, count); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionForwardAlgorithm( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const 
cudnnFilterDescriptor_t wDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t yDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, const int, int *, - cudnnConvolutionFwdAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionForwardAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, requestedAlgoCount, - returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionForwardAlgorithmEx( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t yDesc, void *y, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults, - void *workSpace, size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *, - const int, int *, cudnnConvolutionFwdAlgoPerf_t *, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionForwardAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, x, wDesc, w, convDesc, yDesc, y, - requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardAlgorithm( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnFilterDescriptor_t wDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t 
yDesc, - cudnnConvolutionFwdPreference_t preference, size_t memoryLimitInBytes, - cudnnConvolutionFwdAlgo_t *algo) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, cudnnConvolutionFwdPreference_t, size_t, - cudnnConvolutionFwdAlgo_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionForwardAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, preference, - memoryLimitInBytes, algo); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardAlgorithm_v7( - cudnnHandle_t handle, const cudnnTensorDescriptor_t srcDesc, - const cudnnFilterDescriptor_t filterDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t destDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, const int, int *, - cudnnConvolutionFwdAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionForwardAlgorithm_v7"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, srcDesc, filterDesc, convDesc, destDesc, - requestedAlgoCount, returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardWorkspaceSize( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnFilterDescriptor_t wDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t yDesc, cudnnConvolutionFwdAlgo_t algo, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, - const 
cudnnTensorDescriptor_t, cudnnConvolutionFwdAlgo_t, size_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionForwardWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, algo, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionForward( - cudnnHandle_t handle, const void *alpha, - const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo, - void *workSpace, size_t workSpaceSizeInBytes, const void *beta, - const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *, - size_t, const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnConvolutionForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, xDesc, x, wDesc, w, convDesc, algo, workSpace, - workSpaceSizeInBytes, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionBiasActivationForward( - cudnnHandle_t handle, const void *alpha1, - const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo, - void *workSpace, size_t workSpaceSizeInBytes, const void *alpha2, - const cudnnTensorDescriptor_t zDesc, const void *z, - const cudnnTensorDescriptor_t biasDesc, const void *bias, - const cudnnActivationDescriptor_t activationDesc, - const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const 
cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *, - size_t, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnActivationDescriptor_t, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = - LoadSymbol("cudnnConvolutionBiasActivationForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha1, xDesc, x, wDesc, w, convDesc, algo, workSpace, - workSpaceSizeInBytes, alpha2, zDesc, z, biasDesc, bias, - activationDesc, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardBias( - cudnnHandle_t handle, const void *alpha, - const cudnnTensorDescriptor_t dyDesc, const void *dy, const void *beta, - const cudnnTensorDescriptor_t dbDesc, void *db) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnConvolutionBackwardBias"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, dyDesc, dy, beta, dbDesc, db); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithmMaxCount( - cudnnHandle_t handle, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardFilterAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, count); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardFilterAlgorithm( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnFilterDescriptor_t dwDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const 
cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnFilterDescriptor_t, const int, int *, - cudnnConvolutionBwdFilterAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionBackwardFilterAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, requestedAlgoCount, - returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardFilterAlgorithmEx( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnTensorDescriptor_t dyDesc, const void *y, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnFilterDescriptor_t dwDesc, void *dw, - const int requestedAlgoCount, int *returnedAlgoCount, - cudnnConvolutionBwdFilterAlgoPerf_t *perfResults, void *workSpace, - size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, void *, - const int, int *, cudnnConvolutionBwdFilterAlgoPerf_t *, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionBackwardFilterAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, x, dyDesc, y, convDesc, dwDesc, dw, - requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithm( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnFilterDescriptor_t dwDesc, - cudnnConvolutionBwdFilterPreference_t preference, size_t memoryLimitInBytes, - cudnnConvolutionBwdFilterAlgo_t *algo) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const 
cudnnConvolutionDescriptor_t, - const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterPreference_t, - size_t, cudnnConvolutionBwdFilterAlgo_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardFilterAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, preference, - memoryLimitInBytes, algo); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithm_v7( - cudnnHandle_t handle, const cudnnTensorDescriptor_t srcDesc, - const cudnnTensorDescriptor_t diffDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnFilterDescriptor_t gradDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnFilterDescriptor_t, const int, int *, - cudnnConvolutionBwdFilterAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardFilterAlgorithm_v7"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, srcDesc, diffDesc, convDesc, gradDesc, - requestedAlgoCount, returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterWorkspaceSize( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnFilterDescriptor_t gradDesc, - cudnnConvolutionBwdFilterAlgo_t algo, size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, size_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardFilterWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(handle, xDesc, dyDesc, convDesc, gradDesc, algo, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardFilter( - cudnnHandle_t handle, const void *alpha, - const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnConvolutionDescriptor_t convDesc, - cudnnConvolutionBwdFilterAlgo_t algo, void *workSpace, - size_t workSpaceSizeInBytes, const void *beta, - const cudnnFilterDescriptor_t dwDesc, void *dw) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, - void *, size_t, const void *, const cudnnFilterDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnConvolutionBackwardFilter"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, xDesc, x, dyDesc, dy, convDesc, algo, - workSpace, workSpaceSizeInBytes, beta, dwDesc, dw); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithmMaxCount( - cudnnHandle_t handle, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardDataAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, count); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardDataAlgorithm( - cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t dxDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, const 
int, int *, - cudnnConvolutionBwdDataAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionBackwardDataAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, requestedAlgoCount, - returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardDataAlgorithmEx( - cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t dxDesc, void *dx, - const int requestedAlgoCount, int *returnedAlgoCount, - cudnnConvolutionBwdDataAlgoPerf_t *perfResults, void *workSpace, - size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *, - const int, int *, cudnnConvolutionBwdDataAlgoPerf_t *, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionBackwardDataAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, wDesc, w, dyDesc, dy, convDesc, dxDesc, dx, - requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithm( - cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t dxDesc, - cudnnConvolutionBwdDataPreference_t preference, size_t memoryLimitInBytes, - cudnnConvolutionBwdDataAlgo_t *algo) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataPreference_t, - size_t, 
cudnnConvolutionBwdDataAlgo_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardDataAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, preference, - memoryLimitInBytes, algo); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithm_v7( - cudnnHandle_t handle, const cudnnFilterDescriptor_t filterDesc, - const cudnnTensorDescriptor_t diffDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t gradDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, const int, int *, - cudnnConvolutionBwdDataAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardDataAlgorithm_v7"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, filterDesc, diffDesc, convDesc, gradDesc, - requestedAlgoCount, returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataWorkspaceSize( - cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t dxDesc, cudnnConvolutionBwdDataAlgo_t algo, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataAlgo_t, size_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardDataWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, algo, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI 
cudnnConvolutionBackwardData( - cudnnHandle_t handle, const void *alpha, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnConvolutionDescriptor_t convDesc, - cudnnConvolutionBwdDataAlgo_t algo, void *workSpace, - size_t workSpaceSizeInBytes, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdDataAlgo_t, void *, - size_t, const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnConvolutionBackwardData"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, wDesc, w, dyDesc, dy, convDesc, algo, - workSpace, workSpaceSizeInBytes, beta, dxDesc, dx); -} - -cudnnStatus_t CUDNNWINAPI -cudnnIm2Col(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const void *x, const cudnnFilterDescriptor_t wDesc, - const cudnnConvolutionDescriptor_t convDesc, void *colBuffer) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, - const void *, const cudnnFilterDescriptor_t, - const cudnnConvolutionDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnIm2Col"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, x, wDesc, convDesc, colBuffer); -} - -cudnnStatus_t CUDNNWINAPI cudnnSoftmaxForward( - cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const 
cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnSoftmaxForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, mode, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnSoftmaxBackward( - cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode, - const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y, - const cudnnTensorDescriptor_t dyDesc, const void *dy, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnSoftmaxBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, mode, alpha, yDesc, y, dyDesc, dy, beta, dxDesc, - dx); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t *poolingDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPoolingDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreatePoolingDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetPooling2dDescriptor( - cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t mode, - cudnnNanPropagation_t maxpoolingNanOpt, int windowHeight, int windowWidth, - int verticalPadding, int horizontalPadding, int verticalStride, - int horizontalStride) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnPoolingDescriptor_t, cudnnPoolingMode_t, cudnnNanPropagation_t, int, - int, int, int, int, int); - static auto func_ptr = LoadSymbol("cudnnSetPooling2dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight, - windowWidth, 
verticalPadding, horizontalPadding, - verticalStride, horizontalStride); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetPooling2dDescriptor( - const cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t *mode, - cudnnNanPropagation_t *maxpoolingNanOpt, int *windowHeight, - int *windowWidth, int *verticalPadding, int *horizontalPadding, - int *verticalStride, int *horizontalStride) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnPoolingDescriptor_t, cudnnPoolingMode_t *, - cudnnNanPropagation_t *, int *, int *, int *, int *, int *, int *); - static auto func_ptr = LoadSymbol("cudnnGetPooling2dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight, - windowWidth, verticalPadding, horizontalPadding, - verticalStride, horizontalStride); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetPoolingNdDescriptor( - cudnnPoolingDescriptor_t poolingDesc, const cudnnPoolingMode_t mode, - const cudnnNanPropagation_t maxpoolingNanOpt, int nbDims, - const int windowDimA[], const int paddingA[], const int strideA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnPoolingDescriptor_t, const cudnnPoolingMode_t, - const cudnnNanPropagation_t, int, const int[], const int[], const int[]); - static auto func_ptr = LoadSymbol("cudnnSetPoolingNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, mode, maxpoolingNanOpt, nbDims, windowDimA, - paddingA, strideA); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetPoolingNdDescriptor( - const cudnnPoolingDescriptor_t poolingDesc, int nbDimsRequested, - cudnnPoolingMode_t *mode, cudnnNanPropagation_t *maxpoolingNanOpt, - int *nbDims, int windowDimA[], int paddingA[], int strideA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnPoolingDescriptor_t, int, cudnnPoolingMode_t *, - cudnnNanPropagation_t *, int *, int[], int[], int[]); - static auto func_ptr = LoadSymbol("cudnnGetPoolingNdDescriptor"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, nbDimsRequested, mode, maxpoolingNanOpt, nbDims, - windowDimA, paddingA, strideA); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetPoolingNdForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc, - const cudnnTensorDescriptor_t inputTensorDesc, - int nbDims, int outputTensorDimA[]) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, - const cudnnTensorDescriptor_t, int, int[]); - static auto func_ptr = - LoadSymbol("cudnnGetPoolingNdForwardOutputDim"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, inputTensorDesc, nbDims, outputTensorDimA); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetPooling2dForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc, - const cudnnTensorDescriptor_t inputTensorDesc, - int *n, int *c, int *h, int *w) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, - const cudnnTensorDescriptor_t, - int *, int *, int *, int *); - static auto func_ptr = - LoadSymbol("cudnnGetPooling2dForwardOutputDim"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, inputTensorDesc, n, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyPoolingDescriptor(cudnnPoolingDescriptor_t poolingDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPoolingDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyPoolingDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnPoolingForward( - cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, 
void *); - static auto func_ptr = LoadSymbol("cudnnPoolingForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, poolingDesc, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnPoolingBackward( - cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc, - const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnPoolingBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, poolingDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x, - beta, dxDesc, dx); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateActivationDescriptor(cudnnActivationDescriptor_t *activationDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateActivationDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(activationDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetActivationDescriptor( - cudnnActivationDescriptor_t activationDesc, cudnnActivationMode_t mode, - cudnnNanPropagation_t reluNanOpt, double coef) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t, - cudnnActivationMode_t, - cudnnNanPropagation_t, double); - static auto func_ptr = LoadSymbol("cudnnSetActivationDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(activationDesc, mode, reluNanOpt, coef); -} - -cudnnStatus_t CUDNNWINAPI 
-cudnnGetActivationDescriptor(const cudnnActivationDescriptor_t activationDesc, - cudnnActivationMode_t *mode, - cudnnNanPropagation_t *reluNanOpt, double *coef) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnActivationDescriptor_t, cudnnActivationMode_t *, - cudnnNanPropagation_t *, double *); - static auto func_ptr = LoadSymbol("cudnnGetActivationDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(activationDesc, mode, reluNanOpt, coef); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyActivationDescriptor(cudnnActivationDescriptor_t activationDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t); - static auto func_ptr = - LoadSymbol("cudnnDestroyActivationDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(activationDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnActivationForward( - cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnActivationDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnActivationForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, activationDesc, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnActivationBackward( - cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc, - const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnActivationDescriptor_t, const void *, - const 
cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnActivationBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, activationDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x, - beta, dxDesc, dx); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateLRNDescriptor(cudnnLRNDescriptor_t *normDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnLRNDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateLRNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(normDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetLRNDescriptor(cudnnLRNDescriptor_t normDesc, - unsigned lrnN, double lrnAlpha, - double lrnBeta, double lrnK) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnLRNDescriptor_t, unsigned int, double, double, double); - static auto func_ptr = LoadSymbol("cudnnSetLRNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(normDesc, lrnN, lrnAlpha, lrnBeta, lrnK); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc, - unsigned *lrnN, - double *lrnAlpha, - double *lrnBeta, double *lrnK) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnLRNDescriptor_t, unsigned int *, double *, double *, double *); - static auto func_ptr = LoadSymbol("cudnnGetLRNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(normDesc, lrnN, lrnAlpha, lrnBeta, lrnK); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyLRNDescriptor(cudnnLRNDescriptor_t lrnDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnLRNDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyLRNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(lrnDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnLRNCrossChannelForward( - cudnnHandle_t handle, 
cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnLRNCrossChannelForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, normDesc, lrnMode, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnLRNCrossChannelBackward( - cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode, - const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnLRNCrossChannelBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, normDesc, lrnMode, alpha, yDesc, y, dyDesc, dy, xDesc, - x, beta, dxDesc, dx); -} - -cudnnStatus_t CUDNNWINAPI cudnnDivisiveNormalizationForward( - cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, - cudnnDivNormMode_t mode, const void *alpha, - const cudnnTensorDescriptor_t xDesc, /* same desc for means, temp, temp2 */ - const void *x, - const void *means, /* if NULL, means are assumed to be zero */ - void *temp, void *temp2, const void *beta, - const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = 
cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, void *, void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = - LoadSymbol("cudnnDivisiveNormalizationForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, temp, temp2, - beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnDivisiveNormalizationBackward( - cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, - cudnnDivNormMode_t mode, const void *alpha, - const cudnnTensorDescriptor_t - xDesc, /* same desc for x, means, dy, temp, temp2 */ - const void *x, - const void *means, /* if NULL, means are assumed to be zero */ - const void *dy, void *temp, void *temp2, const void *beta, - const cudnnTensorDescriptor_t dXdMeansDesc, /* same desc for dx, dMeans */ - void *dx, /* output x differential */ - void *dMeans) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, const void *, - void *, void *, const void *, const cudnnTensorDescriptor_t, void *, - void *); - static auto func_ptr = - LoadSymbol("cudnnDivisiveNormalizationBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, dy, temp, - temp2, beta, dXdMeansDesc, dx, dMeans); -} - -cudnnStatus_t CUDNNWINAPI cudnnDeriveBNTensorDescriptor( - cudnnTensorDescriptor_t derivedBnDesc, const cudnnTensorDescriptor_t xDesc, - cudnnBatchNormMode_t mode) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, - cudnnBatchNormMode_t); - static auto func_ptr = LoadSymbol("cudnnDeriveBNTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(derivedBnDesc, xDesc, mode); -} - 
-cudnnStatus_t CUDNNWINAPI -cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, - const cudnnTensorDescriptor_t xDesc, const cudnnTensorDescriptor_t zDesc, - const cudnnTensorDescriptor_t yDesc, - const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, - const cudnnActivationDescriptor_t activationDesc, size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, - const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, - const cudnnActivationDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol( - "cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, bnOps, xDesc, zDesc, yDesc, - bnScaleBiasMeanVarDesc, activationDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetBatchNormalizationBackwardExWorkspaceSize( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, - const cudnnTensorDescriptor_t xDesc, const cudnnTensorDescriptor_t yDesc, - const cudnnTensorDescriptor_t dyDesc, const cudnnTensorDescriptor_t dzDesc, - const cudnnTensorDescriptor_t dxDesc, - const cudnnTensorDescriptor_t dBnScaleBiasDesc, - const cudnnActivationDescriptor_t activationDesc, size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, - const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, - const cudnnActivationDescriptor_t, size_t *); - static auto func_ptr = - LoadSymbol("cudnnGetBatchNormalizationBackwardExWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, bnOps, xDesc, yDesc, dyDesc, dzDesc, 
dxDesc, - dBnScaleBiasDesc, activationDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetBatchNormalizationTrainingExReserveSpaceSize( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, - const cudnnActivationDescriptor_t activationDesc, - const cudnnTensorDescriptor_t xDesc, size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, - const cudnnActivationDescriptor_t, const cudnnTensorDescriptor_t, - size_t *); - static auto func_ptr = LoadSymbol( - "cudnnGetBatchNormalizationTrainingExReserveSpaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, bnOps, activationDesc, xDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardTraining( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, - - const void *alpha, /* alpha[0] = result blend factor */ - const void *beta, /* beta[0] = dest layer blend factor */ - - const cudnnTensorDescriptor_t xDesc, const void *x, /* NxCxHxW */ - const cudnnTensorDescriptor_t yDesc, void *y, /* NxCxHxW */ - - /* Shared desc for the next 6 tensors in the argument list. - Data type to be set as follows: - type = (typeOf(x) == double) ? double : float - Dimensions for this descriptor depend on normalization mode - - Spatial Normalization : tensors are expected to have dims 1xCx1x1 - (normalization is performed across NxHxW) - - Per-Activation Normalization : tensors are expected to have dims of - 1xCxHxW (normalization is performed across N) */ - const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, - - /* 'Gamma' and 'Beta' respectively in Ioffe and Szegedy's paper's notation - */ - const void *bnScale, const void *bnBias, - - /* MUST use factor=1 in the very first call of a complete training cycle. 
- Use a factor=1/(1+n) at N-th call to the function to get - Cumulative Moving Average (CMA) behavior - CMA[n] = (x[1]+...+x[n])/n - Since CMA[n+1] = (n*CMA[n]+x[n+1])/(n+1) = - ((n+1)*CMA[n]-CMA[n])/(n+1) + x[n+1]/(n+1) = - CMA[n]*(1-1/(n+1)) + x[n+1]*1/(n+1) */ - double exponentialAverageFactor, - - /* Used in Training phase only. - runningMean = newMean*factor + runningMean*(1-factor) */ - void *resultRunningMean, - /* Output in training mode, input in inference. Is the moving average - of variance[x] (factor is applied in the same way as for runningMean) */ - void *resultRunningVariance, - - /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and - backward functions. */ - double epsilon, - - /* Optionally save intermediate results from the forward pass here - - can be reused to speed up backward pass. NULL if unused */ - void *resultSaveMean, void *resultSaveInvVariance) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, - const void *, const void *, double, void *, void *, double, void *, - void *); - static auto func_ptr = - LoadSymbol("cudnnBatchNormalizationForwardTraining"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr( - handle, mode, alpha, beta, xDesc, x, yDesc, y, bnScaleBiasMeanVarDesc, - bnScale, bnBias, exponentialAverageFactor, resultRunningMean, - resultRunningVariance, epsilon, resultSaveMean, resultSaveInvVariance); -} - -cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardTrainingEx( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, - - const void *alpha, /* alpha[0] = result blend factor */ - const void *beta, /* beta[0] = dest layer blend factor */ - - const cudnnTensorDescriptor_t xDesc, const void *xData, - const cudnnTensorDescriptor_t zDesc, const void *zData, - const 
cudnnTensorDescriptor_t yDesc, void *yData, - - const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, const void *bnScale, - const void *bnBias, - - double exponentialAverageFactor, void *resultRunningMean, - void *resultRunningVariance, - - /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and - backward functions. */ - double epsilon, - - /* Optionally save intermediate results from the forward pass here - - can be reused to speed up backward pass. NULL if unused */ - void *resultSaveMean, void *resultSaveInvVariance, - - cudnnActivationDescriptor_t activationDesc, void *workspace, - size_t workSpaceSizeInBytes, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, const void *, - const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, - const void *, const void *, double, void *, void *, double, void *, - void *, cudnnActivationDescriptor_t, void *, size_t, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnBatchNormalizationForwardTrainingEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, bnOps, alpha, beta, xDesc, xData, zDesc, zData, - yDesc, yData, bnScaleBiasMeanVarDesc, bnScale, bnBias, - exponentialAverageFactor, resultRunningMean, - resultRunningVariance, epsilon, resultSaveMean, - resultSaveInvVariance, activationDesc, workspace, - workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardInference( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, - const void *alpha, /* alpha[0] = result blend factor */ - const void *beta, /* beta[0] = dest layer blend factor */ - const cudnnTensorDescriptor_t xDesc, const void *x, /* NxCxHxW */ - const cudnnTensorDescriptor_t yDesc, void *y, /* NxCxHxW */ - 
const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, const void *bnScale, - const void *bnBias, const void *estimatedMean, - const void *estimatedVariance, double epsilon) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, - const void *, const void *, const void *, const void *, double); - static auto func_ptr = - LoadSymbol("cudnnBatchNormalizationForwardInference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, alpha, beta, xDesc, x, yDesc, y, - bnScaleBiasMeanVarDesc, bnScale, bnBias, estimatedMean, - estimatedVariance, epsilon); -} - -cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationBackward( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, const void *alphaDataDiff, - const void *betaDataDiff, const void *alphaParamDiff, - const void *betaParamDiff, - const cudnnTensorDescriptor_t xDesc, /* same desc for x, dx, dy */ - const void *x, const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnTensorDescriptor_t dxDesc, void *dx, - /* Shared tensor desc for the 4 tensors below */ - const cudnnTensorDescriptor_t dBnScaleBiasDesc, - const void *bnScale, /* bnBias doesn't affect backpropagation */ - /* scale and bias diff are not backpropagated below this layer */ - void *dBnScaleResult, void *dBnBiasResult, - /* Same epsilon as forward pass */ - double epsilon, - - /* Optionally cached intermediate results from - forward pass */ - const void *savedMean, const void *savedInvVariance) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, - const void *, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, - const void *, void *, void *, double, const 
void *, const void *); - static auto func_ptr = LoadSymbol("cudnnBatchNormalizationBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, alphaDataDiff, betaDataDiff, alphaParamDiff, - betaParamDiff, xDesc, x, dyDesc, dy, dxDesc, dx, - dBnScaleBiasDesc, bnScale, dBnScaleResult, dBnBiasResult, - epsilon, savedMean, savedInvVariance); -} - -cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationBackwardEx( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, - - const void *alphaDataDiff, const void *betaDataDiff, - const void *alphaParamDiff, const void *betaParamDiff, - const cudnnTensorDescriptor_t xDesc, const void *xData, - const cudnnTensorDescriptor_t yDesc, const void *yData, - const cudnnTensorDescriptor_t dyDesc, const void *dyData, - const cudnnTensorDescriptor_t dzDesc, void *dzData, - const cudnnTensorDescriptor_t dxDesc, void *dxData, - - /* Shared tensor desc for the 4 tensors below */ - const cudnnTensorDescriptor_t dBnScaleBiasDesc, const void *bnScaleData, - const void *bnBiasData, /* needed if there is activation */ - void *dBnScaleData, void *dBnBiasData, - double epsilon, /* Same epsilon as forward pass */ - - /* Optionally cached intermediate results from - forward pass */ - const void *savedMean, const void *savedInvVariance, - cudnnActivationDescriptor_t activationDesc, void *workSpace, - size_t workSpaceSizeInBytes, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, const void *, - const void *, const void *, const void *, const cudnnTensorDescriptor_t, - const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, const void *, const void *, void *, - void *, double, const void *, const void *, 
cudnnActivationDescriptor_t, - void *, size_t, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnBatchNormalizationBackwardEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr( - handle, mode, bnOps, alphaDataDiff, betaDataDiff, alphaParamDiff, - betaParamDiff, xDesc, xData, yDesc, yData, dyDesc, dyData, dzDesc, dzData, - dxDesc, dxData, dBnScaleBiasDesc, bnScaleData, bnBiasData, dBnScaleData, - dBnBiasData, epsilon, savedMean, savedInvVariance, activationDesc, - workSpace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreateSpatialTransformerDescriptor( - cudnnSpatialTransformerDescriptor_t *stDesc) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t *); - static auto func_ptr = - LoadSymbol("cudnnCreateSpatialTransformerDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetSpatialTransformerNdDescriptor( - cudnnSpatialTransformerDescriptor_t stDesc, cudnnSamplerType_t samplerType, - cudnnDataType_t dataType, const int nbDims, const int dimA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnSpatialTransformerDescriptor_t, cudnnSamplerType_t, cudnnDataType_t, - const int, const int[]); - static auto func_ptr = - LoadSymbol("cudnnSetSpatialTransformerNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stDesc, samplerType, dataType, nbDims, dimA); -} - -cudnnStatus_t CUDNNWINAPI cudnnDestroySpatialTransformerDescriptor( - cudnnSpatialTransformerDescriptor_t stDesc) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t); - static auto func_ptr = - LoadSymbol("cudnnDestroySpatialTransformerDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSpatialTfGridGeneratorForward( - cudnnHandle_t handle, const 
cudnnSpatialTransformerDescriptor_t stDesc, - const void *theta, void *grid) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *, - void *); - static auto func_ptr = - LoadSymbol("cudnnSpatialTfGridGeneratorForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, stDesc, theta, grid); -} - -cudnnStatus_t CUDNNWINAPI cudnnSpatialTfGridGeneratorBackward( - cudnnHandle_t handle, const cudnnSpatialTransformerDescriptor_t stDesc, - const void *dgrid, void *dtheta) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *, - void *); - static auto func_ptr = - LoadSymbol("cudnnSpatialTfGridGeneratorBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, stDesc, dgrid, dtheta); -} - -cudnnStatus_t CUDNNWINAPI cudnnSpatialTfSamplerForward( - cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *grid, const void *beta, cudnnTensorDescriptor_t yDesc, - void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, const void *, - cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnSpatialTfSamplerForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, stDesc, alpha, xDesc, x, grid, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnSpatialTfSamplerBackward( - cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t dxDesc, void *dx, - const void *alphaDgrid, const cudnnTensorDescriptor_t dyDesc, - const void *dy, const void *grid, const void *betaDgrid, void *dgrid) { - 
using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, const void *, - void *); - static auto func_ptr = LoadSymbol("cudnnSpatialTfSamplerBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, stDesc, alpha, xDesc, x, beta, dxDesc, dx, alphaDgrid, - dyDesc, dy, grid, betaDgrid, dgrid); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t *dropoutDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateDropoutDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dropoutDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyDropoutDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dropoutDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnDropoutGetStatesSize(cudnnHandle_t handle, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnDropoutGetStatesSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnDropoutGetReserveSpaceSize( - cudnnTensorDescriptor_t xdesc, size_t *sizeInBytes) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnDropoutGetReserveSpaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(xdesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetDropoutDescriptor( - 
cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float dropout, - void *states, size_t stateSizeInBytes, unsigned long long seed) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, - float, void *, size_t, unsigned long long); - static auto func_ptr = LoadSymbol("cudnnSetDropoutDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dropoutDesc, handle, dropout, states, stateSizeInBytes, seed); -} - -cudnnStatus_t CUDNNWINAPI cudnnRestoreDropoutDescriptor( - cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float dropout, - void *states, size_t stateSizeInBytes, unsigned long long seed) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, - float, void *, size_t, unsigned long long); - static auto func_ptr = LoadSymbol("cudnnRestoreDropoutDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dropoutDesc, handle, dropout, states, stateSizeInBytes, seed); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetDropoutDescriptor( - cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float *dropout, - void **states, unsigned long long *seed) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, - float *, void **, unsigned long long *); - static auto func_ptr = LoadSymbol("cudnnGetDropoutDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dropoutDesc, handle, dropout, states, seed); -} - -cudnnStatus_t CUDNNWINAPI cudnnDropoutForward( - cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc, - const cudnnTensorDescriptor_t xdesc, const void *x, - const cudnnTensorDescriptor_t ydesc, void *y, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnDropoutDescriptor_t, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, void *, size_t); - 
static auto func_ptr = LoadSymbol("cudnnDropoutForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dropoutDesc, xdesc, x, ydesc, y, reserveSpace, - reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnDropoutBackward( - cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc, - const cudnnTensorDescriptor_t dydesc, const void *dy, - const cudnnTensorDescriptor_t dxdesc, void *dx, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnDropoutDescriptor_t, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnDropoutBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dropoutDesc, dydesc, dy, dxdesc, dx, reserveSpace, - reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateRNNDescriptor(cudnnRNNDescriptor_t *rnnDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateRNNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyRNNDescriptor(cudnnRNNDescriptor_t rnnDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyRNNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor( - cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, const int hiddenSize, - const int numLayers, cudnnDropoutDescriptor_t dropoutDesc, - cudnnRNNInputMode_t inputMode, cudnnDirectionMode_t direction, - cudnnRNNMode_t mode, cudnnRNNAlgo_t algo, cudnnDataType_t mathPrec) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int, - cudnnDropoutDescriptor_t, 
cudnnRNNInputMode_t, cudnnDirectionMode_t, - cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, - inputMode, direction, mode, algo, mathPrec); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNDescriptor( - cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, int *hiddenSize, - int *numLayers, cudnnDropoutDescriptor_t *dropoutDesc, - cudnnRNNInputMode_t *inputMode, cudnnDirectionMode_t *direction, - cudnnRNNMode_t *mode, cudnnRNNAlgo_t *algo, cudnnDataType_t *mathPrec) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, int *, int *, - cudnnDropoutDescriptor_t *, cudnnRNNInputMode_t *, cudnnDirectionMode_t *, - cudnnRNNMode_t *, cudnnRNNAlgo_t *, cudnnDataType_t *); - static auto func_ptr = LoadSymbol("cudnnGetRNNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, - inputMode, direction, mode, algo, mathPrec); -} - -cudnnStatus_t CUDNNWINAPI -cudnnSetRNNMatrixMathType(cudnnRNNDescriptor_t rnnDesc, cudnnMathType_t mType) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnMathType_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNMatrixMathType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, mType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNMatrixMathType( - cudnnRNNDescriptor_t rnnDesc, cudnnMathType_t *mType) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnMathType_t *); - static auto func_ptr = LoadSymbol("cudnnGetRNNMatrixMathType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, mType); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetRNNBiasMode(cudnnRNNDescriptor_t rnnDesc, - cudnnRNNBiasMode_t biasMode) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI 
*)(cudnnRNNDescriptor_t, cudnnRNNBiasMode_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNBiasMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, biasMode); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNBiasMode(cudnnRNNDescriptor_t rnnDesc, - cudnnRNNBiasMode_t *biasMode) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnRNNBiasMode_t *); - static auto func_ptr = LoadSymbol("cudnnGetRNNBiasMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, biasMode); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNSetClip(cudnnHandle_t handle, - cudnnRNNDescriptor_t rnnDesc, - cudnnRNNClipMode_t clipMode, - cudnnNanPropagation_t clipNanOpt, - double lclip, double rclip) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, cudnnRNNClipMode_t, - cudnnNanPropagation_t, double, double); - static auto func_ptr = LoadSymbol("cudnnRNNSetClip"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, clipMode, clipNanOpt, lclip, rclip); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNGetClip(cudnnHandle_t handle, - cudnnRNNDescriptor_t rnnDesc, - cudnnRNNClipMode_t *clipMode, - cudnnNanPropagation_t *clipNanOpt, - double *lclip, double *rclip) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, cudnnRNNClipMode_t *, - cudnnNanPropagation_t *, double *, double *); - static auto func_ptr = LoadSymbol("cudnnRNNGetClip"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, clipMode, clipNanOpt, lclip, rclip); -} - -cudnnStatus_t CUDNNWINAPI -cudnnSetRNNProjectionLayers(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, - const int recProjSize, const int outProjSize) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int); - static auto func_ptr = LoadSymbol("cudnnSetRNNProjectionLayers"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, recProjSize, outProjSize); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNProjectionLayers( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *recProjSize, - int *outProjSize) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, int *, int *); - static auto func_ptr = LoadSymbol("cudnnGetRNNProjectionLayers"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, recProjSize, outProjSize); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreatePersistentRNNPlan( - cudnnRNNDescriptor_t rnnDesc, const int minibatch, - const cudnnDataType_t dataType, cudnnPersistentRNNPlan_t *plan) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, const int, - const cudnnDataType_t, - cudnnPersistentRNNPlan_t *); - static auto func_ptr = LoadSymbol("cudnnCreatePersistentRNNPlan"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, minibatch, dataType, plan); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyPersistentRNNPlan(cudnnPersistentRNNPlan_t plan) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPersistentRNNPlan_t); - static auto func_ptr = LoadSymbol("cudnnDestroyPersistentRNNPlan"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetPersistentRNNPlan( - cudnnRNNDescriptor_t rnnDesc, cudnnPersistentRNNPlan_t plan) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, - cudnnPersistentRNNPlan_t); - static auto func_ptr = LoadSymbol("cudnnSetPersistentRNNPlan"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, plan); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNWorkspaceSize( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const 
cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetRNNWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNTrainingReserveSize( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetRNNTrainingReserveSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetRNNParamsSize(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const cudnnTensorDescriptor_t xDesc, size_t *sizeInBytes, - cudnnDataType_t dataType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnTensorDescriptor_t, - size_t *, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnGetRNNParamsSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, xDesc, sizeInBytes, dataType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerMatrixParams( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int pseudoLayer, const cudnnTensorDescriptor_t xDesc, - const cudnnFilterDescriptor_t wDesc, const void *w, const int linLayerID, - cudnnFilterDescriptor_t linLayerMatDesc, void **linLayerMat) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, - const void *, const int, cudnnFilterDescriptor_t, void **); - static auto func_ptr = LoadSymbol("cudnnGetRNNLinLayerMatrixParams"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, pseudoLayer, xDesc, wDesc, w, linLayerID, - linLayerMatDesc, linLayerMat); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerBiasParams( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int pseudoLayer, const cudnnTensorDescriptor_t xDesc, - const cudnnFilterDescriptor_t wDesc, const void *w, const int linLayerID, - cudnnFilterDescriptor_t linLayerBiasDesc, void **linLayerBias) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, - const void *, const int, cudnnFilterDescriptor_t, void **); - static auto func_ptr = LoadSymbol("cudnnGetRNNLinLayerBiasParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, pseudoLayer, xDesc, wDesc, w, linLayerID, - linLayerBiasDesc, linLayerBias); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNForwardInference( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t *yDesc, void *y, - const cudnnTensorDescriptor_t hyDesc, void *hy, - const cudnnTensorDescriptor_t cyDesc, void *cy, void *workspace, - size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnRNNForwardInference"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, - wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNForwardTraining( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t *yDesc, void *y, - const cudnnTensorDescriptor_t hyDesc, void *hy, - const cudnnTensorDescriptor_t cyDesc, void *cy, void *workspace, - size_t workSpaceSizeInBytes, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *, - size_t); - static auto func_ptr = LoadSymbol("cudnnRNNForwardTraining"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, - wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, - workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnRNNBackwardData(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *yDesc, - const void *y, const cudnnTensorDescriptor_t *dyDesc, - const void *dy, const cudnnTensorDescriptor_t dhyDesc, - const void *dhy, const cudnnTensorDescriptor_t dcyDesc, - const void *dcy, const cudnnFilterDescriptor_t wDesc, - const void 
*w, const cudnnTensorDescriptor_t hxDesc, - const void *hx, const cudnnTensorDescriptor_t cxDesc, - const void *cx, const cudnnTensorDescriptor_t *dxDesc, - void *dx, const cudnnTensorDescriptor_t dhxDesc, void *dhx, - const cudnnTensorDescriptor_t dcxDesc, void *dcx, - void *workspace, size_t workSpaceSizeInBytes, - void *reserveSpace, size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *, - size_t); - static auto func_ptr = LoadSymbol("cudnnRNNBackwardData"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc, - dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc, - dx, dhxDesc, dhx, dcxDesc, dcx, workspace, - workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardWeights( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t *yDesc, const void *y, const void *workspace, - size_t workSpaceSizeInBytes, const cudnnFilterDescriptor_t dwDesc, void *dw, - const void *reserveSpace, size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - 
const cudnnTensorDescriptor_t *, const void *, const void *, size_t, - const cudnnFilterDescriptor_t, void *, const void *, size_t); - static auto func_ptr = LoadSymbol("cudnnRNNBackwardWeights"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y, - workspace, workSpaceSizeInBytes, dwDesc, dw, reserveSpace, - reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetRNNPaddingMode( - cudnnRNNDescriptor_t rnnDesc, cudnnRNNPaddingMode_t paddingMode) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnRNNPaddingMode_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNPaddingMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, paddingMode); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNPaddingMode( - cudnnRNNDescriptor_t rnnDesc, cudnnRNNPaddingMode_t *paddingMode) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, - cudnnRNNPaddingMode_t *); - static auto func_ptr = LoadSymbol("cudnnGetRNNPaddingMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, paddingMode); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateRNNDataDescriptor(cudnnRNNDataDescriptor_t *rnnDataDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDataDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateRNNDataDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDataDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyRNNDataDescriptor(cudnnRNNDataDescriptor_t rnnDataDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDataDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyRNNDataDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDataDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetRNNDataDescriptor( - cudnnRNNDataDescriptor_t rnnDataDesc, cudnnDataType_t dataType, - cudnnRNNDataLayout_t layout, int maxSeqLength, 
int batchSize, - int vectorSize, - const int seqLengthArray[], /* length of each sequence in the batch */ - void *paddingFill) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnRNNDataDescriptor_t, cudnnDataType_t, cudnnRNNDataLayout_t, int, int, - int, const int[], void *); - static auto func_ptr = LoadSymbol("cudnnSetRNNDataDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDataDesc, dataType, layout, maxSeqLength, batchSize, - vectorSize, seqLengthArray, paddingFill); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNDataDescriptor( - cudnnRNNDataDescriptor_t rnnDataDesc, cudnnDataType_t *dataType, - cudnnRNNDataLayout_t *layout, int *maxSeqLength, int *batchSize, - int *vectorSize, int arrayLengthRequested, int seqLengthArray[], - void *paddingFill) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnRNNDataDescriptor_t, cudnnDataType_t *, cudnnRNNDataLayout_t *, - int *, int *, int *, int, int[], void *); - static auto func_ptr = LoadSymbol("cudnnGetRNNDataDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDataDesc, dataType, layout, maxSeqLength, batchSize, - vectorSize, arrayLengthRequested, seqLengthArray, - paddingFill); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNForwardTrainingEx( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const cudnnRNNDataDescriptor_t xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnRNNDataDescriptor_t yDesc, void *y, - const cudnnTensorDescriptor_t hyDesc, void *hy, - const cudnnTensorDescriptor_t cyDesc, void *cy, - const cudnnRNNDataDescriptor_t kDesc, /* reserved, should pass NULL */ - const void *keys, /* reserved, should pass NULL */ - const cudnnRNNDataDescriptor_t cDesc, /* reserved, should pass NULL */ - void *cAttn, /* reserved, should pass NULL */ - const cudnnRNNDataDescriptor_t iDesc, /* 
reserved, should pass NULL */ - void *iAttn, /* reserved, should pass NULL */ - const cudnnRNNDataDescriptor_t qDesc, /* reserved, should pass NULL */ - void *queries, /* reserved, should pass NULL */ - void *workSpace, size_t workSpaceSizeInBytes, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, - const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, - const cudnnRNNDataDescriptor_t, const void *, - const cudnnRNNDataDescriptor_t, void *, const cudnnRNNDataDescriptor_t, - void *, const cudnnRNNDataDescriptor_t, void *, void *, size_t, void *, - size_t); - static auto func_ptr = LoadSymbol("cudnnRNNForwardTrainingEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, - yDesc, y, hyDesc, hy, cyDesc, cy, kDesc, keys, cDesc, cAttn, - iDesc, iAttn, qDesc, queries, workSpace, workSpaceSizeInBytes, - reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNForwardInferenceEx( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const cudnnRNNDataDescriptor_t xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnRNNDataDescriptor_t yDesc, void *y, - const cudnnTensorDescriptor_t hyDesc, void *hy, - const cudnnTensorDescriptor_t cyDesc, void *cy, - const cudnnRNNDataDescriptor_t kDesc, /* reserved, should pass NULL */ - const void *keys, /* reserved, should pass NULL */ - const cudnnRNNDataDescriptor_t cDesc, /* reserved, should pass NULL */ - void *cAttn, /* reserved, 
should pass NULL */ - const cudnnRNNDataDescriptor_t iDesc, /* reserved, should pass NULL */ - void *iAttn, /* reserved, should pass NULL */ - const cudnnRNNDataDescriptor_t qDesc, /* reserved, should pass NULL */ - void *queries, /* reserved, should pass NULL */ - void *workSpace, size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, - const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, - const cudnnRNNDataDescriptor_t, const void *, - const cudnnRNNDataDescriptor_t, void *, const cudnnRNNDataDescriptor_t, - void *, const cudnnRNNDataDescriptor_t, void *, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnRNNForwardInferenceEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, - yDesc, y, hyDesc, hy, cyDesc, cy, kDesc, keys, cDesc, cAttn, - iDesc, iAttn, qDesc, queries, workSpace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardDataEx( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const cudnnRNNDataDescriptor_t yDesc, const void *y, - const cudnnRNNDataDescriptor_t dyDesc, const void *dy, - const cudnnRNNDataDescriptor_t dcDesc, /* reserved, should pass NULL */ - const void *dcAttn, /* reserved, should pass NULL */ - const cudnnTensorDescriptor_t dhyDesc, const void *dhy, - const cudnnTensorDescriptor_t dcyDesc, const void *dcy, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnRNNDataDescriptor_t dxDesc, void *dx, - const cudnnTensorDescriptor_t dhxDesc, void *dhx, - const 
cudnnTensorDescriptor_t dcxDesc, void *dcx, - const cudnnRNNDataDescriptor_t dkDesc, /* reserved, should pass NULL */ - void *dkeys, /* reserved, should pass NULL */ - void *workSpace, size_t workSpaceSizeInBytes, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, - const void *, const cudnnRNNDataDescriptor_t, const void *, - const cudnnRNNDataDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, - const cudnnRNNDataDescriptor_t, void *, void *, size_t, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnRNNBackwardDataEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, yDesc, y, dyDesc, dy, dcDesc, dcAttn, - dhyDesc, dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, - dxDesc, dx, dhxDesc, dhx, dcxDesc, dcx, dkDesc, dkeys, - workSpace, workSpaceSizeInBytes, reserveSpace, - reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardWeightsEx( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const cudnnRNNDataDescriptor_t xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnRNNDataDescriptor_t yDesc, const void *y, void *workSpace, - size_t workSpaceSizeInBytes, const cudnnFilterDescriptor_t dwDesc, void *dw, - void *reserveSpace, size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, - const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnRNNDataDescriptor_t, const void *, void *, 
size_t, - const cudnnFilterDescriptor_t, void *, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnRNNBackwardWeightsEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, yDesc, y, workSpace, - workSpaceSizeInBytes, dwDesc, dw, reserveSpace, - reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetRNNAlgorithmDescriptor( - cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, - cudnnAlgorithmDescriptor_t algoDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, cudnnAlgorithmDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNAlgorithmDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, algoDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNForwardInferenceAlgorithmMaxCount( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetRNNForwardInferenceAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, count); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindRNNForwardInferenceAlgorithmEx( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t *yDesc, void *y, - const cudnnTensorDescriptor_t hyDesc, void *hy, - const cudnnTensorDescriptor_t cyDesc, void *cy, const float findIntensity, - const int requestedAlgoCount, int *returnedAlgoCount, - cudnnAlgorithmPerformance_t *perfResults, void *workspace, - size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const 
cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, const float, const int, - int *, cudnnAlgorithmPerformance_t *, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindRNNForwardInferenceAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, - wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, findIntensity, - requestedAlgoCount, returnedAlgoCount, perfResults, workspace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNForwardTrainingAlgorithmMaxCount( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetRNNForwardTrainingAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, count); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindRNNForwardTrainingAlgorithmEx( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t *yDesc, void *y, - const cudnnTensorDescriptor_t hyDesc, void *hy, - const cudnnTensorDescriptor_t cyDesc, void *cy, const float findIntensity, - const int requestedAlgoCount, int *returnedAlgoCount, - cudnnAlgorithmPerformance_t *perfResults, void *workspace, - size_t workSpaceSizeInBytes, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = 
cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, const float, const int, - int *, cudnnAlgorithmPerformance_t *, void *, size_t, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindRNNForwardTrainingAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, - wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, findIntensity, - requestedAlgoCount, returnedAlgoCount, perfResults, workspace, - workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNBackwardDataAlgorithmMaxCount( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetRNNBackwardDataAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, count); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindRNNBackwardDataAlgorithmEx( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *yDesc, const void *y, - const cudnnTensorDescriptor_t *dyDesc, const void *dy, - const cudnnTensorDescriptor_t dhyDesc, const void *dhy, - const cudnnTensorDescriptor_t dcyDesc, const void *dcy, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnTensorDescriptor_t *dxDesc, void *dx, - const cudnnTensorDescriptor_t dhxDesc, void *dhx, - const 
cudnnTensorDescriptor_t dcxDesc, void *dcx, const float findIntensity, - const int requestedAlgoCount, int *returnedAlgoCount, - cudnnAlgorithmPerformance_t *perfResults, void *workspace, - size_t workSpaceSizeInBytes, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, const float, const int, - int *, cudnnAlgorithmPerformance_t *, void *, size_t, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindRNNBackwardDataAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc, - dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc, - dx, dhxDesc, dhx, dcxDesc, dcx, findIntensity, - requestedAlgoCount, returnedAlgoCount, perfResults, workspace, - workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNBackwardWeightsAlgorithmMaxCount( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetRNNBackwardWeightsAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, count); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindRNNBackwardWeightsAlgorithmEx( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t 
*xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t *yDesc, const void *y, - const float findIntensity, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnAlgorithmPerformance_t *perfResults, - const void *workspace, size_t workSpaceSizeInBytes, - const cudnnFilterDescriptor_t dwDesc, void *dw, const void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, const void *, const float, const int, - int *, cudnnAlgorithmPerformance_t *, const void *, size_t, - const cudnnFilterDescriptor_t, void *, const void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindRNNBackwardWeightsAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y, - findIntensity, requestedAlgoCount, returnedAlgoCount, - perfResults, workspace, workSpaceSizeInBytes, dwDesc, dw, - reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateSeqDataDescriptor(cudnnSeqDataDescriptor_t *seqDataDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnSeqDataDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateSeqDataDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(seqDataDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroySeqDataDescriptor(cudnnSeqDataDescriptor_t seqDataDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnSeqDataDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroySeqDataDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(seqDataDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetSeqDataDescriptor( - cudnnSeqDataDescriptor_t seqDataDesc, cudnnDataType_t dataType, int nbDims, - const 
int dimA[], const cudnnSeqDataAxis_t axes[], - size_t seqLengthArraySize, const int seqLengthArray[], void *paddingFill) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnSeqDataDescriptor_t, cudnnDataType_t, int, const int[], - const cudnnSeqDataAxis_t[], size_t, const int[], void *); - static auto func_ptr = LoadSymbol("cudnnSetSeqDataDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(seqDataDesc, dataType, nbDims, dimA, axes, seqLengthArraySize, - seqLengthArray, paddingFill); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetSeqDataDescriptor( - const cudnnSeqDataDescriptor_t seqDataDesc, cudnnDataType_t *dataType, - int *nbDims, int nbDimsRequested, int dimA[], cudnnSeqDataAxis_t axes[], - size_t *seqLengthArraySize, size_t seqLengthSizeRequested, - int seqLengthArray[], void *paddingFill) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnSeqDataDescriptor_t, cudnnDataType_t *, int *, int, int[], - cudnnSeqDataAxis_t[], size_t *, size_t, int[], void *); - static auto func_ptr = LoadSymbol("cudnnGetSeqDataDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(seqDataDesc, dataType, nbDims, nbDimsRequested, dimA, axes, - seqLengthArraySize, seqLengthSizeRequested, seqLengthArray, - paddingFill); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateAttnDescriptor(cudnnAttnDescriptor_t *attnDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAttnDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateAttnDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attnDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyAttnDescriptor(cudnnAttnDescriptor_t attnDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAttnDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyAttnDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attnDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetAttnDescriptor( - cudnnAttnDescriptor_t attnDesc, 
cudnnAttnQueryMap_t queryMap, int nHeads, - double smScaler, cudnnDataType_t dataType, cudnnDataType_t computePrec, - cudnnMathType_t mathType, cudnnDropoutDescriptor_t attnDropoutDesc, - cudnnDropoutDescriptor_t postDropoutDesc, int qSize, int kSize, int vSize, - int qProjSize, int kProjSize, int vProjSize, int oProjSize, - int qoMaxSeqLength, int kvMaxSeqLength, int maxBatchSize, int maxBeamSize) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnAttnDescriptor_t, cudnnAttnQueryMap_t, int, double, cudnnDataType_t, - cudnnDataType_t, cudnnMathType_t, cudnnDropoutDescriptor_t, - cudnnDropoutDescriptor_t, int, int, int, int, int, int, int, int, int, - int, int); - static auto func_ptr = LoadSymbol("cudnnSetAttnDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attnDesc, queryMap, nHeads, smScaler, dataType, computePrec, - mathType, attnDropoutDesc, postDropoutDesc, qSize, kSize, - vSize, qProjSize, kProjSize, vProjSize, oProjSize, - qoMaxSeqLength, kvMaxSeqLength, maxBatchSize, maxBeamSize); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetAttnDescriptor( - cudnnAttnDescriptor_t attnDesc, cudnnAttnQueryMap_t *queryMap, int *nHeads, - double *smScaler, cudnnDataType_t *dataType, cudnnDataType_t *computePrec, - cudnnMathType_t *mathType, cudnnDropoutDescriptor_t *attnDropoutDesc, - cudnnDropoutDescriptor_t *postDropoutDesc, int *qSize, int *kSize, - int *vSize, int *qProjSize, int *kProjSize, int *vProjSize, int *oProjSize, - int *qoMaxSeqLength, int *kvMaxSeqLength, int *maxBatchSize, - int *maxBeamSize) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnAttnDescriptor_t, cudnnAttnQueryMap_t *, int *, double *, - cudnnDataType_t *, cudnnDataType_t *, cudnnMathType_t *, - cudnnDropoutDescriptor_t *, cudnnDropoutDescriptor_t *, int *, int *, - int *, int *, int *, int *, int *, int *, int *, int *, int *); - static auto func_ptr = LoadSymbol("cudnnGetAttnDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(attnDesc, queryMap, nHeads, smScaler, dataType, computePrec, - mathType, attnDropoutDesc, postDropoutDesc, qSize, kSize, - vSize, qProjSize, kProjSize, vProjSize, oProjSize, - qoMaxSeqLength, kvMaxSeqLength, maxBatchSize, maxBeamSize); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetMultiHeadAttnBuffers( - cudnnHandle_t handle, const cudnnAttnDescriptor_t attnDesc, - size_t *weightSizeInBytes, size_t *workSpaceSizeInBytes, - size_t *reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnAttnDescriptor_t, size_t *, size_t *, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetMultiHeadAttnBuffers"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, attnDesc, weightSizeInBytes, workSpaceSizeInBytes, - reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetMultiHeadAttnWeights( - cudnnHandle_t handle, const cudnnAttnDescriptor_t attnDesc, - cudnnMultiHeadAttnWeightKind_t wKind, size_t weightSizeInBytes, - const void *w, cudnnTensorDescriptor_t wDesc, void **wAddr) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnAttnDescriptor_t, - cudnnMultiHeadAttnWeightKind_t, size_t, const void *, - cudnnTensorDescriptor_t, void **); - static auto func_ptr = LoadSymbol("cudnnGetMultiHeadAttnWeights"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, attnDesc, wKind, weightSizeInBytes, w, wDesc, wAddr); -} - -cudnnStatus_t CUDNNWINAPI cudnnMultiHeadAttnForward( - cudnnHandle_t handle, const cudnnAttnDescriptor_t attnDesc, int currIdx, - const int *loWinIdx, const int *hiWinIdx, const int *seqLengthArrayQRO, - const int *seqLengthArrayKV, const cudnnSeqDataDescriptor_t qDesc, - const void *queries, const void *residuals, - const cudnnSeqDataDescriptor_t kDesc, const void *keys, - const cudnnSeqDataDescriptor_t vDesc, const void *values, - const cudnnSeqDataDescriptor_t oDesc, void *out, size_t weightSizeInBytes, - const void *w, size_t 
workSpaceSizeInBytes, void *workSpace, - size_t reserveSpaceSizeInBytes, void *reserveSpace) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnAttnDescriptor_t, int, const int *, const int *, - const int *, const int *, const cudnnSeqDataDescriptor_t, const void *, - const void *, const cudnnSeqDataDescriptor_t, const void *, - const cudnnSeqDataDescriptor_t, const void *, - const cudnnSeqDataDescriptor_t, void *, size_t, const void *, size_t, - void *, size_t, void *); - static auto func_ptr = LoadSymbol("cudnnMultiHeadAttnForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, attnDesc, currIdx, loWinIdx, hiWinIdx, - seqLengthArrayQRO, seqLengthArrayKV, qDesc, queries, - residuals, kDesc, keys, vDesc, values, oDesc, out, - weightSizeInBytes, w, workSpaceSizeInBytes, workSpace, - reserveSpaceSizeInBytes, reserveSpace); -} - -cudnnStatus_t CUDNNWINAPI cudnnMultiHeadAttnBackwardData( - cudnnHandle_t handle, const cudnnAttnDescriptor_t attnDesc, - const int *loWinIdx, const int *hiWinIdx, const int *seqLengthArrayDQDO, - const int *seqLengthArrayDKDV, const cudnnSeqDataDescriptor_t doDesc, - const void *dout, const cudnnSeqDataDescriptor_t dqDesc, void *dqueries, - const void *queries, const cudnnSeqDataDescriptor_t dkDesc, void *dkeys, - const void *keys, const cudnnSeqDataDescriptor_t dvDesc, void *dvalues, - const void *values, size_t weightSizeInBytes, const void *w, - size_t workSpaceSizeInBytes, void *workSpace, - size_t reserveSpaceSizeInBytes, void *reserveSpace) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnAttnDescriptor_t, const int *, const int *, - const int *, const int *, const cudnnSeqDataDescriptor_t, const void *, - const cudnnSeqDataDescriptor_t, void *, const void *, - const cudnnSeqDataDescriptor_t, void *, const void *, - const cudnnSeqDataDescriptor_t, void *, const void *, size_t, - const void *, size_t, void *, size_t, void *); - static auto func_ptr = 
LoadSymbol("cudnnMultiHeadAttnBackwardData"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, attnDesc, loWinIdx, hiWinIdx, seqLengthArrayDQDO, - seqLengthArrayDKDV, doDesc, dout, dqDesc, dqueries, queries, - dkDesc, dkeys, keys, dvDesc, dvalues, values, - weightSizeInBytes, w, workSpaceSizeInBytes, workSpace, - reserveSpaceSizeInBytes, reserveSpace); -} - -cudnnStatus_t CUDNNWINAPI cudnnMultiHeadAttnBackwardWeights( - cudnnHandle_t handle, const cudnnAttnDescriptor_t attnDesc, - cudnnWgradMode_t addGrad, const cudnnSeqDataDescriptor_t qDesc, - const void *queries, const cudnnSeqDataDescriptor_t kDesc, const void *keys, - const cudnnSeqDataDescriptor_t vDesc, const void *values, - const cudnnSeqDataDescriptor_t doDesc, const void *dout, - size_t weightSizeInBytes, const void *w, void *dw, - size_t workSpaceSizeInBytes, void *workSpace, - size_t reserveSpaceSizeInBytes, void *reserveSpace) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnAttnDescriptor_t, cudnnWgradMode_t, - const cudnnSeqDataDescriptor_t, const void *, - const cudnnSeqDataDescriptor_t, const void *, - const cudnnSeqDataDescriptor_t, const void *, - const cudnnSeqDataDescriptor_t, const void *, size_t, const void *, - void *, size_t, void *, size_t, void *); - static auto func_ptr = - LoadSymbol("cudnnMultiHeadAttnBackwardWeights"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, attnDesc, addGrad, qDesc, queries, kDesc, keys, vDesc, - values, doDesc, dout, weightSizeInBytes, w, dw, - workSpaceSizeInBytes, workSpace, reserveSpaceSizeInBytes, - reserveSpace); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateCTCLossDescriptor(cudnnCTCLossDescriptor_t *ctcLossDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateCTCLossDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctcLossDesc); -} - -cudnnStatus_t CUDNNWINAPI 
cudnnSetCTCLossDescriptor( - cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t compType) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetCTCLossDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctcLossDesc, compType); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetCTCLossDescriptorEx( - cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t compType, - cudnnLossNormalizationMode_t normMode, cudnnNanPropagation_t gradMode) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnCTCLossDescriptor_t, cudnnDataType_t, cudnnLossNormalizationMode_t, - cudnnNanPropagation_t); - static auto func_ptr = LoadSymbol("cudnnSetCTCLossDescriptorEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctcLossDesc, compType, normMode, gradMode); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossDescriptor( - cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t *compType) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t *); - static auto func_ptr = LoadSymbol("cudnnGetCTCLossDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctcLossDesc, compType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossDescriptorEx( - cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t *compType, - cudnnLossNormalizationMode_t *normMode, cudnnNanPropagation_t *gradMode) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnCTCLossDescriptor_t, cudnnDataType_t *, - cudnnLossNormalizationMode_t *, cudnnNanPropagation_t *); - static auto func_ptr = LoadSymbol("cudnnGetCTCLossDescriptorEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctcLossDesc, compType, normMode, gradMode); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t); - static auto func_ptr = 
LoadSymbol("cudnnDestroyCTCLossDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctcLossDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnCTCLoss( - cudnnHandle_t handle, - const cudnnTensorDescriptor_t - probsDesc, /* Tensor descriptor for probabilities, the dimensions are - T,N,A (T is the timing steps, N is the - mini batch size, A is the alphabet size) */ - const void *probs, /* probabilities after softmax, in GPU memory */ - const int *labels, /* labels, in CPU memory */ - const int *labelLengths, /* the length of each label, in CPU memory */ - const int *inputLengths, /* the lengths of timing steps in each batch, in - CPU memory */ - void *costs, /* the returned costs of CTC, in GPU memory */ - const cudnnTensorDescriptor_t - gradientsDesc, /* Tensor descriptor for gradients, the dimensions are - T,N,A */ - const void *gradients, /* the returned CTC gradients, in GPU memory, to - compute costs only, set it to NULL */ - cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */ - cudnnCTCLossDescriptor_t ctcLossDesc, - void *workspace, /* pointer to the workspace, in GPU memory */ - size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const int *, - const int *, const int *, void *, const cudnnTensorDescriptor_t, - const void *, cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, void *, - size_t); - static auto func_ptr = LoadSymbol("cudnnCTCLoss"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, probsDesc, probs, labels, labelLengths, inputLengths, - costs, gradientsDesc, gradients, algo, ctcLossDesc, workspace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossWorkspaceSize( - cudnnHandle_t handle, - const cudnnTensorDescriptor_t - probsDesc, /* Tensor descriptor for probabilities, the dimensions are - T,N,A (T is the - timing steps, N is the mini batch size, A is the alphabet - size) */ 
- const cudnnTensorDescriptor_t - gradientsDesc, /* Tensor descriptor for gradients, the - dimensions are T,N,A. To compute costs - only, set it to NULL */ - const int *labels, /* labels, in CPU memory */ - const int *labelLengths, /* the length of each label, in CPU memory */ - const int *inputLengths, /* the lengths of timing steps in each batch, in - CPU memory */ - cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */ - cudnnCTCLossDescriptor_t ctcLossDesc, size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const int *, const int *, const int *, - cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetCTCLossWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, probsDesc, gradientsDesc, labels, labelLengths, - inputLengths, algo, ctcLossDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateAlgorithmDescriptor(cudnnAlgorithmDescriptor_t *algoDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateAlgorithmDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetAlgorithmDescriptor( - cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t algorithm) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t, - cudnnAlgorithm_t); - static auto func_ptr = LoadSymbol("cudnnSetAlgorithmDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoDesc, algorithm); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetAlgorithmDescriptor( - const cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t *algorithm) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(const cudnnAlgorithmDescriptor_t, - cudnnAlgorithm_t *); - static auto func_ptr = 
LoadSymbol("cudnnGetAlgorithmDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoDesc, algorithm); -} - -cudnnStatus_t CUDNNWINAPI cudnnCopyAlgorithmDescriptor( - const cudnnAlgorithmDescriptor_t src, cudnnAlgorithmDescriptor_t dest) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(const cudnnAlgorithmDescriptor_t, - cudnnAlgorithmDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnCopyAlgorithmDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(src, dest); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyAlgorithmDescriptor(cudnnAlgorithmDescriptor_t algoDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyAlgorithmDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreateAlgorithmPerformance( - cudnnAlgorithmPerformance_t *algoPerf, int numberToCreate) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmPerformance_t *, int); - static auto func_ptr = LoadSymbol("cudnnCreateAlgorithmPerformance"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoPerf, numberToCreate); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetAlgorithmPerformance( - cudnnAlgorithmPerformance_t algoPerf, cudnnAlgorithmDescriptor_t algoDesc, - cudnnStatus_t status, float time, size_t memory) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmPerformance_t, - cudnnAlgorithmDescriptor_t, - cudnnStatus_t, float, size_t); - static auto func_ptr = LoadSymbol("cudnnSetAlgorithmPerformance"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoPerf, algoDesc, status, time, memory); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetAlgorithmPerformance( - const cudnnAlgorithmPerformance_t algoPerf, - cudnnAlgorithmDescriptor_t *algoDesc, cudnnStatus_t *status, float *time, - size_t *memory) { - using FuncPtr = 
cudnnStatus_t(CUDNNWINAPI *)( - const cudnnAlgorithmPerformance_t, cudnnAlgorithmDescriptor_t *, - cudnnStatus_t *, float *, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetAlgorithmPerformance"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoPerf, algoDesc, status, time, memory); -} - -cudnnStatus_t CUDNNWINAPI cudnnDestroyAlgorithmPerformance( - cudnnAlgorithmPerformance_t *algoPerf, int numberToDestroy) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmPerformance_t *, int); - static auto func_ptr = - LoadSymbol("cudnnDestroyAlgorithmPerformance"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoPerf, numberToDestroy); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetAlgorithmSpaceSize( - cudnnHandle_t handle, cudnnAlgorithmDescriptor_t algoDesc, - size_t *algoSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnAlgorithmDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetAlgorithmSpaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algoDesc, algoSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnSaveAlgorithm(cudnnHandle_t handle, cudnnAlgorithmDescriptor_t algoDesc, - void *algoSpace, size_t algoSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnAlgorithmDescriptor_t, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnSaveAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algoDesc, algoSpace, algoSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnRestoreAlgorithm( - cudnnHandle_t handle, void *algoSpace, size_t algoSpaceSizeInBytes, - cudnnAlgorithmDescriptor_t algoDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, void *, size_t, - cudnnAlgorithmDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnRestoreAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, algoSpace, algoSpaceSizeInBytes, algoDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetCallback(unsigned mask, void *udata, - cudnnCallback_t fptr) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(unsigned int, void *, cudnnCallback_t); - static auto func_ptr = LoadSymbol("cudnnSetCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mask, udata, fptr); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetCallback(unsigned *mask, void **udata, - cudnnCallback_t *fptr) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(unsigned int *, void **, cudnnCallback_t *); - static auto func_ptr = LoadSymbol("cudnnGetCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mask, udata, fptr); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreateFusedOpsConstParamPack( - cudnnFusedOpsConstParamPack_t *constPack, cudnnFusedOps_t ops) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsConstParamPack_t *, - cudnnFusedOps_t); - static auto func_ptr = - LoadSymbol("cudnnCreateFusedOpsConstParamPack"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(constPack, ops); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyFusedOpsConstParamPack(cudnnFusedOpsConstParamPack_t constPack) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsConstParamPack_t); - static auto func_ptr = - LoadSymbol("cudnnDestroyFusedOpsConstParamPack"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(constPack); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetFusedOpsConstParamPackAttribute( - cudnnFusedOpsConstParamPack_t constPack, - cudnnFusedOpsConstParamLabel_t paramLabel, const void *param) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsConstParamPack_t, - cudnnFusedOpsConstParamLabel_t, - const void *); - static auto func_ptr = - LoadSymbol("cudnnSetFusedOpsConstParamPackAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(constPack, paramLabel, param); -} - -cudnnStatus_t 
CUDNNWINAPI cudnnGetFusedOpsConstParamPackAttribute( - const cudnnFusedOpsConstParamPack_t constPack, - cudnnFusedOpsConstParamLabel_t paramLabel, void *param, int *isNULL) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnFusedOpsConstParamPack_t, cudnnFusedOpsConstParamLabel_t, - void *, int *); - static auto func_ptr = - LoadSymbol("cudnnGetFusedOpsConstParamPackAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(constPack, paramLabel, param, isNULL); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreateFusedOpsVariantParamPack( - cudnnFusedOpsVariantParamPack_t *varPack, cudnnFusedOps_t ops) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnFusedOpsVariantParamPack_t *, cudnnFusedOps_t); - static auto func_ptr = - LoadSymbol("cudnnCreateFusedOpsVariantParamPack"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(varPack, ops); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyFusedOpsVariantParamPack(cudnnFusedOpsVariantParamPack_t varPack) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsVariantParamPack_t); - static auto func_ptr = - LoadSymbol("cudnnDestroyFusedOpsVariantParamPack"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(varPack); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetFusedOpsVariantParamPackAttribute( - cudnnFusedOpsVariantParamPack_t varPack, - cudnnFusedOpsVariantParamLabel_t paramLabel, void *ptr) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsVariantParamPack_t, - cudnnFusedOpsVariantParamLabel_t, void *); - static auto func_ptr = - LoadSymbol("cudnnSetFusedOpsVariantParamPackAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(varPack, paramLabel, ptr); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetFusedOpsVariantParamPackAttribute( - const cudnnFusedOpsVariantParamPack_t varPack, - cudnnFusedOpsVariantParamLabel_t paramLabel, void *ptr) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(const 
cudnnFusedOpsVariantParamPack_t, - cudnnFusedOpsVariantParamLabel_t, void *); - static auto func_ptr = - LoadSymbol("cudnnGetFusedOpsVariantParamPackAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(varPack, paramLabel, ptr); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreateFusedOpsPlan(cudnnFusedOpsPlan_t *plan, - cudnnFusedOps_t ops) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsPlan_t *, cudnnFusedOps_t); - static auto func_ptr = LoadSymbol("cudnnCreateFusedOpsPlan"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, ops); -} - -cudnnStatus_t CUDNNWINAPI cudnnDestroyFusedOpsPlan(cudnnFusedOpsPlan_t plan) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsPlan_t); - static auto func_ptr = LoadSymbol("cudnnDestroyFusedOpsPlan"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan); -} - -cudnnStatus_t CUDNNWINAPI -cudnnMakeFusedOpsPlan(cudnnHandle_t handle, cudnnFusedOpsPlan_t plan, - const cudnnFusedOpsConstParamPack_t constPack, - size_t *workspaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnFusedOpsPlan_t, const cudnnFusedOpsConstParamPack_t, - size_t *); - static auto func_ptr = LoadSymbol("cudnnMakeFusedOpsPlan"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, plan, constPack, workspaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnFusedOpsExecute(cudnnHandle_t handle, const cudnnFusedOpsPlan_t plan, - cudnnFusedOpsVariantParamPack_t varPack) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, const cudnnFusedOpsPlan_t, - cudnnFusedOpsVariantParamPack_t); - static auto func_ptr = LoadSymbol("cudnnFusedOpsExecute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, plan, varPack); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor_v6( - cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, const int hiddenSize, - const int numLayers, 
cudnnDropoutDescriptor_t dropoutDesc, - cudnnRNNInputMode_t inputMode, cudnnDirectionMode_t direction, - cudnnRNNMode_t mode, cudnnRNNAlgo_t algo, cudnnDataType_t mathPrec) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int, - cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t, - cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNDescriptor_v6"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, - inputMode, direction, mode, algo, mathPrec); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor_v5( - cudnnRNNDescriptor_t rnnDesc, int hiddenSize, int numLayers, - cudnnDropoutDescriptor_t dropoutDesc, cudnnRNNInputMode_t inputMode, - cudnnDirectionMode_t direction, cudnnRNNMode_t mode, - cudnnDataType_t mathPrec) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnRNNDescriptor_t, int, int, cudnnDropoutDescriptor_t, - cudnnRNNInputMode_t, cudnnDirectionMode_t, cudnnRNNMode_t, - cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNDescriptor_v5"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode, - direction, mode, mathPrec); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cudnn_8_0.inc b/third_party/xla/third_party/tsl/tsl/cuda/cudnn_8_0.inc deleted file mode 100644 index d9bf35184e4e41..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cudnn_8_0.inc +++ /dev/null @@ -1,3213 +0,0 @@ -// Auto-generated, do not edit. 
- -extern "C" { -size_t CUDNNWINAPI cudnnGetVersion(void) { - using FuncPtr = size_t(CUDNNWINAPI *)(); - static auto func_ptr = LoadSymbol("cudnnGetVersion"); - if (!func_ptr) return 0; - return func_ptr(); -} - -size_t CUDNNWINAPI cudnnGetCudartVersion(void) { - using FuncPtr = size_t(CUDNNWINAPI *)(); - static auto func_ptr = LoadSymbol("cudnnGetCudartVersion"); - if (!func_ptr) return 0; - return func_ptr(); -} - -const char *CUDNNWINAPI cudnnGetErrorString(cudnnStatus_t status) { - using FuncPtr = const char *(CUDNNWINAPI *)(cudnnStatus_t); - static auto func_ptr = LoadSymbol("cudnnGetErrorString"); - if (!func_ptr) return "cudnnGetErrorString symbol not found."; - return func_ptr(status); -} - -cudnnStatus_t CUDNNWINAPI cudnnQueryRuntimeError(cudnnHandle_t handle, - cudnnStatus_t *rstatus, - cudnnErrQueryMode_t mode, - cudnnRuntimeTag_t *tag) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnStatus_t *, cudnnErrQueryMode_t, cudnnRuntimeTag_t *); - static auto func_ptr = LoadSymbol("cudnnQueryRuntimeError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rstatus, mode, tag); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetProperty(libraryPropertyType type, - int *value) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(libraryPropertyType, int *); - static auto func_ptr = LoadSymbol("cudnnGetProperty"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type, value); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreate(cudnnHandle_t *handle) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t *); - static auto func_ptr = LoadSymbol("cudnnCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cudnnStatus_t CUDNNWINAPI cudnnDestroy(cudnnHandle_t handle) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t); - static auto func_ptr = LoadSymbol("cudnnDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - - -#if 
CUDNN_MAJOR>=8 && (CUDNN_MINOR > 0 || CUDNN_PATCHLEVEL >= 4) -cudnnStatus_t CUDNNWINAPI cudnnCnnInferVersionCheck(void) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(); - static auto func_ptr = LoadSymbol("cudnnCnnInferVersionCheck"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -cudnnStatus_t CUDNNWINAPI cudnnCnnTrainVersionCheck(void) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(); - static auto func_ptr = LoadSymbol("cudnnCnnTrainVersionCheck"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} -#endif - -cudnnStatus_t CUDNNWINAPI cudnnSetStream(cudnnHandle_t handle, - cudaStream_t streamId) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudnnSetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetStream(cudnnHandle_t handle, - cudaStream_t *streamId) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t *); - static auto func_ptr = LoadSymbol("cudnnGetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensor4dDescriptor( - cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format, - cudnnDataType_t dataType, /* image data type */ - int n, /* number of inputs (batch size) */ - int c, /* number of input feature maps */ - int h, /* height of input section */ - int w) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t, - cudnnDataType_t, int, int, int, int); - static auto func_ptr 
= LoadSymbol("cudnnSetTensor4dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, format, dataType, n, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensor4dDescriptorEx( - cudnnTensorDescriptor_t tensorDesc, - cudnnDataType_t dataType, /* image data type */ - int n, /* number of inputs (batch size) */ - int c, /* number of input feature maps */ - int h, /* height of input section */ - int w, /* width of input section */ - int nStride, int cStride, int hStride, int wStride) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnDataType_t, - int, int, int, int, int, int, int, int); - static auto func_ptr = LoadSymbol("cudnnSetTensor4dDescriptorEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride, - wStride); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetTensor4dDescriptor( - const cudnnTensorDescriptor_t tensorDesc, - cudnnDataType_t *dataType, /* image data type */ - int *n, /* number of inputs (batch size) */ - int *c, /* number of input feature maps */ - int *h, /* height of input section */ - int *w, /* width of input section */ - int *nStride, int *cStride, int *hStride, int *wStride) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnTensorDescriptor_t, cudnnDataType_t *, int *, int *, int *, - int *, int *, int *, int *, int *); - static auto func_ptr = LoadSymbol("cudnnGetTensor4dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride, - wStride); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensorNdDescriptor( - cudnnTensorDescriptor_t tensorDesc, cudnnDataType_t dataType, int nbDims, - const int dimA[], const int strideA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnTensorDescriptor_t, cudnnDataType_t, int, const int[], const int[]); - static auto func_ptr = LoadSymbol("cudnnSetTensorNdDescriptor"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, dataType, nbDims, dimA, strideA); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensorNdDescriptorEx( - cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format, - cudnnDataType_t dataType, int nbDims, const int dimA[]) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t, - cudnnDataType_t, int, const int[]); - static auto func_ptr = LoadSymbol("cudnnSetTensorNdDescriptorEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, format, dataType, nbDims, dimA); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetTensorNdDescriptor( - const cudnnTensorDescriptor_t tensorDesc, int nbDimsRequested, - cudnnDataType_t *dataType, int *nbDims, int dimA[], int strideA[]) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(const cudnnTensorDescriptor_t, int, - cudnnDataType_t *, int *, int[], int[]); - static auto func_ptr = LoadSymbol("cudnnGetTensorNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, nbDimsRequested, dataType, nbDims, dimA, strideA); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetTensorSizeInBytes( - const cudnnTensorDescriptor_t tensorDesc, size_t *size) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(const cudnnTensorDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetTensorSizeInBytes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc, size); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyTensorDescriptor(cudnnTensorDescriptor_t tensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(tensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnInitTransformDest( - const cudnnTensorTransformDescriptor_t transformDesc, - const cudnnTensorDescriptor_t srcDesc, cudnnTensorDescriptor_t 
destDesc, - size_t *destSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnTensorTransformDescriptor_t, const cudnnTensorDescriptor_t, - cudnnTensorDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnInitTransformDest"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(transformDesc, srcDesc, destDesc, destSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreateTensorTransformDescriptor( - cudnnTensorTransformDescriptor_t *transformDesc) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorTransformDescriptor_t *); - static auto func_ptr = - LoadSymbol("cudnnCreateTensorTransformDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(transformDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensorTransformDescriptor( - cudnnTensorTransformDescriptor_t transformDesc, const uint32_t nbDims, - const cudnnTensorFormat_t destFormat, const int32_t padBeforeA[], - const int32_t padAfterA[], const uint32_t foldA[], - const cudnnFoldingDirection_t direction) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnTensorTransformDescriptor_t, const uint32_t, - const cudnnTensorFormat_t, const int32_t[], const int32_t[], - const uint32_t[], const cudnnFoldingDirection_t); - static auto func_ptr = - LoadSymbol("cudnnSetTensorTransformDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(transformDesc, nbDims, destFormat, padBeforeA, padAfterA, - foldA, direction); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetTensorTransformDescriptor( - cudnnTensorTransformDescriptor_t transformDesc, uint32_t nbDimsRequested, - cudnnTensorFormat_t *destFormat, int32_t padBeforeA[], int32_t padAfterA[], - uint32_t foldA[], cudnnFoldingDirection_t *direction) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnTensorTransformDescriptor_t, uint32_t, cudnnTensorFormat_t *, - int32_t[], int32_t[], uint32_t[], cudnnFoldingDirection_t *); - static auto func_ptr = - 
LoadSymbol("cudnnGetTensorTransformDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(transformDesc, nbDimsRequested, destFormat, padBeforeA, - padAfterA, foldA, direction); -} - -cudnnStatus_t CUDNNWINAPI cudnnDestroyTensorTransformDescriptor( - cudnnTensorTransformDescriptor_t transformDesc) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorTransformDescriptor_t); - static auto func_ptr = - LoadSymbol("cudnnDestroyTensorTransformDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(transformDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnTransformTensor( - cudnnHandle_t handle, const void *alpha, - const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, - const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnTransformTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnTransformTensorEx( - cudnnHandle_t handle, const cudnnTensorTransformDescriptor_t transDesc, - const void *alpha, const cudnnTensorDescriptor_t srcDesc, - const void *srcData, const void *beta, - const cudnnTensorDescriptor_t destDesc, void *destData) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorTransformDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnTransformTensorEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transDesc, alpha, srcDesc, srcData, beta, destDesc, - destData); -} - -cudnnStatus_t CUDNNWINAPI cudnnAddTensor(cudnnHandle_t handle, - const void *alpha, - const cudnnTensorDescriptor_t aDesc, - const 
void *A, const void *beta, - const cudnnTensorDescriptor_t cDesc, - void *C) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnAddTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, aDesc, A, beta, cDesc, C); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateOpTensorDescriptor(cudnnOpTensorDescriptor_t *opTensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateOpTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(opTensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetOpTensorDescriptor( - cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t opTensorOp, - cudnnDataType_t opTensorCompType, cudnnNanPropagation_t opTensorNanOpt) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t, - cudnnDataType_t, cudnnNanPropagation_t); - static auto func_ptr = LoadSymbol("cudnnSetOpTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(opTensorDesc, opTensorOp, opTensorCompType, opTensorNanOpt); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetOpTensorDescriptor( - const cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t *opTensorOp, - cudnnDataType_t *opTensorCompType, cudnnNanPropagation_t *opTensorNanOpt) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t *, cudnnDataType_t *, - cudnnNanPropagation_t *); - static auto func_ptr = LoadSymbol("cudnnGetOpTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(opTensorDesc, opTensorOp, opTensorCompType, opTensorNanOpt); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc) { - using FuncPtr = 
cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyOpTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(opTensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnOpTensor( - cudnnHandle_t handle, const cudnnOpTensorDescriptor_t opTensorDesc, - const void *alpha1, const cudnnTensorDescriptor_t aDesc, const void *A, - const void *alpha2, const cudnnTensorDescriptor_t bDesc, const void *B, - const void *beta, const cudnnTensorDescriptor_t cDesc, void *C) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnOpTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnOpTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opTensorDesc, alpha1, aDesc, A, alpha2, bDesc, B, - beta, cDesc, C); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreateReduceTensorDescriptor( - cudnnReduceTensorDescriptor_t *reduceTensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t *); - static auto func_ptr = - LoadSymbol("cudnnCreateReduceTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(reduceTensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetReduceTensorDescriptor( - cudnnReduceTensorDescriptor_t reduceTensorDesc, - cudnnReduceTensorOp_t reduceTensorOp, cudnnDataType_t reduceTensorCompType, - cudnnNanPropagation_t reduceTensorNanOpt, - cudnnReduceTensorIndices_t reduceTensorIndices, - cudnnIndicesType_t reduceTensorIndicesType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t, cudnnDataType_t, - cudnnNanPropagation_t, cudnnReduceTensorIndices_t, cudnnIndicesType_t); - static auto func_ptr = LoadSymbol("cudnnSetReduceTensorDescriptor"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType, - reduceTensorNanOpt, reduceTensorIndices, - reduceTensorIndicesType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetReduceTensorDescriptor( - const cudnnReduceTensorDescriptor_t reduceTensorDesc, - cudnnReduceTensorOp_t *reduceTensorOp, - cudnnDataType_t *reduceTensorCompType, - cudnnNanPropagation_t *reduceTensorNanOpt, - cudnnReduceTensorIndices_t *reduceTensorIndices, - cudnnIndicesType_t *reduceTensorIndicesType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t *, - cudnnDataType_t *, cudnnNanPropagation_t *, cudnnReduceTensorIndices_t *, - cudnnIndicesType_t *); - static auto func_ptr = LoadSymbol("cudnnGetReduceTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType, - reduceTensorNanOpt, reduceTensorIndices, - reduceTensorIndicesType); -} - -cudnnStatus_t CUDNNWINAPI cudnnDestroyReduceTensorDescriptor( - cudnnReduceTensorDescriptor_t reduceTensorDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t); - static auto func_ptr = - LoadSymbol("cudnnDestroyReduceTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(reduceTensorDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetReductionIndicesSize( - cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, - const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnReduceTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetReductionIndicesSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes); -} - -cudnnStatus_t 
CUDNNWINAPI cudnnGetReductionWorkspaceSize( - cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, - const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnReduceTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetReductionWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnReduceTensor( - cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, - void *indices, size_t indicesSizeInBytes, void *workspace, - size_t workspaceSizeInBytes, const void *alpha, - const cudnnTensorDescriptor_t aDesc, const void *A, const void *beta, - const cudnnTensorDescriptor_t cDesc, void *C) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnReduceTensorDescriptor_t, void *, size_t, - void *, size_t, const void *, const cudnnTensorDescriptor_t, const void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnReduceTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, reduceTensorDesc, indices, indicesSizeInBytes, - workspace, workspaceSizeInBytes, alpha, aDesc, A, beta, cDesc, - C); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetTensor(cudnnHandle_t handle, - const cudnnTensorDescriptor_t yDesc, - void *y, const void *valuePtr) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *); - static auto func_ptr = LoadSymbol("cudnnSetTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, yDesc, y, valuePtr); -} - -cudnnStatus_t CUDNNWINAPI cudnnScaleTensor(cudnnHandle_t handle, - const cudnnTensorDescriptor_t yDesc, - void *y, const 
void *alpha) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *); - static auto func_ptr = LoadSymbol("cudnnScaleTensor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, yDesc, y, alpha); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateFilterDescriptor(cudnnFilterDescriptor_t *filterDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateFilterDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetFilter4dDescriptor( - cudnnFilterDescriptor_t filterDesc, - cudnnDataType_t dataType, /* image data type */ - cudnnTensorFormat_t format, int k, /* number of output feature maps */ - int c, /* number of input feature maps */ - int h, /* height of each input filter */ - int w) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t, - cudnnTensorFormat_t, int, int, int, int); - static auto func_ptr = LoadSymbol("cudnnSetFilter4dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc, dataType, format, k, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetFilter4dDescriptor( - const cudnnFilterDescriptor_t filterDesc, - cudnnDataType_t *dataType, /* image data type */ - cudnnTensorFormat_t *format, int *k, /* number of output feature maps */ - int *c, /* number of input feature maps */ - int *h, /* height of each input filter */ - int *w) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnFilterDescriptor_t, cudnnDataType_t *, cudnnTensorFormat_t *, - int *, int *, int *, int *); - static auto func_ptr = LoadSymbol("cudnnGetFilter4dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc, dataType, format, k, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetFilterNdDescriptor( - cudnnFilterDescriptor_t 
filterDesc, - cudnnDataType_t dataType, /* image data type */ - cudnnTensorFormat_t format, int nbDims, const int filterDimA[]) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t, - cudnnTensorFormat_t, int, const int[]); - static auto func_ptr = LoadSymbol("cudnnSetFilterNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc, dataType, format, nbDims, filterDimA); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetFilterNdDescriptor( - const cudnnFilterDescriptor_t filterDesc, int nbDimsRequested, - cudnnDataType_t *dataType, /* image data type */ - cudnnTensorFormat_t *format, int *nbDims, int filterDimA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnFilterDescriptor_t, int, cudnnDataType_t *, - cudnnTensorFormat_t *, int *, int[]); - static auto func_ptr = LoadSymbol("cudnnGetFilterNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc, nbDimsRequested, dataType, format, nbDims, - filterDimA); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetFilterSizeInBytes( - const cudnnFilterDescriptor_t filterDesc, size_t *size) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(const cudnnFilterDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetFilterSizeInBytes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc, size); -} - -cudnnStatus_t CUDNNWINAPI cudnnTransformFilter( - cudnnHandle_t handle, const cudnnTensorTransformDescriptor_t transDesc, - const void *alpha, const cudnnFilterDescriptor_t srcDesc, - const void *srcData, const void *beta, - const cudnnFilterDescriptor_t destDesc, void *destData) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorTransformDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, const void *, - const cudnnFilterDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnTransformFilter"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, transDesc, alpha, srcDesc, srcData, beta, destDesc, - destData); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyFilterDescriptor(cudnnFilterDescriptor_t filterDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyFilterDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(filterDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSoftmaxForward( - cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnSoftmaxForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, mode, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t *poolingDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPoolingDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreatePoolingDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetPooling2dDescriptor( - cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t mode, - cudnnNanPropagation_t maxpoolingNanOpt, int windowHeight, int windowWidth, - int verticalPadding, int horizontalPadding, int verticalStride, - int horizontalStride) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnPoolingDescriptor_t, cudnnPoolingMode_t, cudnnNanPropagation_t, int, - int, int, int, int, int); - static auto func_ptr = LoadSymbol("cudnnSetPooling2dDescriptor"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight, - windowWidth, verticalPadding, horizontalPadding, - verticalStride, horizontalStride); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetPooling2dDescriptor( - const cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t *mode, - cudnnNanPropagation_t *maxpoolingNanOpt, int *windowHeight, - int *windowWidth, int *verticalPadding, int *horizontalPadding, - int *verticalStride, int *horizontalStride) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnPoolingDescriptor_t, cudnnPoolingMode_t *, - cudnnNanPropagation_t *, int *, int *, int *, int *, int *, int *); - static auto func_ptr = LoadSymbol("cudnnGetPooling2dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight, - windowWidth, verticalPadding, horizontalPadding, - verticalStride, horizontalStride); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetPoolingNdDescriptor( - cudnnPoolingDescriptor_t poolingDesc, const cudnnPoolingMode_t mode, - const cudnnNanPropagation_t maxpoolingNanOpt, int nbDims, - const int windowDimA[], const int paddingA[], const int strideA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnPoolingDescriptor_t, const cudnnPoolingMode_t, - const cudnnNanPropagation_t, int, const int[], const int[], const int[]); - static auto func_ptr = LoadSymbol("cudnnSetPoolingNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, mode, maxpoolingNanOpt, nbDims, windowDimA, - paddingA, strideA); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetPoolingNdDescriptor( - const cudnnPoolingDescriptor_t poolingDesc, int nbDimsRequested, - cudnnPoolingMode_t *mode, cudnnNanPropagation_t *maxpoolingNanOpt, - int *nbDims, int windowDimA[], int paddingA[], int strideA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnPoolingDescriptor_t, int, cudnnPoolingMode_t *, - cudnnNanPropagation_t *, int 
*, int[], int[], int[]); - static auto func_ptr = LoadSymbol("cudnnGetPoolingNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, nbDimsRequested, mode, maxpoolingNanOpt, nbDims, - windowDimA, paddingA, strideA); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetPoolingNdForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc, - const cudnnTensorDescriptor_t inputTensorDesc, - int nbDims, int outputTensorDimA[]) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, - const cudnnTensorDescriptor_t, int, int[]); - static auto func_ptr = - LoadSymbol("cudnnGetPoolingNdForwardOutputDim"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, inputTensorDesc, nbDims, outputTensorDimA); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetPooling2dForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc, - const cudnnTensorDescriptor_t inputTensorDesc, - int *n, int *c, int *h, int *w) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, - const cudnnTensorDescriptor_t, - int *, int *, int *, int *); - static auto func_ptr = - LoadSymbol("cudnnGetPooling2dForwardOutputDim"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc, inputTensorDesc, n, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyPoolingDescriptor(cudnnPoolingDescriptor_t poolingDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPoolingDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyPoolingDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(poolingDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnPoolingForward( - cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnPoolingDescriptor_t, 
const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnPoolingForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, poolingDesc, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateActivationDescriptor(cudnnActivationDescriptor_t *activationDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateActivationDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(activationDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetActivationDescriptor( - cudnnActivationDescriptor_t activationDesc, cudnnActivationMode_t mode, - cudnnNanPropagation_t reluNanOpt, double coef) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t, - cudnnActivationMode_t, - cudnnNanPropagation_t, double); - static auto func_ptr = LoadSymbol("cudnnSetActivationDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(activationDesc, mode, reluNanOpt, coef); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetActivationDescriptor(const cudnnActivationDescriptor_t activationDesc, - cudnnActivationMode_t *mode, - cudnnNanPropagation_t *reluNanOpt, double *coef) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnActivationDescriptor_t, cudnnActivationMode_t *, - cudnnNanPropagation_t *, double *); - static auto func_ptr = LoadSymbol("cudnnGetActivationDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(activationDesc, mode, reluNanOpt, coef); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyActivationDescriptor(cudnnActivationDescriptor_t activationDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t); - static auto func_ptr = - LoadSymbol("cudnnDestroyActivationDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(activationDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnActivationForward( - cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnActivationDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnActivationForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, activationDesc, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateLRNDescriptor(cudnnLRNDescriptor_t *normDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnLRNDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateLRNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(normDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetLRNDescriptor(cudnnLRNDescriptor_t normDesc, - unsigned lrnN, double lrnAlpha, - double lrnBeta, double lrnK) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnLRNDescriptor_t, unsigned int, double, double, double); - static auto func_ptr = LoadSymbol("cudnnSetLRNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(normDesc, lrnN, lrnAlpha, lrnBeta, lrnK); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc, - unsigned *lrnN, - double *lrnAlpha, - double *lrnBeta, double *lrnK) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnLRNDescriptor_t, unsigned int *, double *, double *, double *); - static auto func_ptr = LoadSymbol("cudnnGetLRNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(normDesc, lrnN, lrnAlpha, lrnBeta, lrnK); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyLRNDescriptor(cudnnLRNDescriptor_t lrnDesc) { - using FuncPtr = 
cudnnStatus_t(CUDNNWINAPI *)(cudnnLRNDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyLRNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(lrnDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnLRNCrossChannelForward( - cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnLRNCrossChannelForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, normDesc, lrnMode, alpha, xDesc, x, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnDivisiveNormalizationForward( - cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, - cudnnDivNormMode_t mode, const void *alpha, - const cudnnTensorDescriptor_t xDesc, /* same desc for means, temp, temp2 */ - const void *x, - const void *means, /* if NULL, means are assumed to be zero */ - void *temp, void *temp2, const void *beta, - const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, void *, void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = - LoadSymbol("cudnnDivisiveNormalizationForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, temp, temp2, - beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnDeriveBNTensorDescriptor( - cudnnTensorDescriptor_t derivedBnDesc, const cudnnTensorDescriptor_t xDesc, - cudnnBatchNormMode_t mode) { - using FuncPtr = 
cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, - cudnnBatchNormMode_t); - static auto func_ptr = LoadSymbol("cudnnDeriveBNTensorDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(derivedBnDesc, xDesc, mode); -} - -cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardInference( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, - const void *alpha, /* alpha[0] = result blend factor */ - const void *beta, /* beta[0] = dest layer blend factor */ - const cudnnTensorDescriptor_t xDesc, const void *x, /* NxCxHxW */ - const cudnnTensorDescriptor_t yDesc, void *y, /* NxCxHxW */ - const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, const void *bnScale, - const void *bnBias, const void *estimatedMean, - const void *estimatedVariance, double epsilon) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, - const void *, const void *, const void *, const void *, double); - static auto func_ptr = - LoadSymbol("cudnnBatchNormalizationForwardInference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, alpha, beta, xDesc, x, yDesc, y, - bnScaleBiasMeanVarDesc, bnScale, bnBias, estimatedMean, - estimatedVariance, epsilon); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreateSpatialTransformerDescriptor( - cudnnSpatialTransformerDescriptor_t *stDesc) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t *); - static auto func_ptr = - LoadSymbol("cudnnCreateSpatialTransformerDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetSpatialTransformerNdDescriptor( - cudnnSpatialTransformerDescriptor_t stDesc, cudnnSamplerType_t samplerType, - cudnnDataType_t dataType, const int nbDims, const int dimA[]) 
{ - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnSpatialTransformerDescriptor_t, cudnnSamplerType_t, cudnnDataType_t, - const int, const int[]); - static auto func_ptr = - LoadSymbol("cudnnSetSpatialTransformerNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stDesc, samplerType, dataType, nbDims, dimA); -} - -cudnnStatus_t CUDNNWINAPI cudnnDestroySpatialTransformerDescriptor( - cudnnSpatialTransformerDescriptor_t stDesc) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t); - static auto func_ptr = - LoadSymbol("cudnnDestroySpatialTransformerDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSpatialTfGridGeneratorForward( - cudnnHandle_t handle, const cudnnSpatialTransformerDescriptor_t stDesc, - const void *theta, void *grid) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *, - void *); - static auto func_ptr = - LoadSymbol("cudnnSpatialTfGridGeneratorForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, stDesc, theta, grid); -} - -cudnnStatus_t CUDNNWINAPI cudnnSpatialTfSamplerForward( - cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *grid, const void *beta, cudnnTensorDescriptor_t yDesc, - void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, const void *, - cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnSpatialTfSamplerForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, stDesc, alpha, xDesc, x, grid, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t 
*dropoutDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateDropoutDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dropoutDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyDropoutDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dropoutDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnDropoutGetStatesSize(cudnnHandle_t handle, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnDropoutGetStatesSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnDropoutGetReserveSpaceSize( - cudnnTensorDescriptor_t xdesc, size_t *sizeInBytes) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnDropoutGetReserveSpaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(xdesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetDropoutDescriptor( - cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float dropout, - void *states, size_t stateSizeInBytes, unsigned long long seed) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, - float, void *, size_t, unsigned long long); - static auto func_ptr = LoadSymbol("cudnnSetDropoutDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dropoutDesc, handle, dropout, states, stateSizeInBytes, seed); -} - -cudnnStatus_t CUDNNWINAPI cudnnRestoreDropoutDescriptor( - cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float dropout, - void *states, size_t stateSizeInBytes, unsigned 
long long seed) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, - float, void *, size_t, unsigned long long); - static auto func_ptr = LoadSymbol("cudnnRestoreDropoutDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dropoutDesc, handle, dropout, states, stateSizeInBytes, seed); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetDropoutDescriptor( - cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float *dropout, - void **states, unsigned long long *seed) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, - float *, void **, unsigned long long *); - static auto func_ptr = LoadSymbol("cudnnGetDropoutDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dropoutDesc, handle, dropout, states, seed); -} - -cudnnStatus_t CUDNNWINAPI cudnnDropoutForward( - cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc, - const cudnnTensorDescriptor_t xdesc, const void *x, - const cudnnTensorDescriptor_t ydesc, void *y, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnDropoutDescriptor_t, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnDropoutForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dropoutDesc, xdesc, x, ydesc, y, reserveSpace, - reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreateAlgorithmPerformance( - cudnnAlgorithmPerformance_t *algoPerf, int numberToCreate) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmPerformance_t *, int); - static auto func_ptr = LoadSymbol("cudnnCreateAlgorithmPerformance"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoPerf, numberToCreate); -} - -cudnnStatus_t CUDNNWINAPI cudnnDestroyAlgorithmPerformance( - 
cudnnAlgorithmPerformance_t *algoPerf, int numberToDestroy) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmPerformance_t *, int); - static auto func_ptr = - LoadSymbol("cudnnDestroyAlgorithmPerformance"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(algoPerf, numberToDestroy); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetCallback(unsigned mask, void *udata, - cudnnCallback_t fptr) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(unsigned int, void *, cudnnCallback_t); - static auto func_ptr = LoadSymbol("cudnnSetCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mask, udata, fptr); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetCallback(unsigned *mask, void **udata, - cudnnCallback_t *fptr) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(unsigned int *, void **, cudnnCallback_t *); - static auto func_ptr = LoadSymbol("cudnnGetCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mask, udata, fptr); -} - -cudnnStatus_t CUDNNWINAPI cudnnOpsInferVersionCheck(void) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(); - static auto func_ptr = LoadSymbol("cudnnOpsInferVersionCheck"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t *convDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t *); - static auto func_ptr = - LoadSymbol("cudnnCreateConvolutionDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyConvolutionDescriptor(cudnnConvolutionDescriptor_t convDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t); - static auto func_ptr = - LoadSymbol("cudnnDestroyConvolutionDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionMathType( - 
cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t mathType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, - cudnnMathType_t); - static auto func_ptr = LoadSymbol("cudnnSetConvolutionMathType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, mathType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionMathType( - cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t *mathType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, - cudnnMathType_t *); - static auto func_ptr = LoadSymbol("cudnnGetConvolutionMathType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, mathType); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionGroupCount( - cudnnConvolutionDescriptor_t convDesc, int groupCount) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int); - static auto func_ptr = LoadSymbol("cudnnSetConvolutionGroupCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, groupCount); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionGroupCount( - cudnnConvolutionDescriptor_t convDesc, int *groupCount) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int *); - static auto func_ptr = LoadSymbol("cudnnGetConvolutionGroupCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, groupCount); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionReorderType( - cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t reorderType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, - cudnnReorderType_t); - static auto func_ptr = LoadSymbol("cudnnSetConvolutionReorderType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, reorderType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionReorderType( - cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t *reorderType) { - using 
FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, - cudnnReorderType_t *); - static auto func_ptr = LoadSymbol("cudnnGetConvolutionReorderType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, reorderType); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetConvolution2dDescriptor( - cudnnConvolutionDescriptor_t convDesc, int pad_h, /* zero-padding height */ - int pad_w, /* zero-padding width */ - int u, /* vertical filter stride */ - int v, /* horizontal filter stride */ - int dilation_h, /* filter dilation in the vertical dimension */ - int dilation_w, /* filter dilation in the horizontal dimension */ - cudnnConvolutionMode_t mode, cudnnDataType_t computeType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnConvolutionDescriptor_t, int, int, int, int, int, int, - cudnnConvolutionMode_t, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetConvolution2dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode, - computeType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dDescriptor( - const cudnnConvolutionDescriptor_t convDesc, - int *pad_h, /* zero-padding height */ - int *pad_w, /* zero-padding width */ - int *u, /* vertical filter stride */ - int *v, /* horizontal filter stride */ - int *dilation_h, /* filter dilation in the vertical dimension */ - int *dilation_w, /* filter dilation in the horizontal dimension */ - cudnnConvolutionMode_t *mode, cudnnDataType_t *computeType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnConvolutionDescriptor_t, int *, int *, int *, int *, int *, - int *, cudnnConvolutionMode_t *, cudnnDataType_t *); - static auto func_ptr = LoadSymbol("cudnnGetConvolution2dDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode, - computeType); -} - -cudnnStatus_t CUDNNWINAPI 
cudnnSetConvolutionNdDescriptor( - cudnnConvolutionDescriptor_t convDesc, int arrayLength, /* nbDims-2 size */ - const int padA[], const int filterStrideA[], const int dilationA[], - cudnnConvolutionMode_t mode, cudnnDataType_t computeType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnConvolutionDescriptor_t, int, const int[], const int[], const int[], - cudnnConvolutionMode_t, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetConvolutionNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, arrayLength, padA, filterStrideA, dilationA, mode, - computeType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionNdDescriptor( - const cudnnConvolutionDescriptor_t convDesc, int arrayLengthRequested, - int *arrayLength, int padA[], int strideA[], int dilationA[], - cudnnConvolutionMode_t *mode, cudnnDataType_t *computeType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnConvolutionDescriptor_t, int, int *, int[], int[], int[], - cudnnConvolutionMode_t *, cudnnDataType_t *); - static auto func_ptr = LoadSymbol("cudnnGetConvolutionNdDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, arrayLengthRequested, arrayLength, padA, strideA, - dilationA, mode, computeType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dForwardOutputDim( - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t inputTensorDesc, - const cudnnFilterDescriptor_t filterDesc, int *n, int *c, int *h, int *w) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, int *, int *, int *, int *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolution2dForwardOutputDim"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, inputTensorDesc, filterDesc, n, c, h, w); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionNdForwardOutputDim( - const 
cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t inputTensorDesc, - const cudnnFilterDescriptor_t filterDesc, int nbDims, - int tensorOuputDimA[]) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, int, int[]); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionNdForwardOutputDim"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(convDesc, inputTensorDesc, filterDesc, nbDims, - tensorOuputDimA); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetConvolutionForwardAlgorithmMaxCount(cudnnHandle_t handle, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionForwardAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, count); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardAlgorithm_v7( - cudnnHandle_t handle, const cudnnTensorDescriptor_t srcDesc, - const cudnnFilterDescriptor_t filterDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t destDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, const int, int *, - cudnnConvolutionFwdAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionForwardAlgorithm_v7"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, srcDesc, filterDesc, convDesc, destDesc, - requestedAlgoCount, returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionForwardAlgorithm( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnFilterDescriptor_t wDesc, - const cudnnConvolutionDescriptor_t convDesc, - 
const cudnnTensorDescriptor_t yDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, const int, int *, - cudnnConvolutionFwdAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionForwardAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, requestedAlgoCount, - returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionForwardAlgorithmEx( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t yDesc, void *y, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults, - void *workSpace, size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *, - const int, int *, cudnnConvolutionFwdAlgoPerf_t *, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionForwardAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, x, wDesc, w, convDesc, yDesc, y, - requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnIm2Col(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const void *x, const cudnnFilterDescriptor_t wDesc, - const cudnnConvolutionDescriptor_t convDesc, void *colBuffer) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, - const 
void *, const cudnnFilterDescriptor_t, - const cudnnConvolutionDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnIm2Col"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, x, wDesc, convDesc, colBuffer); -} - -cudnnStatus_t CUDNNWINAPI cudnnReorderFilterAndBias( - cudnnHandle_t handle, const cudnnFilterDescriptor_t filterDesc, - cudnnReorderType_t reorderType, const void *filterData, - void *reorderedFilterData, int reorderBias, const void *biasData, - void *reorderedBiasData) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, cudnnReorderType_t, - const void *, void *, int, const void *, void *); - static auto func_ptr = LoadSymbol("cudnnReorderFilterAndBias"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, filterDesc, reorderType, filterData, - reorderedFilterData, reorderBias, biasData, - reorderedBiasData); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardWorkspaceSize( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnFilterDescriptor_t wDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t yDesc, cudnnConvolutionFwdAlgo_t algo, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, cudnnConvolutionFwdAlgo_t, size_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionForwardWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, algo, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionForward( - cudnnHandle_t handle, const void *alpha, - const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo, - 
void *workSpace, size_t workSpaceSizeInBytes, const void *beta, - const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *, - size_t, const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnConvolutionForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, xDesc, x, wDesc, w, convDesc, algo, workSpace, - workSpaceSizeInBytes, beta, yDesc, y); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionBiasActivationForward( - cudnnHandle_t handle, const void *alpha1, - const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo, - void *workSpace, size_t workSpaceSizeInBytes, const void *alpha2, - const cudnnTensorDescriptor_t zDesc, const void *z, - const cudnnTensorDescriptor_t biasDesc, const void *bias, - const cudnnActivationDescriptor_t activationDesc, - const cudnnTensorDescriptor_t yDesc, void *y) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *, - size_t, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnActivationDescriptor_t, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = - LoadSymbol("cudnnConvolutionBiasActivationForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha1, xDesc, x, wDesc, w, convDesc, algo, workSpace, - workSpaceSizeInBytes, alpha2, zDesc, z, biasDesc, bias, - activationDesc, yDesc, y); -} - -cudnnStatus_t 
CUDNNWINAPI -cudnnBackendCreateDescriptor(cudnnBackendDescriptorType_t descriptorType, - cudnnBackendDescriptor_t *descriptor) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnBackendDescriptorType_t, - cudnnBackendDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnBackendCreateDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descriptorType, descriptor); -} - -cudnnStatus_t CUDNNWINAPI -cudnnBackendDestroyDescriptor(cudnnBackendDescriptor_t descriptor) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnBackendDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnBackendDestroyDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descriptor); -} - -cudnnStatus_t CUDNNWINAPI -cudnnBackendFinalize(cudnnBackendDescriptor_t descriptor) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnBackendDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnBackendFinalize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descriptor); -} - -cudnnStatus_t CUDNNWINAPI -cudnnBackendSetAttribute(cudnnBackendDescriptor_t descriptor, - cudnnBackendAttributeName_t attributeName, - cudnnBackendAttributeType_t attributeType, - int64_t elementCount, const void *arrayOfElements) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnBackendDescriptor_t, cudnnBackendAttributeName_t, - cudnnBackendAttributeType_t, int64_t, const void *); - static auto func_ptr = LoadSymbol("cudnnBackendSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descriptor, attributeName, attributeType, elementCount, - arrayOfElements); -} - -cudnnStatus_t CUDNNWINAPI cudnnBackendGetAttribute( - cudnnBackendDescriptor_t const descriptor, - cudnnBackendAttributeName_t attributeName, - cudnnBackendAttributeType_t attributeType, int64_t requestedElementCount, - int64_t *elementCount, void *arrayOfElements) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnBackendDescriptor_t 
const, cudnnBackendAttributeName_t, - cudnnBackendAttributeType_t, int64_t, int64_t *, void *); - static auto func_ptr = LoadSymbol("cudnnBackendGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descriptor, attributeName, attributeType, - requestedElementCount, elementCount, arrayOfElements); -} - -cudnnStatus_t CUDNNWINAPI cudnnBackendExecute( - cudnnHandle_t handle, cudnnBackendDescriptor_t executionPlan, - cudnnBackendDescriptor_t variantPack) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBackendDescriptor_t, cudnnBackendDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnBackendExecute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, executionPlan, variantPack); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithmMaxCount( - cudnnHandle_t handle, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardDataAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, count); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardDataAlgorithm( - cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t dxDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, const int, int *, - cudnnConvolutionBwdDataAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionBackwardDataAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, requestedAlgoCount, - returnedAlgoCount, 
perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardDataAlgorithmEx( - cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t dxDesc, void *dx, - const int requestedAlgoCount, int *returnedAlgoCount, - cudnnConvolutionBwdDataAlgoPerf_t *perfResults, void *workSpace, - size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *, - const int, int *, cudnnConvolutionBwdDataAlgoPerf_t *, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionBackwardDataAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, wDesc, w, dyDesc, dy, convDesc, dxDesc, dx, - requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithm_v7( - cudnnHandle_t handle, const cudnnFilterDescriptor_t filterDesc, - const cudnnTensorDescriptor_t diffDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t gradDesc, const int requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, const int, int *, - cudnnConvolutionBwdDataAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardDataAlgorithm_v7"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, filterDesc, diffDesc, convDesc, gradDesc, - requestedAlgoCount, returnedAlgoCount, perfResults); -} - -cudnnStatus_t 
CUDNNWINAPI cudnnGetConvolutionBackwardDataWorkspaceSize( - cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t dxDesc, cudnnConvolutionBwdDataAlgo_t algo, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnFilterDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataAlgo_t, size_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardDataWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, algo, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardData( - cudnnHandle_t handle, const void *alpha, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnConvolutionDescriptor_t convDesc, - cudnnConvolutionBwdDataAlgo_t algo, void *workSpace, - size_t workSpaceSizeInBytes, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdDataAlgo_t, void *, - size_t, const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnConvolutionBackwardData"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, wDesc, w, dyDesc, dy, convDesc, algo, - workSpace, workSpaceSizeInBytes, beta, dxDesc, dx); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetFoldedConvBackwardDataDescriptors( - const cudnnHandle_t handle, const cudnnFilterDescriptor_t filterDesc, - const cudnnTensorDescriptor_t diffDesc, - const cudnnConvolutionDescriptor_t convDesc, - const 
cudnnTensorDescriptor_t gradDesc, - const cudnnTensorFormat_t transformFormat, - cudnnFilterDescriptor_t foldedFilterDesc, - cudnnTensorDescriptor_t paddedDiffDesc, - cudnnConvolutionDescriptor_t foldedConvDesc, - cudnnTensorDescriptor_t foldedGradDesc, - cudnnTensorTransformDescriptor_t filterFoldTransDesc, - cudnnTensorTransformDescriptor_t diffPadTransDesc, - cudnnTensorTransformDescriptor_t gradFoldTransDesc, - cudnnTensorTransformDescriptor_t gradUnfoldTransDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnHandle_t, const cudnnFilterDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnTensorFormat_t, - cudnnFilterDescriptor_t, cudnnTensorDescriptor_t, - cudnnConvolutionDescriptor_t, cudnnTensorDescriptor_t, - cudnnTensorTransformDescriptor_t, cudnnTensorTransformDescriptor_t, - cudnnTensorTransformDescriptor_t, cudnnTensorTransformDescriptor_t); - static auto func_ptr = - LoadSymbol("cudnnGetFoldedConvBackwardDataDescriptors"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, filterDesc, diffDesc, convDesc, gradDesc, - transformFormat, foldedFilterDesc, paddedDiffDesc, - foldedConvDesc, foldedGradDesc, filterFoldTransDesc, - diffPadTransDesc, gradFoldTransDesc, gradUnfoldTransDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreateFusedOpsConstParamPack( - cudnnFusedOpsConstParamPack_t *constPack, cudnnFusedOps_t ops) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsConstParamPack_t *, - cudnnFusedOps_t); - static auto func_ptr = - LoadSymbol("cudnnCreateFusedOpsConstParamPack"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(constPack, ops); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyFusedOpsConstParamPack(cudnnFusedOpsConstParamPack_t constPack) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsConstParamPack_t); - static auto func_ptr = - LoadSymbol("cudnnDestroyFusedOpsConstParamPack"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(constPack); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetFusedOpsConstParamPackAttribute( - cudnnFusedOpsConstParamPack_t constPack, - cudnnFusedOpsConstParamLabel_t paramLabel, const void *param) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsConstParamPack_t, - cudnnFusedOpsConstParamLabel_t, - const void *); - static auto func_ptr = - LoadSymbol("cudnnSetFusedOpsConstParamPackAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(constPack, paramLabel, param); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetFusedOpsConstParamPackAttribute( - const cudnnFusedOpsConstParamPack_t constPack, - cudnnFusedOpsConstParamLabel_t paramLabel, void *param, int *isNULL) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnFusedOpsConstParamPack_t, cudnnFusedOpsConstParamLabel_t, - void *, int *); - static auto func_ptr = - LoadSymbol("cudnnGetFusedOpsConstParamPackAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(constPack, paramLabel, param, isNULL); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreateFusedOpsVariantParamPack( - cudnnFusedOpsVariantParamPack_t *varPack, cudnnFusedOps_t ops) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnFusedOpsVariantParamPack_t *, cudnnFusedOps_t); - static auto func_ptr = - LoadSymbol("cudnnCreateFusedOpsVariantParamPack"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(varPack, ops); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyFusedOpsVariantParamPack(cudnnFusedOpsVariantParamPack_t varPack) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsVariantParamPack_t); - static auto func_ptr = - LoadSymbol("cudnnDestroyFusedOpsVariantParamPack"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(varPack); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetFusedOpsVariantParamPackAttribute( - cudnnFusedOpsVariantParamPack_t varPack, - cudnnFusedOpsVariantParamLabel_t 
paramLabel, void *ptr) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsVariantParamPack_t, - cudnnFusedOpsVariantParamLabel_t, void *); - static auto func_ptr = - LoadSymbol("cudnnSetFusedOpsVariantParamPackAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(varPack, paramLabel, ptr); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetFusedOpsVariantParamPackAttribute( - const cudnnFusedOpsVariantParamPack_t varPack, - cudnnFusedOpsVariantParamLabel_t paramLabel, void *ptr) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(const cudnnFusedOpsVariantParamPack_t, - cudnnFusedOpsVariantParamLabel_t, void *); - static auto func_ptr = - LoadSymbol("cudnnGetFusedOpsVariantParamPackAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(varPack, paramLabel, ptr); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreateFusedOpsPlan(cudnnFusedOpsPlan_t *plan, - cudnnFusedOps_t ops) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsPlan_t *, cudnnFusedOps_t); - static auto func_ptr = LoadSymbol("cudnnCreateFusedOpsPlan"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, ops); -} - -cudnnStatus_t CUDNNWINAPI cudnnDestroyFusedOpsPlan(cudnnFusedOpsPlan_t plan) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsPlan_t); - static auto func_ptr = LoadSymbol("cudnnDestroyFusedOpsPlan"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan); -} - -cudnnStatus_t CUDNNWINAPI -cudnnMakeFusedOpsPlan(cudnnHandle_t handle, cudnnFusedOpsPlan_t plan, - const cudnnFusedOpsConstParamPack_t constPack, - size_t *workspaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnFusedOpsPlan_t, const cudnnFusedOpsConstParamPack_t, - size_t *); - static auto func_ptr = LoadSymbol("cudnnMakeFusedOpsPlan"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, plan, constPack, workspaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI 
-cudnnFusedOpsExecute(cudnnHandle_t handle, const cudnnFusedOpsPlan_t plan, - cudnnFusedOpsVariantParamPack_t varPack) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, const cudnnFusedOpsPlan_t, - cudnnFusedOpsVariantParamPack_t); - static auto func_ptr = LoadSymbol("cudnnFusedOpsExecute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, plan, varPack); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateRNNDescriptor(cudnnRNNDescriptor_t *rnnDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateRNNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyRNNDescriptor(cudnnRNNDescriptor_t rnnDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyRNNDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor_v8( - cudnnRNNDescriptor_t rnnDesc, cudnnRNNAlgo_t algo, cudnnRNNMode_t cellMode, - cudnnRNNBiasMode_t biasMode, cudnnDirectionMode_t dirMode, - cudnnRNNInputMode_t inputMode, cudnnDataType_t dataType, - cudnnDataType_t mathPrec, cudnnMathType_t mathType, int32_t inputSize, - int32_t hiddenSize, int32_t projSize, int32_t numLayers, - cudnnDropoutDescriptor_t dropoutDesc, uint32_t auxFlags) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnRNNDescriptor_t, cudnnRNNAlgo_t, cudnnRNNMode_t, cudnnRNNBiasMode_t, - cudnnDirectionMode_t, cudnnRNNInputMode_t, cudnnDataType_t, - cudnnDataType_t, cudnnMathType_t, int32_t, int32_t, int32_t, int32_t, - cudnnDropoutDescriptor_t, uint32_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNDescriptor_v8"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, algo, cellMode, biasMode, dirMode, inputMode, - dataType, mathPrec, mathType, inputSize, 
hiddenSize, projSize, - numLayers, dropoutDesc, auxFlags); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNDescriptor_v8( - cudnnRNNDescriptor_t rnnDesc, cudnnRNNAlgo_t *algo, - cudnnRNNMode_t *cellMode, cudnnRNNBiasMode_t *biasMode, - cudnnDirectionMode_t *dirMode, cudnnRNNInputMode_t *inputMode, - cudnnDataType_t *dataType, cudnnDataType_t *mathPrec, - cudnnMathType_t *mathType, int32_t *inputSize, int32_t *hiddenSize, - int32_t *projSize, int32_t *numLayers, - cudnnDropoutDescriptor_t *dropoutDesc, uint32_t *auxFlags) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnRNNDescriptor_t, cudnnRNNAlgo_t *, cudnnRNNMode_t *, - cudnnRNNBiasMode_t *, cudnnDirectionMode_t *, cudnnRNNInputMode_t *, - cudnnDataType_t *, cudnnDataType_t *, cudnnMathType_t *, int32_t *, - int32_t *, int32_t *, int32_t *, cudnnDropoutDescriptor_t *, uint32_t *); - static auto func_ptr = LoadSymbol("cudnnGetRNNDescriptor_v8"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, algo, cellMode, biasMode, dirMode, inputMode, - dataType, mathPrec, mathType, inputSize, hiddenSize, projSize, - numLayers, dropoutDesc, auxFlags); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor_v6( - cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, const int hiddenSize, - const int numLayers, cudnnDropoutDescriptor_t dropoutDesc, - cudnnRNNInputMode_t inputMode, cudnnDirectionMode_t direction, - cudnnRNNMode_t cellMode, cudnnRNNAlgo_t algo, cudnnDataType_t mathPrec) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int, - cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t, - cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNDescriptor_v6"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, - inputMode, direction, cellMode, algo, mathPrec); -} - -cudnnStatus_t CUDNNWINAPI 
cudnnGetRNNDescriptor_v6( - cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, int *hiddenSize, - int *numLayers, cudnnDropoutDescriptor_t *dropoutDesc, - cudnnRNNInputMode_t *inputMode, cudnnDirectionMode_t *direction, - cudnnRNNMode_t *cellMode, cudnnRNNAlgo_t *algo, cudnnDataType_t *mathPrec) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, int *, int *, - cudnnDropoutDescriptor_t *, cudnnRNNInputMode_t *, cudnnDirectionMode_t *, - cudnnRNNMode_t *, cudnnRNNAlgo_t *, cudnnDataType_t *); - static auto func_ptr = LoadSymbol("cudnnGetRNNDescriptor_v6"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, - inputMode, direction, cellMode, algo, mathPrec); -} - -cudnnStatus_t CUDNNWINAPI -cudnnSetRNNMatrixMathType(cudnnRNNDescriptor_t rnnDesc, cudnnMathType_t mType) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnMathType_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNMatrixMathType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, mType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNMatrixMathType( - cudnnRNNDescriptor_t rnnDesc, cudnnMathType_t *mType) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnMathType_t *); - static auto func_ptr = LoadSymbol("cudnnGetRNNMatrixMathType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, mType); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetRNNBiasMode(cudnnRNNDescriptor_t rnnDesc, - cudnnRNNBiasMode_t biasMode) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnRNNBiasMode_t); - static auto func_ptr = LoadSymbol("cudnnSetRNNBiasMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, biasMode); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNBiasMode(cudnnRNNDescriptor_t rnnDesc, - cudnnRNNBiasMode_t *biasMode) { - using FuncPtr = - 
cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnRNNBiasMode_t *); - static auto func_ptr = LoadSymbol("cudnnGetRNNBiasMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, biasMode); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNSetClip(cudnnHandle_t handle, - cudnnRNNDescriptor_t rnnDesc, - cudnnRNNClipMode_t clipMode, - cudnnNanPropagation_t clipNanOpt, - double lclip, double rclip) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, cudnnRNNClipMode_t, - cudnnNanPropagation_t, double, double); - static auto func_ptr = LoadSymbol("cudnnRNNSetClip"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, clipMode, clipNanOpt, lclip, rclip); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNGetClip(cudnnHandle_t handle, - cudnnRNNDescriptor_t rnnDesc, - cudnnRNNClipMode_t *clipMode, - cudnnNanPropagation_t *clipNanOpt, - double *lclip, double *rclip) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, cudnnRNNClipMode_t *, - cudnnNanPropagation_t *, double *, double *); - static auto func_ptr = LoadSymbol("cudnnRNNGetClip"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, clipMode, clipNanOpt, lclip, rclip); -} - -cudnnStatus_t CUDNNWINAPI -cudnnSetRNNProjectionLayers(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, - const int recProjSize, const int outProjSize) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int); - static auto func_ptr = LoadSymbol("cudnnSetRNNProjectionLayers"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, recProjSize, outProjSize); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNProjectionLayers( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *recProjSize, - int *outProjSize) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, 
int *, int *); - static auto func_ptr = LoadSymbol("cudnnGetRNNProjectionLayers"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, recProjSize, outProjSize); -} - -cudnnStatus_t CUDNNWINAPI cudnnCreatePersistentRNNPlan( - cudnnRNNDescriptor_t rnnDesc, const int minibatch, - const cudnnDataType_t dataType, cudnnPersistentRNNPlan_t *plan) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, const int, - const cudnnDataType_t, - cudnnPersistentRNNPlan_t *); - static auto func_ptr = LoadSymbol("cudnnCreatePersistentRNNPlan"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, minibatch, dataType, plan); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyPersistentRNNPlan(cudnnPersistentRNNPlan_t plan) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPersistentRNNPlan_t); - static auto func_ptr = LoadSymbol("cudnnDestroyPersistentRNNPlan"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetPersistentRNNPlan( - cudnnRNNDescriptor_t rnnDesc, cudnnPersistentRNNPlan_t plan) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, - cudnnPersistentRNNPlan_t); - static auto func_ptr = LoadSymbol("cudnnSetPersistentRNNPlan"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, plan); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetRNNWeightSpaceSize(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, - size_t *weightSpaceSize) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, - cudnnRNNDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetRNNWeightSpaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, weightSpaceSize); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNWorkspaceSize( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, - size_t *sizeInBytes) { - using 
FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetRNNWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNTempSpaceSizes( - cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, - cudnnForwardMode_t fMode, cudnnRNNDataDescriptor_t xDesc, - size_t *workSpaceSize, size_t *reserveSpaceSize) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, cudnnForwardMode_t, - cudnnRNNDataDescriptor_t, size_t *, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetRNNTempSpaceSizes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, fMode, xDesc, workSpaceSize, - reserveSpaceSize); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetRNNParamsSize(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const cudnnTensorDescriptor_t xDesc, size_t *sizeInBytes, - cudnnDataType_t dataType) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnTensorDescriptor_t, - size_t *, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnGetRNNParamsSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, xDesc, sizeInBytes, dataType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerMatrixParams( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int pseudoLayer, const cudnnTensorDescriptor_t xDesc, - const cudnnFilterDescriptor_t wDesc, const void *w, const int linLayerID, - cudnnFilterDescriptor_t linLayerMatDesc, void **linLayerMat) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, - const void *, const int, cudnnFilterDescriptor_t, void **); 
- static auto func_ptr = LoadSymbol("cudnnGetRNNLinLayerMatrixParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, pseudoLayer, xDesc, wDesc, w, linLayerID, - linLayerMatDesc, linLayerMat); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerBiasParams( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int pseudoLayer, const cudnnTensorDescriptor_t xDesc, - const cudnnFilterDescriptor_t wDesc, const void *w, const int linLayerID, - cudnnFilterDescriptor_t linLayerBiasDesc, void **linLayerBias) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, - const void *, const int, cudnnFilterDescriptor_t, void **); - static auto func_ptr = LoadSymbol("cudnnGetRNNLinLayerBiasParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, pseudoLayer, xDesc, wDesc, w, linLayerID, - linLayerBiasDesc, linLayerBias); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNForwardInference( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t *yDesc, void *y, - const cudnnTensorDescriptor_t hyDesc, void *hy, - const cudnnTensorDescriptor_t cyDesc, void *cy, void *workspace, - size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, 
void *, size_t); - static auto func_ptr = LoadSymbol("cudnnRNNForwardInference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, - wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetRNNPaddingMode(cudnnRNNDescriptor_t rnnDesc, - unsigned paddingMode) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, unsigned int); - static auto func_ptr = LoadSymbol("cudnnSetRNNPaddingMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, paddingMode); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNPaddingMode(cudnnRNNDescriptor_t rnnDesc, - unsigned *paddingMode) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, unsigned int *); - static auto func_ptr = LoadSymbol("cudnnGetRNNPaddingMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDesc, paddingMode); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateRNNDataDescriptor(cudnnRNNDataDescriptor_t *rnnDataDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDataDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateRNNDataDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDataDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyRNNDataDescriptor(cudnnRNNDataDescriptor_t rnnDataDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDataDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyRNNDataDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDataDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetRNNDataDescriptor( - cudnnRNNDataDescriptor_t rnnDataDesc, cudnnDataType_t dataType, - cudnnRNNDataLayout_t layout, int maxSeqLength, int batchSize, - int vectorSize, - const int seqLengthArray[], /* length of each sequence in the batch */ - void *paddingFill) { - using FuncPtr = 
cudnnStatus_t(CUDNNWINAPI *)( - cudnnRNNDataDescriptor_t, cudnnDataType_t, cudnnRNNDataLayout_t, int, int, - int, const int[], void *); - static auto func_ptr = LoadSymbol("cudnnSetRNNDataDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDataDesc, dataType, layout, maxSeqLength, batchSize, - vectorSize, seqLengthArray, paddingFill); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNDataDescriptor( - cudnnRNNDataDescriptor_t rnnDataDesc, cudnnDataType_t *dataType, - cudnnRNNDataLayout_t *layout, int *maxSeqLength, int *batchSize, - int *vectorSize, int arrayLengthRequested, int seqLengthArray[], - void *paddingFill) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnRNNDataDescriptor_t, cudnnDataType_t *, cudnnRNNDataLayout_t *, - int *, int *, int *, int, int[], void *); - static auto func_ptr = LoadSymbol("cudnnGetRNNDataDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rnnDataDesc, dataType, layout, maxSeqLength, batchSize, - vectorSize, arrayLengthRequested, seqLengthArray, - paddingFill); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNForwardInferenceEx( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const cudnnRNNDataDescriptor_t xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnRNNDataDescriptor_t yDesc, void *y, - const cudnnTensorDescriptor_t hyDesc, void *hy, - const cudnnTensorDescriptor_t cyDesc, void *cy, - const cudnnRNNDataDescriptor_t kDesc, /* reserved, should pass NULL */ - const void *keys, /* reserved, should pass NULL */ - const cudnnRNNDataDescriptor_t cDesc, /* reserved, should pass NULL */ - void *cAttn, /* reserved, should pass NULL */ - const cudnnRNNDataDescriptor_t iDesc, /* reserved, should pass NULL */ - void *iAttn, /* reserved, should pass NULL */ - const cudnnRNNDataDescriptor_t qDesc, /* reserved, should pass 
NULL */ - void *queries, /* reserved, should pass NULL */ - void *workSpace, size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, - const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, - const cudnnRNNDataDescriptor_t, const void *, - const cudnnRNNDataDescriptor_t, void *, const cudnnRNNDataDescriptor_t, - void *, const cudnnRNNDataDescriptor_t, void *, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnRNNForwardInferenceEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, - yDesc, y, hyDesc, hy, cyDesc, cy, kDesc, keys, cDesc, cAttn, - iDesc, iAttn, qDesc, queries, workSpace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateSeqDataDescriptor(cudnnSeqDataDescriptor_t *seqDataDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnSeqDataDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateSeqDataDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(seqDataDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroySeqDataDescriptor(cudnnSeqDataDescriptor_t seqDataDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnSeqDataDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroySeqDataDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(seqDataDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetSeqDataDescriptor( - cudnnSeqDataDescriptor_t seqDataDesc, cudnnDataType_t dataType, int nbDims, - const int dimA[], const cudnnSeqDataAxis_t axes[], - size_t seqLengthArraySize, const int seqLengthArray[], void *paddingFill) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI 
*)( - cudnnSeqDataDescriptor_t, cudnnDataType_t, int, const int[], - const cudnnSeqDataAxis_t[], size_t, const int[], void *); - static auto func_ptr = LoadSymbol("cudnnSetSeqDataDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(seqDataDesc, dataType, nbDims, dimA, axes, seqLengthArraySize, - seqLengthArray, paddingFill); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetSeqDataDescriptor( - const cudnnSeqDataDescriptor_t seqDataDesc, cudnnDataType_t *dataType, - int *nbDims, int nbDimsRequested, int dimA[], cudnnSeqDataAxis_t axes[], - size_t *seqLengthArraySize, size_t seqLengthSizeRequested, - int seqLengthArray[], void *paddingFill) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - const cudnnSeqDataDescriptor_t, cudnnDataType_t *, int *, int, int[], - cudnnSeqDataAxis_t[], size_t *, size_t, int[], void *); - static auto func_ptr = LoadSymbol("cudnnGetSeqDataDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(seqDataDesc, dataType, nbDims, nbDimsRequested, dimA, axes, - seqLengthArraySize, seqLengthSizeRequested, seqLengthArray, - paddingFill); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateAttnDescriptor(cudnnAttnDescriptor_t *attnDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAttnDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateAttnDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attnDesc); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyAttnDescriptor(cudnnAttnDescriptor_t attnDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAttnDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyAttnDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attnDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetAttnDescriptor( - cudnnAttnDescriptor_t attnDesc, unsigned attnMode, int nHeads, - double smScaler, cudnnDataType_t dataType, cudnnDataType_t computePrec, - cudnnMathType_t mathType, cudnnDropoutDescriptor_t 
attnDropoutDesc, - cudnnDropoutDescriptor_t postDropoutDesc, int qSize, int kSize, int vSize, - int qProjSize, int kProjSize, int vProjSize, int oProjSize, - int qoMaxSeqLength, int kvMaxSeqLength, int maxBatchSize, int maxBeamSize) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnAttnDescriptor_t, unsigned int, int, double, cudnnDataType_t, - cudnnDataType_t, cudnnMathType_t, cudnnDropoutDescriptor_t, - cudnnDropoutDescriptor_t, int, int, int, int, int, int, int, int, int, - int, int); - static auto func_ptr = LoadSymbol("cudnnSetAttnDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attnDesc, attnMode, nHeads, smScaler, dataType, computePrec, - mathType, attnDropoutDesc, postDropoutDesc, qSize, kSize, - vSize, qProjSize, kProjSize, vProjSize, oProjSize, - qoMaxSeqLength, kvMaxSeqLength, maxBatchSize, maxBeamSize); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetAttnDescriptor( - cudnnAttnDescriptor_t attnDesc, unsigned *attnMode, int *nHeads, - double *smScaler, cudnnDataType_t *dataType, cudnnDataType_t *computePrec, - cudnnMathType_t *mathType, cudnnDropoutDescriptor_t *attnDropoutDesc, - cudnnDropoutDescriptor_t *postDropoutDesc, int *qSize, int *kSize, - int *vSize, int *qProjSize, int *kProjSize, int *vProjSize, int *oProjSize, - int *qoMaxSeqLength, int *kvMaxSeqLength, int *maxBatchSize, - int *maxBeamSize) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnAttnDescriptor_t, unsigned int *, int *, double *, cudnnDataType_t *, - cudnnDataType_t *, cudnnMathType_t *, cudnnDropoutDescriptor_t *, - cudnnDropoutDescriptor_t *, int *, int *, int *, int *, int *, int *, - int *, int *, int *, int *, int *); - static auto func_ptr = LoadSymbol("cudnnGetAttnDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attnDesc, attnMode, nHeads, smScaler, dataType, computePrec, - mathType, attnDropoutDesc, postDropoutDesc, qSize, kSize, - vSize, qProjSize, kProjSize, vProjSize, oProjSize, - qoMaxSeqLength, 
kvMaxSeqLength, maxBatchSize, maxBeamSize); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetMultiHeadAttnBuffers( - cudnnHandle_t handle, const cudnnAttnDescriptor_t attnDesc, - size_t *weightSizeInBytes, size_t *workSpaceSizeInBytes, - size_t *reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnAttnDescriptor_t, size_t *, size_t *, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetMultiHeadAttnBuffers"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, attnDesc, weightSizeInBytes, workSpaceSizeInBytes, - reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetMultiHeadAttnWeights( - cudnnHandle_t handle, const cudnnAttnDescriptor_t attnDesc, - cudnnMultiHeadAttnWeightKind_t wKind, size_t weightSizeInBytes, - const void *weights, cudnnTensorDescriptor_t wDesc, void **wAddr) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnAttnDescriptor_t, - cudnnMultiHeadAttnWeightKind_t, size_t, const void *, - cudnnTensorDescriptor_t, void **); - static auto func_ptr = LoadSymbol("cudnnGetMultiHeadAttnWeights"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, attnDesc, wKind, weightSizeInBytes, weights, wDesc, - wAddr); -} - -cudnnStatus_t CUDNNWINAPI cudnnMultiHeadAttnForward( - cudnnHandle_t handle, const cudnnAttnDescriptor_t attnDesc, int currIdx, - const int loWinIdx[], const int hiWinIdx[], const int devSeqLengthsQO[], - const int devSeqLengthsKV[], const cudnnSeqDataDescriptor_t qDesc, - const void *queries, const void *residuals, - const cudnnSeqDataDescriptor_t kDesc, const void *keys, - const cudnnSeqDataDescriptor_t vDesc, const void *values, - const cudnnSeqDataDescriptor_t oDesc, void *out, size_t weightSizeInBytes, - const void *weights, size_t workSpaceSizeInBytes, void *workSpace, - size_t reserveSpaceSizeInBytes, void *reserveSpace) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const 
cudnnAttnDescriptor_t, int, const int[], const int[], - const int[], const int[], const cudnnSeqDataDescriptor_t, const void *, - const void *, const cudnnSeqDataDescriptor_t, const void *, - const cudnnSeqDataDescriptor_t, const void *, - const cudnnSeqDataDescriptor_t, void *, size_t, const void *, size_t, - void *, size_t, void *); - static auto func_ptr = LoadSymbol("cudnnMultiHeadAttnForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, attnDesc, currIdx, loWinIdx, hiWinIdx, - devSeqLengthsQO, devSeqLengthsKV, qDesc, queries, residuals, - kDesc, keys, vDesc, values, oDesc, out, weightSizeInBytes, - weights, workSpaceSizeInBytes, workSpace, - reserveSpaceSizeInBytes, reserveSpace); -} - -cudnnStatus_t CUDNNWINAPI cudnnAdvInferVersionCheck(void) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(); - static auto func_ptr = LoadSymbol("cudnnAdvInferVersionCheck"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -cudnnStatus_t CUDNNWINAPI cudnnSoftmaxBackward( - cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode, - const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y, - const cudnnTensorDescriptor_t dyDesc, const void *dy, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnSoftmaxBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, mode, alpha, yDesc, y, dyDesc, dy, beta, dxDesc, - dx); -} - -cudnnStatus_t CUDNNWINAPI cudnnPoolingBackward( - cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc, - const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y, - const 
cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnPoolingBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, poolingDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x, - beta, dxDesc, dx); -} - -cudnnStatus_t CUDNNWINAPI cudnnActivationBackward( - cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc, - const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnActivationDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnActivationBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, activationDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x, - beta, dxDesc, dx); -} - -cudnnStatus_t CUDNNWINAPI cudnnLRNCrossChannelBackward( - cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode, - const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, - const cudnnTensorDescriptor_t dxDesc, void *dx) { - using FuncPtr 
= cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, - const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnLRNCrossChannelBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, normDesc, lrnMode, alpha, yDesc, y, dyDesc, dy, xDesc, - x, beta, dxDesc, dx); -} - -cudnnStatus_t CUDNNWINAPI cudnnDivisiveNormalizationBackward( - cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, - cudnnDivNormMode_t mode, const void *alpha, - const cudnnTensorDescriptor_t - xDesc, /* same desc for x, means, dy, temp, temp2 */ - const void *x, - const void *means, /* if NULL, means are assumed to be zero */ - const void *dy, void *temp, void *temp2, const void *beta, - const cudnnTensorDescriptor_t dXdMeansDesc, /* same desc for dx, dMeans */ - void *dx, /* output x differential */ - void *dMeans) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, const void *, - void *, void *, const void *, const cudnnTensorDescriptor_t, void *, - void *); - static auto func_ptr = - LoadSymbol("cudnnDivisiveNormalizationBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, dy, temp, - temp2, beta, dXdMeansDesc, dx, dMeans); -} - -cudnnStatus_t CUDNNWINAPI -cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, - const cudnnTensorDescriptor_t xDesc, const cudnnTensorDescriptor_t zDesc, - const cudnnTensorDescriptor_t yDesc, - const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, - const cudnnActivationDescriptor_t activationDesc, size_t *sizeInBytes) { - 
using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, - const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, - const cudnnActivationDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol( - "cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, bnOps, xDesc, zDesc, yDesc, - bnScaleBiasMeanVarDesc, activationDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetBatchNormalizationBackwardExWorkspaceSize( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, - const cudnnTensorDescriptor_t xDesc, const cudnnTensorDescriptor_t yDesc, - const cudnnTensorDescriptor_t dyDesc, const cudnnTensorDescriptor_t dzDesc, - const cudnnTensorDescriptor_t dxDesc, - const cudnnTensorDescriptor_t dBnScaleBiasDesc, - const cudnnActivationDescriptor_t activationDesc, size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, - const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, - const cudnnActivationDescriptor_t, size_t *); - static auto func_ptr = - LoadSymbol("cudnnGetBatchNormalizationBackwardExWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, bnOps, xDesc, yDesc, dyDesc, dzDesc, dxDesc, - dBnScaleBiasDesc, activationDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetBatchNormalizationTrainingExReserveSpaceSize( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, - const cudnnActivationDescriptor_t activationDesc, - const cudnnTensorDescriptor_t xDesc, size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, 
cudnnBatchNormMode_t, cudnnBatchNormOps_t, - const cudnnActivationDescriptor_t, const cudnnTensorDescriptor_t, - size_t *); - static auto func_ptr = LoadSymbol( - "cudnnGetBatchNormalizationTrainingExReserveSpaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, bnOps, activationDesc, xDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardTraining( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, - - const void *alpha, /* alpha[0] = result blend factor */ - const void *beta, /* beta[0] = dest layer blend factor */ - - const cudnnTensorDescriptor_t xDesc, const void *x, /* NxCxHxW */ - const cudnnTensorDescriptor_t yDesc, void *y, /* NxCxHxW */ - - /* Shared desc for the next 6 tensors in the argument list. - Data type to be set as follows: - type = (typeOf(x) == double) ? double : float - Dimensions for this descriptor depend on normalization mode - - Spatial Normalization : tensors are expected to have dims 1xCx1x1 - (normalization is performed across NxHxW) - - Per-Activation Normalization : tensors are expected to have dims of - 1xCxHxW (normalization is performed across N) */ - const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, - - /* 'Gamma' and 'Beta' respectively in Ioffe and Szegedy's paper's notation - */ - const void *bnScale, const void *bnBias, - - /* MUST use factor=1 in the very first call of a complete training cycle. - Use a factor=1/(1+n) at N-th call to the function to get - Cumulative Moving Average (CMA) behavior - CMA[n] = (x[1]+...+x[n])/n - Since CMA[n+1] = (n*CMA[n]+x[n+1])/(n+1) = - ((n+1)*CMA[n]-CMA[n])/(n+1) + x[n+1]/(n+1) = - CMA[n]*(1-1/(n+1)) + x[n+1]*1/(n+1) */ - double exponentialAverageFactor, - - /* Used in Training phase only. - runningMean = newMean*factor + runningMean*(1-factor) */ - void *resultRunningMean, - /* Output in training mode, input in inference. 
Is the moving average - of variance[x] (factor is applied in the same way as for runningMean) */ - void *resultRunningVariance, - - /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and - backward functions. */ - double epsilon, - - /* Optionally save intermediate results from the forward pass here - - can be reused to speed up backward pass. NULL if unused */ - void *resultSaveMean, void *resultSaveInvVariance) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, - const void *, const void *, double, void *, void *, double, void *, - void *); - static auto func_ptr = - LoadSymbol("cudnnBatchNormalizationForwardTraining"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr( - handle, mode, alpha, beta, xDesc, x, yDesc, y, bnScaleBiasMeanVarDesc, - bnScale, bnBias, exponentialAverageFactor, resultRunningMean, - resultRunningVariance, epsilon, resultSaveMean, resultSaveInvVariance); -} - -cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardTrainingEx( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, - - const void *alpha, /* alpha[0] = result blend factor */ - const void *beta, /* beta[0] = dest layer blend factor */ - - const cudnnTensorDescriptor_t xDesc, const void *xData, - const cudnnTensorDescriptor_t zDesc, const void *zData, - const cudnnTensorDescriptor_t yDesc, void *yData, - - const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, const void *bnScale, - const void *bnBias, - - double exponentialAverageFactor, void *resultRunningMean, - void *resultRunningVariance, - - /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and - backward functions. */ - double epsilon, - - /* Optionally save intermediate results from the forward pass here - - can be reused to speed up backward pass. 
NULL if unused */ - void *resultSaveMean, void *resultSaveInvVariance, - - cudnnActivationDescriptor_t activationDesc, void *workspace, - size_t workSpaceSizeInBytes, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, const void *, - const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, - const void *, const void *, double, void *, void *, double, void *, - void *, cudnnActivationDescriptor_t, void *, size_t, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnBatchNormalizationForwardTrainingEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, bnOps, alpha, beta, xDesc, xData, zDesc, zData, - yDesc, yData, bnScaleBiasMeanVarDesc, bnScale, bnBias, - exponentialAverageFactor, resultRunningMean, - resultRunningVariance, epsilon, resultSaveMean, - resultSaveInvVariance, activationDesc, workspace, - workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationBackward( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, const void *alphaDataDiff, - const void *betaDataDiff, const void *alphaParamDiff, - const void *betaParamDiff, - const cudnnTensorDescriptor_t xDesc, /* same desc for x, dx, dy */ - const void *x, const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnTensorDescriptor_t dxDesc, void *dx, - /* Shared tensor desc for the 4 tensors below */ - const cudnnTensorDescriptor_t dBnScaleBiasDesc, - const void *bnScale, /* bnBias doesn't affect backpropagation */ - /* scale and bias diff are not backpropagated below this layer */ - void *dBnScaleResult, void *dBnBiasResult, - /* Same epsilon as forward pass */ - double epsilon, - - /* Optionally cached intermediate results from - forward pass */ - const void *savedMean, 
const void *savedInvVariance) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, - const void *, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, - const void *, void *, void *, double, const void *, const void *); - static auto func_ptr = LoadSymbol("cudnnBatchNormalizationBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode, alphaDataDiff, betaDataDiff, alphaParamDiff, - betaParamDiff, xDesc, x, dyDesc, dy, dxDesc, dx, - dBnScaleBiasDesc, bnScale, dBnScaleResult, dBnBiasResult, - epsilon, savedMean, savedInvVariance); -} - -cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationBackwardEx( - cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, - - const void *alphaDataDiff, const void *betaDataDiff, - const void *alphaParamDiff, const void *betaParamDiff, - const cudnnTensorDescriptor_t xDesc, const void *xData, - const cudnnTensorDescriptor_t yDesc, const void *yData, - const cudnnTensorDescriptor_t dyDesc, const void *dyData, - const cudnnTensorDescriptor_t dzDesc, void *dzData, - const cudnnTensorDescriptor_t dxDesc, void *dxData, - - /* Shared tensor desc for the 4 tensors below */ - const cudnnTensorDescriptor_t dBnScaleBiasDesc, const void *bnScaleData, - const void *bnBiasData, /* needed if there is activation */ - void *dBnScaleData, void *dBnBiasData, - double epsilon, /* Same epsilon as forward pass */ - - /* Optionally cached intermediate results from - forward pass */ - const void *savedMean, const void *savedInvVariance, - cudnnActivationDescriptor_t activationDesc, void *workSpace, - size_t workSpaceSizeInBytes, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, const void *, - const void *, 
const void *, const void *, const cudnnTensorDescriptor_t, - const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, const void *, const void *, void *, - void *, double, const void *, const void *, cudnnActivationDescriptor_t, - void *, size_t, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnBatchNormalizationBackwardEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr( - handle, mode, bnOps, alphaDataDiff, betaDataDiff, alphaParamDiff, - betaParamDiff, xDesc, xData, yDesc, yData, dyDesc, dyData, dzDesc, dzData, - dxDesc, dxData, dBnScaleBiasDesc, bnScaleData, bnBiasData, dBnScaleData, - dBnBiasData, epsilon, savedMean, savedInvVariance, activationDesc, - workSpace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnSpatialTfGridGeneratorBackward( - cudnnHandle_t handle, const cudnnSpatialTransformerDescriptor_t stDesc, - const void *dgrid, void *dtheta) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *, - void *); - static auto func_ptr = - LoadSymbol("cudnnSpatialTfGridGeneratorBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, stDesc, dgrid, dtheta); -} - -cudnnStatus_t CUDNNWINAPI cudnnSpatialTfSamplerBackward( - cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc, - const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, - const void *beta, const cudnnTensorDescriptor_t dxDesc, void *dx, - const void *alphaDgrid, const cudnnTensorDescriptor_t dyDesc, - const void *dy, const void *grid, const void *betaDgrid, void *dgrid) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, 
const void *, - const cudnnTensorDescriptor_t, void *, const void *, - const cudnnTensorDescriptor_t, const void *, const void *, const void *, - void *); - static auto func_ptr = LoadSymbol("cudnnSpatialTfSamplerBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, stDesc, alpha, xDesc, x, beta, dxDesc, dx, alphaDgrid, - dyDesc, dy, grid, betaDgrid, dgrid); -} - -cudnnStatus_t CUDNNWINAPI cudnnDropoutBackward( - cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc, - const cudnnTensorDescriptor_t dydesc, const void *dy, - const cudnnTensorDescriptor_t dxdesc, void *dx, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnDropoutDescriptor_t, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, void *, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnDropoutBackward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dropoutDesc, dydesc, dy, dxdesc, dx, reserveSpace, - reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnOpsTrainVersionCheck(void) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(); - static auto func_ptr = LoadSymbol("cudnnOpsTrainVersionCheck"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithmMaxCount( - cudnnHandle_t handle, int *count) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardFilterAlgorithmMaxCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, count); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardFilterAlgorithm( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnFilterDescriptor_t dwDesc, const int 
requestedAlgoCount, - int *returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnFilterDescriptor_t, const int, int *, - cudnnConvolutionBwdFilterAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionBackwardFilterAlgorithm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, requestedAlgoCount, - returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardFilterAlgorithmEx( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnTensorDescriptor_t dyDesc, const void *y, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnFilterDescriptor_t dwDesc, void *dw, - const int requestedAlgoCount, int *returnedAlgoCount, - cudnnConvolutionBwdFilterAlgoPerf_t *perfResults, void *workSpace, - size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, void *, - const int, int *, cudnnConvolutionBwdFilterAlgoPerf_t *, void *, size_t); - static auto func_ptr = - LoadSymbol("cudnnFindConvolutionBackwardFilterAlgorithmEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, x, dyDesc, y, convDesc, dwDesc, dw, - requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, - workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithm_v7( - cudnnHandle_t handle, const cudnnTensorDescriptor_t srcDesc, - const cudnnTensorDescriptor_t diffDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnFilterDescriptor_t gradDesc, const int requestedAlgoCount, - int 
*returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnFilterDescriptor_t, const int, int *, - cudnnConvolutionBwdFilterAlgoPerf_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardFilterAlgorithm_v7"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, srcDesc, diffDesc, convDesc, gradDesc, - requestedAlgoCount, returnedAlgoCount, perfResults); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterWorkspaceSize( - cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, - const cudnnTensorDescriptor_t dyDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnFilterDescriptor_t gradDesc, - cudnnConvolutionBwdFilterAlgo_t algo, size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, - const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, size_t *); - static auto func_ptr = - LoadSymbol("cudnnGetConvolutionBackwardFilterWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, xDesc, dyDesc, convDesc, gradDesc, algo, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardFilter( - cudnnHandle_t handle, const void *alpha, - const cudnnTensorDescriptor_t xDesc, const void *x, - const cudnnTensorDescriptor_t dyDesc, const void *dy, - const cudnnConvolutionDescriptor_t convDesc, - cudnnConvolutionBwdFilterAlgo_t algo, void *workSpace, - size_t workSpaceSizeInBytes, const void *beta, - const cudnnFilterDescriptor_t dwDesc, void *dw) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const 
cudnnConvolutionDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, - void *, size_t, const void *, const cudnnFilterDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnConvolutionBackwardFilter"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, xDesc, x, dyDesc, dy, convDesc, algo, - workSpace, workSpaceSizeInBytes, beta, dwDesc, dw); -} - -cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardBias( - cudnnHandle_t handle, const void *alpha, - const cudnnTensorDescriptor_t dyDesc, const void *dy, const void *beta, - const cudnnTensorDescriptor_t dbDesc, void *db) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, - const void *, const cudnnTensorDescriptor_t, void *); - static auto func_ptr = LoadSymbol("cudnnConvolutionBackwardBias"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, dyDesc, dy, beta, dbDesc, db); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetRNNTrainingReserveSize( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetRNNTrainingReserveSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNForwardTraining( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t *yDesc, void *y, - const cudnnTensorDescriptor_t hyDesc, void *hy, - const 
cudnnTensorDescriptor_t cyDesc, void *cy, void *workspace, - size_t workSpaceSizeInBytes, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *, - size_t); - static auto func_ptr = LoadSymbol("cudnnRNNForwardTraining"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, - wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, - workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI -cudnnRNNBackwardData(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *yDesc, - const void *y, const cudnnTensorDescriptor_t *dyDesc, - const void *dy, const cudnnTensorDescriptor_t dhyDesc, - const void *dhy, const cudnnTensorDescriptor_t dcyDesc, - const void *dcy, const cudnnFilterDescriptor_t wDesc, - const void *w, const cudnnTensorDescriptor_t hxDesc, - const void *hx, const cudnnTensorDescriptor_t cxDesc, - const void *cx, const cudnnTensorDescriptor_t *dxDesc, - void *dx, const cudnnTensorDescriptor_t dhxDesc, void *dhx, - const cudnnTensorDescriptor_t dcxDesc, void *dcx, - void *workspace, size_t workSpaceSizeInBytes, - void *reserveSpace, size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - 
const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *, - size_t); - static auto func_ptr = LoadSymbol("cudnnRNNBackwardData"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc, - dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc, - dx, dhxDesc, dhx, dcxDesc, dcx, workspace, - workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardWeights( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t *yDesc, const void *y, const void *workspace, - size_t workSpaceSizeInBytes, const cudnnFilterDescriptor_t dwDesc, void *dw, - const void *reserveSpace, size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const int, - const cudnnTensorDescriptor_t *, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t *, const void *, const void *, size_t, - const cudnnFilterDescriptor_t, void *, const void *, size_t); - static auto func_ptr = LoadSymbol("cudnnRNNBackwardWeights"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y, - workspace, workSpaceSizeInBytes, dwDesc, dw, reserveSpace, - reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNForwardTrainingEx( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const cudnnRNNDataDescriptor_t xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const 
cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnRNNDataDescriptor_t yDesc, void *y, - const cudnnTensorDescriptor_t hyDesc, void *hy, - const cudnnTensorDescriptor_t cyDesc, void *cy, - const cudnnRNNDataDescriptor_t kDesc, /* reserved, should pass NULL */ - const void *keys, /* reserved, should pass NULL */ - const cudnnRNNDataDescriptor_t cDesc, /* reserved, should pass NULL */ - void *cAttn, /* reserved, should pass NULL */ - const cudnnRNNDataDescriptor_t iDesc, /* reserved, should pass NULL */ - void *iAttn, /* reserved, should pass NULL */ - const cudnnRNNDataDescriptor_t qDesc, /* reserved, should pass NULL */ - void *queries, /* reserved, should pass NULL */ - void *workSpace, size_t workSpaceSizeInBytes, void *reserveSpace, - size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, - const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, - const cudnnRNNDataDescriptor_t, const void *, - const cudnnRNNDataDescriptor_t, void *, const cudnnRNNDataDescriptor_t, - void *, const cudnnRNNDataDescriptor_t, void *, void *, size_t, void *, - size_t); - static auto func_ptr = LoadSymbol("cudnnRNNForwardTrainingEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, - yDesc, y, hyDesc, hy, cyDesc, cy, kDesc, keys, cDesc, cAttn, - iDesc, iAttn, qDesc, queries, workSpace, workSpaceSizeInBytes, - reserveSpace, reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNForward( - cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, - cudnnForwardMode_t fwdMode, const int32_t devSeqLengths[], - 
cudnnRNNDataDescriptor_t xDesc, const void *x, - cudnnRNNDataDescriptor_t yDesc, void *y, cudnnTensorDescriptor_t hDesc, - const void *hx, void *hy, cudnnTensorDescriptor_t cDesc, const void *cx, - void *cy, size_t weightSpaceSize, const void *weightSpace, - size_t workSpaceSize, void *workSpace, size_t reserveSpaceSize, - void *reserveSpace) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, cudnnForwardMode_t, const int32_t[], - cudnnRNNDataDescriptor_t, const void *, cudnnRNNDataDescriptor_t, void *, - cudnnTensorDescriptor_t, const void *, void *, cudnnTensorDescriptor_t, - const void *, void *, size_t, const void *, size_t, void *, size_t, - void *); - static auto func_ptr = LoadSymbol("cudnnRNNForward"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, fwdMode, devSeqLengths, xDesc, x, yDesc, y, - hDesc, hx, hy, cDesc, cx, cy, weightSpaceSize, weightSpace, - workSpaceSize, workSpace, reserveSpaceSize, reserveSpace); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardDataEx( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const cudnnRNNDataDescriptor_t yDesc, const void *y, - const cudnnRNNDataDescriptor_t dyDesc, const void *dy, - const cudnnRNNDataDescriptor_t dcDesc, /* reserved, should pass NULL */ - const void *dcAttn, /* reserved, should pass NULL */ - const cudnnTensorDescriptor_t dhyDesc, const void *dhy, - const cudnnTensorDescriptor_t dcyDesc, const void *dcy, - const cudnnFilterDescriptor_t wDesc, const void *w, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnTensorDescriptor_t cxDesc, const void *cx, - const cudnnRNNDataDescriptor_t dxDesc, void *dx, - const cudnnTensorDescriptor_t dhxDesc, void *dhx, - const cudnnTensorDescriptor_t dcxDesc, void *dcx, - const cudnnRNNDataDescriptor_t dkDesc, /* reserved, should pass NULL */ - void *dkeys, /* reserved, should pass NULL */ - void *workSpace, size_t workSpaceSizeInBytes, void *reserveSpace, - 
size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, - const void *, const cudnnRNNDataDescriptor_t, const void *, - const cudnnRNNDataDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnFilterDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnTensorDescriptor_t, const void *, - const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t, - void *, const cudnnTensorDescriptor_t, void *, - const cudnnRNNDataDescriptor_t, void *, void *, size_t, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnRNNBackwardDataEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, yDesc, y, dyDesc, dy, dcDesc, dcAttn, - dhyDesc, dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, - dxDesc, dx, dhxDesc, dhx, dcxDesc, dcx, dkDesc, dkeys, - workSpace, workSpaceSizeInBytes, reserveSpace, - reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardData_v8( - cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, - const int32_t devSeqLengths[], cudnnRNNDataDescriptor_t yDesc, - const void *y, const void *dy, cudnnRNNDataDescriptor_t xDesc, void *dx, - cudnnTensorDescriptor_t hDesc, const void *hx, const void *dhy, void *dhx, - cudnnTensorDescriptor_t cDesc, const void *cx, const void *dcy, void *dcx, - size_t weightSpaceSize, const void *weightSpace, size_t workSpaceSize, - void *workSpace, size_t reserveSpaceSize, void *reserveSpace) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, const int32_t[], - cudnnRNNDataDescriptor_t, const void *, const void *, - cudnnRNNDataDescriptor_t, void *, cudnnTensorDescriptor_t, const void *, - const void *, void *, cudnnTensorDescriptor_t, const void *, const void *, - void *, size_t, const void *, size_t, void *, size_t, 
void *); - static auto func_ptr = LoadSymbol("cudnnRNNBackwardData_v8"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, devSeqLengths, yDesc, y, dy, xDesc, dx, - hDesc, hx, dhy, dhx, cDesc, cx, dcy, dcx, weightSpaceSize, - weightSpace, workSpaceSize, workSpace, reserveSpaceSize, - reserveSpace); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardWeightsEx( - cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, - const cudnnRNNDataDescriptor_t xDesc, const void *x, - const cudnnTensorDescriptor_t hxDesc, const void *hx, - const cudnnRNNDataDescriptor_t yDesc, const void *y, void *workSpace, - size_t workSpaceSizeInBytes, const cudnnFilterDescriptor_t dwDesc, void *dw, - void *reserveSpace, size_t reserveSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, - const void *, const cudnnTensorDescriptor_t, const void *, - const cudnnRNNDataDescriptor_t, const void *, void *, size_t, - const cudnnFilterDescriptor_t, void *, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnRNNBackwardWeightsEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, yDesc, y, workSpace, - workSpaceSizeInBytes, dwDesc, dw, reserveSpace, - reserveSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardWeights_v8( - cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, - cudnnWgradMode_t addGrad, const int32_t devSeqLengths[], - cudnnRNNDataDescriptor_t xDesc, const void *x, - cudnnTensorDescriptor_t hDesc, const void *hx, - cudnnRNNDataDescriptor_t yDesc, const void *y, size_t weightSpaceSize, - void *dweightSpace, size_t workSpaceSize, void *workSpace, - size_t reserveSpaceSize, void *reserveSpace) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnRNNDescriptor_t, cudnnWgradMode_t, const int32_t[], - cudnnRNNDataDescriptor_t, const void *, cudnnTensorDescriptor_t, - 
const void *, cudnnRNNDataDescriptor_t, const void *, size_t, void *, - size_t, void *, size_t, void *); - static auto func_ptr = LoadSymbol("cudnnRNNBackwardWeights_v8"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rnnDesc, addGrad, devSeqLengths, xDesc, x, hDesc, hx, - yDesc, y, weightSpaceSize, dweightSpace, workSpaceSize, - workSpace, reserveSpaceSize, reserveSpace); -} - -cudnnStatus_t CUDNNWINAPI cudnnMultiHeadAttnBackwardData( - cudnnHandle_t handle, const cudnnAttnDescriptor_t attnDesc, - const int loWinIdx[], const int hiWinIdx[], const int devSeqLengthsDQDO[], - const int devSeqLengthsDKDV[], const cudnnSeqDataDescriptor_t doDesc, - const void *dout, const cudnnSeqDataDescriptor_t dqDesc, void *dqueries, - const void *queries, const cudnnSeqDataDescriptor_t dkDesc, void *dkeys, - const void *keys, const cudnnSeqDataDescriptor_t dvDesc, void *dvalues, - const void *values, size_t weightSizeInBytes, const void *weights, - size_t workSpaceSizeInBytes, void *workSpace, - size_t reserveSpaceSizeInBytes, void *reserveSpace) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnAttnDescriptor_t, const int[], const int[], - const int[], const int[], const cudnnSeqDataDescriptor_t, const void *, - const cudnnSeqDataDescriptor_t, void *, const void *, - const cudnnSeqDataDescriptor_t, void *, const void *, - const cudnnSeqDataDescriptor_t, void *, const void *, size_t, - const void *, size_t, void *, size_t, void *); - static auto func_ptr = LoadSymbol("cudnnMultiHeadAttnBackwardData"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, attnDesc, loWinIdx, hiWinIdx, devSeqLengthsDQDO, - devSeqLengthsDKDV, doDesc, dout, dqDesc, dqueries, queries, - dkDesc, dkeys, keys, dvDesc, dvalues, values, - weightSizeInBytes, weights, workSpaceSizeInBytes, workSpace, - reserveSpaceSizeInBytes, reserveSpace); -} - -cudnnStatus_t CUDNNWINAPI cudnnMultiHeadAttnBackwardWeights( - cudnnHandle_t handle, 
const cudnnAttnDescriptor_t attnDesc, - cudnnWgradMode_t addGrad, const cudnnSeqDataDescriptor_t qDesc, - const void *queries, const cudnnSeqDataDescriptor_t kDesc, const void *keys, - const cudnnSeqDataDescriptor_t vDesc, const void *values, - const cudnnSeqDataDescriptor_t doDesc, const void *dout, - size_t weightSizeInBytes, const void *weights, void *dweights, - size_t workSpaceSizeInBytes, void *workSpace, - size_t reserveSpaceSizeInBytes, void *reserveSpace) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnAttnDescriptor_t, cudnnWgradMode_t, - const cudnnSeqDataDescriptor_t, const void *, - const cudnnSeqDataDescriptor_t, const void *, - const cudnnSeqDataDescriptor_t, const void *, - const cudnnSeqDataDescriptor_t, const void *, size_t, const void *, - void *, size_t, void *, size_t, void *); - static auto func_ptr = - LoadSymbol("cudnnMultiHeadAttnBackwardWeights"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, attnDesc, addGrad, qDesc, queries, kDesc, keys, vDesc, - values, doDesc, dout, weightSizeInBytes, weights, dweights, - workSpaceSizeInBytes, workSpace, reserveSpaceSizeInBytes, - reserveSpace); -} - -cudnnStatus_t CUDNNWINAPI -cudnnCreateCTCLossDescriptor(cudnnCTCLossDescriptor_t *ctcLossDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t *); - static auto func_ptr = LoadSymbol("cudnnCreateCTCLossDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctcLossDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetCTCLossDescriptor( - cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t compType) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t); - static auto func_ptr = LoadSymbol("cudnnSetCTCLossDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctcLossDesc, compType); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetCTCLossDescriptorEx( - cudnnCTCLossDescriptor_t ctcLossDesc, 
cudnnDataType_t compType, - cudnnLossNormalizationMode_t normMode, cudnnNanPropagation_t gradMode) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnCTCLossDescriptor_t, cudnnDataType_t, cudnnLossNormalizationMode_t, - cudnnNanPropagation_t); - static auto func_ptr = LoadSymbol("cudnnSetCTCLossDescriptorEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctcLossDesc, compType, normMode, gradMode); -} - -cudnnStatus_t CUDNNWINAPI cudnnSetCTCLossDescriptor_v8( - cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t compType, - cudnnLossNormalizationMode_t normMode, cudnnNanPropagation_t gradMode, - int maxLabelLength) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnCTCLossDescriptor_t, cudnnDataType_t, cudnnLossNormalizationMode_t, - cudnnNanPropagation_t, int); - static auto func_ptr = LoadSymbol("cudnnSetCTCLossDescriptor_v8"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctcLossDesc, compType, normMode, gradMode, maxLabelLength); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossDescriptor( - cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t *compType) { - using FuncPtr = - cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t *); - static auto func_ptr = LoadSymbol("cudnnGetCTCLossDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctcLossDesc, compType); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossDescriptorEx( - cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t *compType, - cudnnLossNormalizationMode_t *normMode, cudnnNanPropagation_t *gradMode) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnCTCLossDescriptor_t, cudnnDataType_t *, - cudnnLossNormalizationMode_t *, cudnnNanPropagation_t *); - static auto func_ptr = LoadSymbol("cudnnGetCTCLossDescriptorEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctcLossDesc, compType, normMode, gradMode); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossDescriptor_v8( - 
cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t *compType, - cudnnLossNormalizationMode_t *normMode, cudnnNanPropagation_t *gradMode, - int *maxLabelLength) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnCTCLossDescriptor_t, cudnnDataType_t *, - cudnnLossNormalizationMode_t *, cudnnNanPropagation_t *, int *); - static auto func_ptr = LoadSymbol("cudnnGetCTCLossDescriptor_v8"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctcLossDesc, compType, normMode, gradMode, maxLabelLength); -} - -cudnnStatus_t CUDNNWINAPI -cudnnDestroyCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t); - static auto func_ptr = LoadSymbol("cudnnDestroyCTCLossDescriptor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctcLossDesc); -} - -cudnnStatus_t CUDNNWINAPI cudnnCTCLoss( - cudnnHandle_t handle, - const cudnnTensorDescriptor_t - probsDesc, /* Tensor descriptor for probabilities, the dimensions are - T,N,A (T is the timing steps, N is the - mini batch size, A is the alphabet size) */ - const void *probs, /* probabilities after softmax, in GPU memory */ - const int hostLabels[], /* labels, in CPU memory */ - const int hostLabelLengths[], /* the length of each label, in CPU memory */ - const int hostInputLengths[], /* the lengths of timing steps in each batch, - in CPU memory */ - void *costs, /* the returned costs of CTC, in GPU memory */ - const cudnnTensorDescriptor_t - gradientsDesc, /* Tensor descriptor for gradients, the dimensions are - T,N,A */ - void *gradients, /* the returned CTC gradients, in GPU memory, to compute - costs only, set it to NULL */ - cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */ - cudnnCTCLossDescriptor_t ctcLossDesc, - void *workspace, /* pointer to the workspace, in GPU memory */ - size_t workSpaceSizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const 
cudnnTensorDescriptor_t, const void *, const int[], - const int[], const int[], void *, const cudnnTensorDescriptor_t, void *, - cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, void *, size_t); - static auto func_ptr = LoadSymbol("cudnnCTCLoss"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, probsDesc, probs, hostLabels, hostLabelLengths, - hostInputLengths, costs, gradientsDesc, gradients, algo, - ctcLossDesc, workspace, workSpaceSizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnCTCLoss_v8( - cudnnHandle_t handle, - cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */ - cudnnCTCLossDescriptor_t ctcLossDesc, - const cudnnTensorDescriptor_t - probsDesc, /* Tensor descriptor for probabilities, the dimensions are - T,N,A (T is the timing steps, N is the - mini batch size, A is the alphabet size) */ - const void *probs, /* probabilities after softmax, in GPU memory */ - const int labels[], /* labels, in GPU memory */ - const int labelLengths[], /* the length of each label, in GPU memory */ - const int inputLengths[], /* the lengths of timing steps in each batch, in - GPU memory */ - void *costs, /* the returned costs of CTC, in GPU memory */ - const cudnnTensorDescriptor_t - gradientsDesc, /* Tensor descriptor for gradients, the dimensions are - T,N,A */ - void *gradients, /* the returned CTC gradients, in GPU memory, to compute - costs only, set it to NULL */ - size_t workSpaceSizeInBytes, /* size of the workspace */ - void *workspace) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, - const cudnnTensorDescriptor_t, const void *, const int[], const int[], - const int[], void *, const cudnnTensorDescriptor_t, void *, size_t, - void *); - static auto func_ptr = LoadSymbol("cudnnCTCLoss_v8"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, ctcLossDesc, probsDesc, probs, labels, - labelLengths, inputLengths, costs, 
gradientsDesc, gradients, - workSpaceSizeInBytes, workspace); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossWorkspaceSize( - cudnnHandle_t handle, - const cudnnTensorDescriptor_t - probsDesc, /* Tensor descriptor for probabilities, the dimensions are - T,N,A (T is the - timing steps, N is the mini batch size, A is the alphabet - size) */ - const cudnnTensorDescriptor_t - gradientsDesc, /* Tensor descriptor for gradients, the - dimensions are T,N,A. To compute costs - only, set it to NULL */ - const int *labels, /* labels, in CPU memory */ - const int *labelLengths, /* the length of each label, in CPU memory */ - const int *inputLengths, /* the lengths of timing steps in each batch, in - CPU memory */ - cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */ - cudnnCTCLossDescriptor_t ctcLossDesc, size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, const cudnnTensorDescriptor_t, - const cudnnTensorDescriptor_t, const int *, const int *, const int *, - cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetCTCLossWorkspaceSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, probsDesc, gradientsDesc, labels, labelLengths, - inputLengths, algo, ctcLossDesc, sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossWorkspaceSize_v8( - cudnnHandle_t handle, - cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */ - cudnnCTCLossDescriptor_t ctcLossDesc, - const cudnnTensorDescriptor_t - probsDesc, /* Tensor descriptor for probabilities, the dimensions are - T,N,A (T is the - timing steps, N is the mini batch size, A is the alphabet - size) */ - const cudnnTensorDescriptor_t - gradientsDesc, /* Tensor descriptor for gradients, the - dimensions are T,N,A. 
To compute costs - only, set it to NULL */ - size_t *sizeInBytes) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( - cudnnHandle_t, cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, - const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *); - static auto func_ptr = LoadSymbol("cudnnGetCTCLossWorkspaceSize_v8"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, ctcLossDesc, probsDesc, gradientsDesc, - sizeInBytes); -} - -cudnnStatus_t CUDNNWINAPI cudnnAdvTrainVersionCheck(void) { - using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(); - static auto func_ptr = LoadSymbol("cudnnAdvTrainVersionCheck"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cudnn_stub.cc b/third_party/xla/third_party/tsl/tsl/cuda/cudnn_stub.cc index 1f86d27e100f22..705dae12f903f3 100644 --- a/third_party/xla/third_party/tsl/tsl/cuda/cudnn_stub.cc +++ b/third_party/xla/third_party/tsl/tsl/cuda/cudnn_stub.cc @@ -33,35 +33,40 @@ void* GetDsoHandle() { #endif } -template -T LoadSymbol(const char* symbol_name) { +void* LoadSymbol(const char* symbol_name) { void* symbol = nullptr; if (auto handle = GetDsoHandle()) { tsl::Env::Default() ->GetSymbolFromLibrary(handle, symbol_name, &symbol) .IgnoreError(); } - return reinterpret_cast(symbol); + return symbol; } -cudnnStatus_t GetSymbolNotFoundError() { return CUDNN_STATUS_INTERNAL_ERROR; } +const char* kSymbols[] = { +#include "tsl/cuda/cudnn.inc" +}; + +constexpr size_t kNumSymbols = sizeof(kSymbols) / sizeof(const char*); + } // namespace -#if CUDNN_MAJOR < 6 -#error cuDNN version earlier than 6 is not supported. 
-#elif CUDNN_MAJOR < 7 -#include "tsl/cuda/cudnn_6_0.inc" -#elif CUDNN_MAJOR == 7 && CUDNN_MINOR < 1 -#include "tsl/cuda/cudnn_7_0.inc" -// 2 instead of 3: see https://github.com/tensorflow/tensorflow/issues/32350 -#elif CUDNN_MAJOR == 7 && CUDNN_MINOR < 2 -#include "tsl/cuda/cudnn_7_1.inc" -#elif CUDNN_MAJOR == 7 && CUDNN_MINOR < 4 -#include "tsl/cuda/cudnn_7_3.inc" -#elif CUDNN_MAJOR == 7 && CUDNN_MINOR < 6 -#include "tsl/cuda/cudnn_7_4.inc" -#elif CUDNN_MAJOR == 7 -#include "tsl/cuda/cudnn_7_6.inc" -#else -#include "tsl/cuda/cudnn_8_0.inc" -#endif +extern "C" { + +static cudnnStatus_t GetSymbolNotFoundError() { + return CUDNN_STATUS_INTERNAL_ERROR; +} + +extern void* _cudnn_tramp_table[]; + +void _cudnn_tramp_resolve(int i) { + CHECK_LE(0, i); + CHECK_LT(i, kNumSymbols); + void* p = LoadSymbol(kSymbols[i]); + if (!p) { + p = reinterpret_cast(&GetSymbolNotFoundError); + } + _cudnn_tramp_table[i] = p; +} + +} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cufft.symbols b/third_party/xla/third_party/tsl/tsl/cuda/cufft.symbols new file mode 100644 index 00000000000000..605815200bd90e --- /dev/null +++ b/third_party/xla/third_party/tsl/tsl/cuda/cufft.symbols @@ -0,0 +1,58 @@ +cufftCreate +cufftDebug +cufftDestroy +cufftEnterCS +cufftEstimate1d +cufftEstimate2d +cufftEstimate3d +cufftEstimateMany +cufftExecC2C +cufftExecC2R +cufftExecD2Z +cufftExecR2C +cufftExecZ2D +cufftExecZ2Z +cufftGetProperty +cufftGetSize +cufftGetSize1d +cufftGetSize2d +cufftGetSize3d +cufftGetSizeMany +cufftGetSizeMany64 +cufftGetVersion +cufftLeaveCS +cufftMakePlan1d +cufftMakePlan2d +cufftMakePlan3d +cufftMakePlanGuru64 +cufftMakePlanMany +cufftMakePlanMany64 +cufftPlan1d +cufftPlan2d +cufftPlan3d +cufftPlanMany +cufftSetAutoAllocation +cufftSetStream +cufftSetWorkArea +cufftXtClearCallback +cufftXtExec +cufftXtExecDescriptor +cufftXtExecDescriptorC2C +cufftXtExecDescriptorC2R +cufftXtExecDescriptorD2Z +cufftXtExecDescriptorR2C +cufftXtExecDescriptorZ2D 
+cufftXtExecDescriptorZ2Z +cufftXtFree +cufftXtGetSizeMany +cufftXtMakePlanGuru64 +cufftXtMakePlanMany +cufftXtMalloc +cufftXtMemcpy +cufftXtQueryPlan +cufftXtSetCallback +cufftXtSetCallbackSharedSize +cufftXtSetDistribution +cufftXtSetGPUs +cufftXtSetWorkArea +cufftXtSetWorkAreaPolicy diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cufft_10_0.inc b/third_party/xla/third_party/tsl/tsl/cuda/cufft_10_0.inc deleted file mode 100644 index 72068864f748dc..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cufft_10_0.inc +++ /dev/null @@ -1,361 +0,0 @@ -// Auto-generated, do not edit. - -extern "C" { - -cufftResult CUFFTAPI cufftPlan1d(cufftHandle *plan, int nx, cufftType type, - int batch) { - using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle *, int, cufftType, int); - static auto func_ptr = LoadSymbol("cufftPlan1d"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, nx, type, batch); -} - -cufftResult CUFFTAPI cufftPlan2d(cufftHandle *plan, int nx, int ny, - cufftType type) { - using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle *, int, int, cufftType); - static auto func_ptr = LoadSymbol("cufftPlan2d"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, nx, ny, type); -} - -cufftResult CUFFTAPI cufftPlan3d(cufftHandle *plan, int nx, int ny, int nz, - cufftType type) { - using FuncPtr = - cufftResult(CUFFTAPI *)(cufftHandle *, int, int, int, cufftType); - static auto func_ptr = LoadSymbol("cufftPlan3d"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, nx, ny, nz, type); -} - -cufftResult CUFFTAPI cufftPlanMany(cufftHandle *plan, int rank, int *n, - int *inembed, int istride, int idist, - int *onembed, int ostride, int odist, - cufftType type, int batch) { - using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle *, int, int *, int *, int, - int, int *, int, int, cufftType, int); - static auto func_ptr = LoadSymbol("cufftPlanMany"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(plan, rank, n, inembed, istride, idist, onembed, ostride, - odist, type, batch); -} - -cufftResult CUFFTAPI cufftMakePlan1d(cufftHandle plan, int nx, cufftType type, - int batch, size_t *workSize) { - using FuncPtr = - cufftResult(CUFFTAPI *)(cufftHandle, int, cufftType, int, size_t *); - static auto func_ptr = LoadSymbol("cufftMakePlan1d"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, nx, type, batch, workSize); -} - -cufftResult CUFFTAPI cufftMakePlan2d(cufftHandle plan, int nx, int ny, - cufftType type, size_t *workSize) { - using FuncPtr = - cufftResult(CUFFTAPI *)(cufftHandle, int, int, cufftType, size_t *); - static auto func_ptr = LoadSymbol("cufftMakePlan2d"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, nx, ny, type, workSize); -} - -cufftResult CUFFTAPI cufftMakePlan3d(cufftHandle plan, int nx, int ny, int nz, - cufftType type, size_t *workSize) { - using FuncPtr = - cufftResult(CUFFTAPI *)(cufftHandle, int, int, int, cufftType, size_t *); - static auto func_ptr = LoadSymbol("cufftMakePlan3d"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, nx, ny, nz, type, workSize); -} - -cufftResult CUFFTAPI cufftMakePlanMany(cufftHandle plan, int rank, int *n, - int *inembed, int istride, int idist, - int *onembed, int ostride, int odist, - cufftType type, int batch, - size_t *workSize) { - using FuncPtr = - cufftResult(CUFFTAPI *)(cufftHandle, int, int *, int *, int, int, int *, - int, int, cufftType, int, size_t *); - static auto func_ptr = LoadSymbol("cufftMakePlanMany"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, rank, n, inembed, istride, idist, onembed, ostride, - odist, type, batch, workSize); -} - -cufftResult CUFFTAPI cufftMakePlanMany64( - cufftHandle plan, int rank, long long int *n, long long int *inembed, - long long int istride, long long int idist, long long int *onembed, - long long int ostride, long long int odist, 
cufftType type, - long long int batch, size_t *workSize) { - using FuncPtr = cufftResult(CUFFTAPI *)( - cufftHandle, int, long long *, long long *, long long, long long, - long long *, long long, long long, cufftType, long long, size_t *); - static auto func_ptr = LoadSymbol("cufftMakePlanMany64"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, rank, n, inembed, istride, idist, onembed, ostride, - odist, type, batch, workSize); -} - -cufftResult CUFFTAPI cufftGetSizeMany64( - cufftHandle plan, int rank, long long int *n, long long int *inembed, - long long int istride, long long int idist, long long int *onembed, - long long int ostride, long long int odist, cufftType type, - long long int batch, size_t *workSize) { - using FuncPtr = cufftResult(CUFFTAPI *)( - cufftHandle, int, long long *, long long *, long long, long long, - long long *, long long, long long, cufftType, long long, size_t *); - static auto func_ptr = LoadSymbol("cufftGetSizeMany64"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, rank, n, inembed, istride, idist, onembed, ostride, - odist, type, batch, workSize); -} - -cufftResult CUFFTAPI cufftEstimate1d(int nx, cufftType type, int batch, - size_t *workSize) { - using FuncPtr = cufftResult(CUFFTAPI *)(int, cufftType, int, size_t *); - static auto func_ptr = LoadSymbol("cufftEstimate1d"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(nx, type, batch, workSize); -} - -cufftResult CUFFTAPI cufftEstimate2d(int nx, int ny, cufftType type, - size_t *workSize) { - using FuncPtr = cufftResult(CUFFTAPI *)(int, int, cufftType, size_t *); - static auto func_ptr = LoadSymbol("cufftEstimate2d"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(nx, ny, type, workSize); -} - -cufftResult CUFFTAPI cufftEstimate3d(int nx, int ny, int nz, cufftType type, - size_t *workSize) { - using FuncPtr = cufftResult(CUFFTAPI *)(int, int, int, cufftType, size_t *); - static auto 
func_ptr = LoadSymbol("cufftEstimate3d"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(nx, ny, nz, type, workSize); -} - -cufftResult CUFFTAPI cufftEstimateMany(int rank, int *n, int *inembed, - int istride, int idist, int *onembed, - int ostride, int odist, cufftType type, - int batch, size_t *workSize) { - using FuncPtr = cufftResult(CUFFTAPI *)(int, int *, int *, int, int, int *, - int, int, cufftType, int, size_t *); - static auto func_ptr = LoadSymbol("cufftEstimateMany"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rank, n, inembed, istride, idist, onembed, ostride, odist, - type, batch, workSize); -} - -cufftResult CUFFTAPI cufftCreate(cufftHandle *handle) { - using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle *); - static auto func_ptr = LoadSymbol("cufftCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cufftResult CUFFTAPI cufftGetSize1d(cufftHandle handle, int nx, cufftType type, - int batch, size_t *workSize) { - using FuncPtr = - cufftResult(CUFFTAPI *)(cufftHandle, int, cufftType, int, size_t *); - static auto func_ptr = LoadSymbol("cufftGetSize1d"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nx, type, batch, workSize); -} - -cufftResult CUFFTAPI cufftGetSize2d(cufftHandle handle, int nx, int ny, - cufftType type, size_t *workSize) { - using FuncPtr = - cufftResult(CUFFTAPI *)(cufftHandle, int, int, cufftType, size_t *); - static auto func_ptr = LoadSymbol("cufftGetSize2d"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nx, ny, type, workSize); -} - -cufftResult CUFFTAPI cufftGetSize3d(cufftHandle handle, int nx, int ny, int nz, - cufftType type, size_t *workSize) { - using FuncPtr = - cufftResult(CUFFTAPI *)(cufftHandle, int, int, int, cufftType, size_t *); - static auto func_ptr = LoadSymbol("cufftGetSize3d"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nx, ny, nz, 
type, workSize); -} - -cufftResult CUFFTAPI cufftGetSizeMany(cufftHandle handle, int rank, int *n, - int *inembed, int istride, int idist, - int *onembed, int ostride, int odist, - cufftType type, int batch, - size_t *workArea) { - using FuncPtr = - cufftResult(CUFFTAPI *)(cufftHandle, int, int *, int *, int, int, int *, - int, int, cufftType, int, size_t *); - static auto func_ptr = LoadSymbol("cufftGetSizeMany"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rank, n, inembed, istride, idist, onembed, ostride, - odist, type, batch, workArea); -} - -cufftResult CUFFTAPI cufftGetSize(cufftHandle handle, size_t *workSize) { - using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle, size_t *); - static auto func_ptr = LoadSymbol("cufftGetSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, workSize); -} - -cufftResult CUFFTAPI cufftSetWorkArea(cufftHandle plan, void *workArea) { - using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle, void *); - static auto func_ptr = LoadSymbol("cufftSetWorkArea"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, workArea); -} - -cufftResult CUFFTAPI cufftSetAutoAllocation(cufftHandle plan, - int autoAllocate) { - using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle, int); - static auto func_ptr = LoadSymbol("cufftSetAutoAllocation"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, autoAllocate); -} - -cufftResult CUFFTAPI cufftExecC2C(cufftHandle plan, cufftComplex *idata, - cufftComplex *odata, int direction) { - using FuncPtr = - cufftResult(CUFFTAPI *)(cufftHandle, cufftComplex *, cufftComplex *, int); - static auto func_ptr = LoadSymbol("cufftExecC2C"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, idata, odata, direction); -} - -cufftResult CUFFTAPI cufftExecR2C(cufftHandle plan, cufftReal *idata, - cufftComplex *odata) { - using FuncPtr = - cufftResult(CUFFTAPI *)(cufftHandle, cufftReal *, 
cufftComplex *); - static auto func_ptr = LoadSymbol("cufftExecR2C"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, idata, odata); -} - -cufftResult CUFFTAPI cufftExecC2R(cufftHandle plan, cufftComplex *idata, - cufftReal *odata) { - using FuncPtr = - cufftResult(CUFFTAPI *)(cufftHandle, cufftComplex *, cufftReal *); - static auto func_ptr = LoadSymbol("cufftExecC2R"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, idata, odata); -} - -cufftResult CUFFTAPI cufftExecZ2Z(cufftHandle plan, cufftDoubleComplex *idata, - cufftDoubleComplex *odata, int direction) { - using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle, cufftDoubleComplex *, - cufftDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cufftExecZ2Z"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, idata, odata, direction); -} - -cufftResult CUFFTAPI cufftExecD2Z(cufftHandle plan, cufftDoubleReal *idata, - cufftDoubleComplex *odata) { - using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle, cufftDoubleReal *, - cufftDoubleComplex *); - static auto func_ptr = LoadSymbol("cufftExecD2Z"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, idata, odata); -} - -cufftResult CUFFTAPI cufftExecZ2D(cufftHandle plan, cufftDoubleComplex *idata, - cufftDoubleReal *odata) { - using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle, cufftDoubleComplex *, - cufftDoubleReal *); - static auto func_ptr = LoadSymbol("cufftExecZ2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, idata, odata); -} - -cufftResult CUFFTAPI cufftSetStream(cufftHandle plan, cudaStream_t stream) { - using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle, cudaStream_t); - static auto func_ptr = LoadSymbol("cufftSetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, stream); -} - -cufftResult CUFFTAPI cufftDestroy(cufftHandle plan) { - using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle); - 
static auto func_ptr = LoadSymbol("cufftDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan); -} - -cufftResult CUFFTAPI cufftGetVersion(int *version) { - using FuncPtr = cufftResult(CUFFTAPI *)(int *); - static auto func_ptr = LoadSymbol("cufftGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(version); -} - -cufftResult CUFFTAPI cufftGetProperty(libraryPropertyType type, int *value) { - using FuncPtr = cufftResult(CUFFTAPI *)(libraryPropertyType, int *); - static auto func_ptr = LoadSymbol("cufftGetProperty"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type, value); -} - -// The cufftXt API functions below are not currently used, they are merely here -// to allow replacing TFRT's cuda_stubs with cuda_runtime_for_xlir. - -cufftResult CUFFTAPI cufftXtMakePlanMany( - cufftHandle plan, int rank, long long int* n, long long int* inembed, - long long int istride, long long int idist, cudaDataType inputtype, - long long int* onembed, long long int ostride, long long int odist, - cudaDataType outputtype, long long int batch, size_t* workSize, - cudaDataType executiontype) { - using FuncPtr = cufftResult(CUFFTAPI*)( - cufftHandle, int, long long int*, long long int*, long long int, - long long int, cudaDataType, long long int*, long long int, long long int, - cudaDataType, long long int, size_t*, cudaDataType); - static auto func_ptr = LoadSymbol("cufftXtMakePlanMany"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, rank, n, inembed, istride, idist, inputtype, onembed, - ostride, odist, outputtype, batch, workSize, executiontype); -} - -cufftResult CUFFTAPI cufftXtGetSizeMany( - cufftHandle plan, int rank, long long int* n, long long int* inembed, - long long int istride, long long int idist, cudaDataType inputtype, - long long int* onembed, long long int ostride, long long int odist, - cudaDataType outputtype, long long int batch, size_t* workSize, - 
cudaDataType executiontype) { - using FuncPtr = cufftResult(CUFFTAPI*)( - cufftHandle, int, long long int*, long long int*, long long int, - long long int, cudaDataType, long long int*, long long int, long long int, - cudaDataType, long long int, size_t*, cudaDataType); - static auto func_ptr = LoadSymbol("cufftXtGetSizeMany"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, rank, n, inembed, istride, idist, inputtype, onembed, - ostride, odist, outputtype, batch, workSize, executiontype); -} - -cufftResult CUFFTAPI cufftXtSetWorkAreaPolicy(cufftHandle plan, - cufftXtWorkAreaPolicy policy, - size_t* workSize) { - using FuncPtr = - cufftResult(CUFFTAPI*)(cufftHandle, cufftXtWorkAreaPolicy, size_t*); - static auto func_ptr = LoadSymbol("cufftXtGetSizeMany"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, policy, workSize); -} - -cufftResult CUFFTAPI cufftXtExec(cufftHandle plan, void* input, void* output, - int direction) { - using FuncPtr = cufftResult(CUFFTAPI*)(cufftHandle, void*, void*, int); - static auto func_ptr = LoadSymbol("cufftXtGetSizeMany"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, input, output, direction); -} - -cufftResult CUFFTAPI cufftXtMemcpy(cufftHandle plan, void* dstPointer, - void* srcPointer, cufftXtCopyType type) { - using FuncPtr = - cufftResult(CUFFTAPI*)(cufftHandle, void*, void*, cufftXtCopyType); - static auto func_ptr = LoadSymbol("cufftXtGetSizeMany"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, dstPointer, srcPointer, type); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cufft_9_0.inc b/third_party/xla/third_party/tsl/tsl/cuda/cufft_9_0.inc deleted file mode 100644 index e6244f0705d7ee..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cufft_9_0.inc +++ /dev/null @@ -1,307 +0,0 @@ -// Auto-generated, do not edit. 
- -extern "C" { - -cufftResult CUFFTAPI cufftPlan1d(cufftHandle *plan, int nx, cufftType type, - int batch) { - using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle *, int, cufftType, int); - static auto func_ptr = LoadSymbol("cufftPlan1d"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, nx, type, batch); -} - -cufftResult CUFFTAPI cufftPlan2d(cufftHandle *plan, int nx, int ny, - cufftType type) { - using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle *, int, int, cufftType); - static auto func_ptr = LoadSymbol("cufftPlan2d"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, nx, ny, type); -} - -cufftResult CUFFTAPI cufftPlan3d(cufftHandle *plan, int nx, int ny, int nz, - cufftType type) { - using FuncPtr = - cufftResult(CUFFTAPI *)(cufftHandle *, int, int, int, cufftType); - static auto func_ptr = LoadSymbol("cufftPlan3d"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, nx, ny, nz, type); -} - -cufftResult CUFFTAPI cufftPlanMany(cufftHandle *plan, int rank, int *n, - int *inembed, int istride, int idist, - int *onembed, int ostride, int odist, - cufftType type, int batch) { - using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle *, int, int *, int *, int, - int, int *, int, int, cufftType, int); - static auto func_ptr = LoadSymbol("cufftPlanMany"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, rank, n, inembed, istride, idist, onembed, ostride, - odist, type, batch); -} - -cufftResult CUFFTAPI cufftMakePlan1d(cufftHandle plan, int nx, cufftType type, - int batch, size_t *workSize) { - using FuncPtr = - cufftResult(CUFFTAPI *)(cufftHandle, int, cufftType, int, size_t *); - static auto func_ptr = LoadSymbol("cufftMakePlan1d"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, nx, type, batch, workSize); -} - -cufftResult CUFFTAPI cufftMakePlan2d(cufftHandle plan, int nx, int ny, - cufftType type, size_t *workSize) { - using FuncPtr = - 
cufftResult(CUFFTAPI *)(cufftHandle, int, int, cufftType, size_t *); - static auto func_ptr = LoadSymbol("cufftMakePlan2d"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, nx, ny, type, workSize); -} - -cufftResult CUFFTAPI cufftMakePlan3d(cufftHandle plan, int nx, int ny, int nz, - cufftType type, size_t *workSize) { - using FuncPtr = - cufftResult(CUFFTAPI *)(cufftHandle, int, int, int, cufftType, size_t *); - static auto func_ptr = LoadSymbol("cufftMakePlan3d"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, nx, ny, nz, type, workSize); -} - -cufftResult CUFFTAPI cufftMakePlanMany(cufftHandle plan, int rank, int *n, - int *inembed, int istride, int idist, - int *onembed, int ostride, int odist, - cufftType type, int batch, - size_t *workSize) { - using FuncPtr = - cufftResult(CUFFTAPI *)(cufftHandle, int, int *, int *, int, int, int *, - int, int, cufftType, int, size_t *); - static auto func_ptr = LoadSymbol("cufftMakePlanMany"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, rank, n, inembed, istride, idist, onembed, ostride, - odist, type, batch, workSize); -} - -cufftResult CUFFTAPI cufftMakePlanMany64( - cufftHandle plan, int rank, long long int *n, long long int *inembed, - long long int istride, long long int idist, long long int *onembed, - long long int ostride, long long int odist, cufftType type, - long long int batch, size_t *workSize) { - using FuncPtr = cufftResult(CUFFTAPI *)( - cufftHandle, int, long long *, long long *, long long, long long, - long long *, long long, long long, cufftType, long long, size_t *); - static auto func_ptr = LoadSymbol("cufftMakePlanMany64"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, rank, n, inembed, istride, idist, onembed, ostride, - odist, type, batch, workSize); -} - -cufftResult CUFFTAPI cufftGetSizeMany64( - cufftHandle plan, int rank, long long int *n, long long int *inembed, - long long int istride, 
long long int idist, long long int *onembed, - long long int ostride, long long int odist, cufftType type, - long long int batch, size_t *workSize) { - using FuncPtr = cufftResult(CUFFTAPI *)( - cufftHandle, int, long long *, long long *, long long, long long, - long long *, long long, long long, cufftType, long long, size_t *); - static auto func_ptr = LoadSymbol("cufftGetSizeMany64"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, rank, n, inembed, istride, idist, onembed, ostride, - odist, type, batch, workSize); -} - -cufftResult CUFFTAPI cufftEstimate1d(int nx, cufftType type, int batch, - size_t *workSize) { - using FuncPtr = cufftResult(CUFFTAPI *)(int, cufftType, int, size_t *); - static auto func_ptr = LoadSymbol("cufftEstimate1d"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(nx, type, batch, workSize); -} - -cufftResult CUFFTAPI cufftEstimate2d(int nx, int ny, cufftType type, - size_t *workSize) { - using FuncPtr = cufftResult(CUFFTAPI *)(int, int, cufftType, size_t *); - static auto func_ptr = LoadSymbol("cufftEstimate2d"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(nx, ny, type, workSize); -} - -cufftResult CUFFTAPI cufftEstimate3d(int nx, int ny, int nz, cufftType type, - size_t *workSize) { - using FuncPtr = cufftResult(CUFFTAPI *)(int, int, int, cufftType, size_t *); - static auto func_ptr = LoadSymbol("cufftEstimate3d"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(nx, ny, nz, type, workSize); -} - -cufftResult CUFFTAPI cufftEstimateMany(int rank, int *n, int *inembed, - int istride, int idist, int *onembed, - int ostride, int odist, cufftType type, - int batch, size_t *workSize) { - using FuncPtr = cufftResult(CUFFTAPI *)(int, int *, int *, int, int, int *, - int, int, cufftType, int, size_t *); - static auto func_ptr = LoadSymbol("cufftEstimateMany"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(rank, n, inembed, istride, 
idist, onembed, ostride, odist, - type, batch, workSize); -} - -cufftResult CUFFTAPI cufftCreate(cufftHandle *handle) { - using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle *); - static auto func_ptr = LoadSymbol("cufftCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cufftResult CUFFTAPI cufftGetSize1d(cufftHandle handle, int nx, cufftType type, - int batch, size_t *workSize) { - using FuncPtr = - cufftResult(CUFFTAPI *)(cufftHandle, int, cufftType, int, size_t *); - static auto func_ptr = LoadSymbol("cufftGetSize1d"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nx, type, batch, workSize); -} - -cufftResult CUFFTAPI cufftGetSize2d(cufftHandle handle, int nx, int ny, - cufftType type, size_t *workSize) { - using FuncPtr = - cufftResult(CUFFTAPI *)(cufftHandle, int, int, cufftType, size_t *); - static auto func_ptr = LoadSymbol("cufftGetSize2d"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nx, ny, type, workSize); -} - -cufftResult CUFFTAPI cufftGetSize3d(cufftHandle handle, int nx, int ny, int nz, - cufftType type, size_t *workSize) { - using FuncPtr = - cufftResult(CUFFTAPI *)(cufftHandle, int, int, int, cufftType, size_t *); - static auto func_ptr = LoadSymbol("cufftGetSize3d"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nx, ny, nz, type, workSize); -} - -cufftResult CUFFTAPI cufftGetSizeMany(cufftHandle handle, int rank, int *n, - int *inembed, int istride, int idist, - int *onembed, int ostride, int odist, - cufftType type, int batch, - size_t *workArea) { - using FuncPtr = - cufftResult(CUFFTAPI *)(cufftHandle, int, int *, int *, int, int, int *, - int, int, cufftType, int, size_t *); - static auto func_ptr = LoadSymbol("cufftGetSizeMany"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, rank, n, inembed, istride, idist, onembed, ostride, - odist, type, batch, workArea); -} - -cufftResult 
CUFFTAPI cufftGetSize(cufftHandle handle, size_t *workSize) { - using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle, size_t *); - static auto func_ptr = LoadSymbol("cufftGetSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, workSize); -} - -cufftResult CUFFTAPI cufftSetWorkArea(cufftHandle plan, void *workArea) { - using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle, void *); - static auto func_ptr = LoadSymbol("cufftSetWorkArea"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, workArea); -} - -cufftResult CUFFTAPI cufftSetAutoAllocation(cufftHandle plan, - int autoAllocate) { - using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle, int); - static auto func_ptr = LoadSymbol("cufftSetAutoAllocation"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, autoAllocate); -} - -cufftResult CUFFTAPI cufftExecC2C(cufftHandle plan, cufftComplex *idata, - cufftComplex *odata, int direction) { - using FuncPtr = - cufftResult(CUFFTAPI *)(cufftHandle, cufftComplex *, cufftComplex *, int); - static auto func_ptr = LoadSymbol("cufftExecC2C"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, idata, odata, direction); -} - -cufftResult CUFFTAPI cufftExecR2C(cufftHandle plan, cufftReal *idata, - cufftComplex *odata) { - using FuncPtr = - cufftResult(CUFFTAPI *)(cufftHandle, cufftReal *, cufftComplex *); - static auto func_ptr = LoadSymbol("cufftExecR2C"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, idata, odata); -} - -cufftResult CUFFTAPI cufftExecC2R(cufftHandle plan, cufftComplex *idata, - cufftReal *odata) { - using FuncPtr = - cufftResult(CUFFTAPI *)(cufftHandle, cufftComplex *, cufftReal *); - static auto func_ptr = LoadSymbol("cufftExecC2R"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, idata, odata); -} - -cufftResult CUFFTAPI cufftExecZ2Z(cufftHandle plan, cufftDoubleComplex *idata, - cufftDoubleComplex *odata, 
int direction) { - using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle, cufftDoubleComplex *, - cufftDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cufftExecZ2Z"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, idata, odata, direction); -} - -cufftResult CUFFTAPI cufftExecD2Z(cufftHandle plan, cufftDoubleReal *idata, - cufftDoubleComplex *odata) { - using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle, cufftDoubleReal *, - cufftDoubleComplex *); - static auto func_ptr = LoadSymbol("cufftExecD2Z"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, idata, odata); -} - -cufftResult CUFFTAPI cufftExecZ2D(cufftHandle plan, cufftDoubleComplex *idata, - cufftDoubleReal *odata) { - using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle, cufftDoubleComplex *, - cufftDoubleReal *); - static auto func_ptr = LoadSymbol("cufftExecZ2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, idata, odata); -} - -cufftResult CUFFTAPI cufftSetStream(cufftHandle plan, cudaStream_t stream) { - using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle, cudaStream_t); - static auto func_ptr = LoadSymbol("cufftSetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, stream); -} - -cufftResult CUFFTAPI cufftSetCompatibilityMode(cufftHandle plan, - cufftCompatibility mode) { - using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle, cufftCompatibility); - static auto func_ptr = LoadSymbol("cufftSetCompatibilityMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, mode); -} - -cufftResult CUFFTAPI cufftDestroy(cufftHandle plan) { - using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle); - static auto func_ptr = LoadSymbol("cufftDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan); -} - -cufftResult CUFFTAPI cufftGetVersion(int *version) { - using FuncPtr = cufftResult(CUFFTAPI *)(int *); - static auto func_ptr = 
LoadSymbol("cufftGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(version); -} - -cufftResult CUFFTAPI cufftGetProperty(libraryPropertyType type, int *value) { - using FuncPtr = cufftResult(CUFFTAPI *)(libraryPropertyType, int *); - static auto func_ptr = LoadSymbol("cufftGetProperty"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type, value); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cufft_stub.cc b/third_party/xla/third_party/tsl/tsl/cuda/cufft_stub.cc index e7ea9d1eee78c8..8f5c1b0d687337 100644 --- a/third_party/xla/third_party/tsl/tsl/cuda/cufft_stub.cc +++ b/third_party/xla/third_party/tsl/tsl/cuda/cufft_stub.cc @@ -34,23 +34,38 @@ void* GetDsoHandle() { #endif } -template -T LoadSymbol(const char* symbol_name) { +void* LoadSymbol(const char* symbol_name) { void* symbol = nullptr; if (auto handle = GetDsoHandle()) { tsl::Env::Default() ->GetSymbolFromLibrary(handle, symbol_name, &symbol) .IgnoreError(); } - return reinterpret_cast(symbol); + return symbol; } -cufftResult GetSymbolNotFoundError() { return CUFFT_INTERNAL_ERROR; } +const char* kSymbols[] = { +#include "tsl/cuda/cufft.inc" +}; + +constexpr size_t kNumSymbols = sizeof(kSymbols) / sizeof(const char*); + } // namespace -#if CUFFT_VERSION < 10000 -#include "tsl/cuda/cufft_9_0.inc" -#else -// All CUDA-10+ implementations use the same API. 
-#include "tsl/cuda/cufft_10_0.inc" -#endif +extern "C" { + +static cufftResult GetSymbolNotFoundError() { return CUFFT_INTERNAL_ERROR; } + +extern void* _cufft_tramp_table[]; + +void _cufft_tramp_resolve(int i) { + CHECK_LE(0, i); + CHECK_LT(i, kNumSymbols); + void* p = LoadSymbol(kSymbols[i]); + if (!p) { + p = reinterpret_cast(&GetSymbolNotFoundError); + } + _cufft_tramp_table[i] = p; +} + +} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cupti.symbols b/third_party/xla/third_party/tsl/tsl/cuda/cupti.symbols new file mode 100644 index 00000000000000..0e95eaba0d5a67 --- /dev/null +++ b/third_party/xla/third_party/tsl/tsl/cuda/cupti.symbols @@ -0,0 +1,148 @@ +InitializeInjectionNvtx +InitializeInjectionNvtx2 +cuptiActivityConfigurePCSampling +cuptiActivityConfigureUnifiedMemoryCounter +cuptiActivityDisable +cuptiActivityDisableContext +cuptiActivityEnable +cuptiActivityEnableAndDump +cuptiActivityEnableBufferSummary +cuptiActivityEnableContext +cuptiActivityEnableLatencyTimestamps +cuptiActivityEnableLaunchAttributes +cuptiActivityEnableRawTimestamps +cuptiActivityFlush +cuptiActivityFlushAll +cuptiActivityFlushPeriod +cuptiActivityGetAttribute +cuptiActivityGetNextRecord +cuptiActivityGetNumDroppedRecords +cuptiActivityPopExternalCorrelationId +cuptiActivityPushExternalCorrelationId +cuptiActivityRegisterCallbacks +cuptiActivityRegisterTimestampCallback +cuptiActivitySetAttribute +cuptiComputeCapabilitySupported +cuptiDeviceEnumEventDomains +cuptiDeviceEnumMetrics +cuptiDeviceGetAttribute +cuptiDeviceGetChipName +cuptiDeviceGetEventDomainAttribute +cuptiDeviceGetNumEventDomains +cuptiDeviceGetNumMetrics +cuptiDeviceSupported +cuptiDeviceVirtualizationMode +cuptiDisableKernelReplayMode +cuptiDisableLibcuda +cuptiDisableNonOverlappingMode +cuptiEnableAllDomains +cuptiEnableCallback +cuptiEnableDomain +cuptiEnableKernelReplayMode +cuptiEnableNonOverlappingMode +cuptiEnumEventDomains +cuptiEnumMetrics +cuptiEventDomainEnumEvents 
+cuptiEventDomainGetAttribute +cuptiEventDomainGetNumEvents +cuptiEventGetAttribute +cuptiEventGetIdFromName +cuptiEventGroupAddEvent +cuptiEventGroupCreate +cuptiEventGroupDestroy +cuptiEventGroupDisable +cuptiEventGroupEnable +cuptiEventGroupGetAttribute +cuptiEventGroupReadAllEvents +cuptiEventGroupReadEvent +cuptiEventGroupRemoveAllEvents +cuptiEventGroupRemoveEvent +cuptiEventGroupResetAllEvents +cuptiEventGroupSetAttribute +cuptiEventGroupSetDisable +cuptiEventGroupSetEnable +cuptiEventGroupSetsCreate +cuptiEventGroupSetsDestroy +cuptiFinalize +cuptiGetAutoBoostState +cuptiGetCallbackName +cuptiGetCallbackState +cuptiGetContextId +cuptiGetCubinCrc +cuptiGetDeviceId +cuptiGetErrorMessage +cuptiGetGlobalCallbackState +cuptiGetGraphId +cuptiGetGraphNodeId +cuptiGetLastError +cuptiGetNumContexts +cuptiGetNumEventDomains +cuptiGetNumMetrics +cuptiGetRecommendedBufferSize +cuptiGetResultString +cuptiGetSassToSourceCorrelation +cuptiGetStreamId +cuptiGetStreamIdEx +cuptiGetThreadIdType +cuptiGetTimestamp +cuptiGetVersion +cuptiKernelReplaySubscribeUpdate +cuptiMetricCreateEventGroupSets +cuptiMetricEnumEvents +cuptiMetricEnumProperties +cuptiMetricGetAttribute +cuptiMetricGetIdFromName +cuptiMetricGetNumEvents +cuptiMetricGetNumProperties +cuptiMetricGetRequiredEventGroupSets +cuptiMetricGetValue +cuptiMetricGetValue2 +cuptiNvtxInitialize +cuptiNvtxInitialize2 +cuptiOpenACCInitialize +cuptiOpenMpInitialize +cuptiOpenMpInitialize_v2 +cuptiPCSamplingDisable +cuptiPCSamplingEnable +cuptiPCSamplingGetConfigurationAttribute +cuptiPCSamplingGetData +cuptiPCSamplingGetNumStallReasons +cuptiPCSamplingGetStallReasons +cuptiPCSamplingSetConfigurationAttribute +cuptiPCSamplingStart +cuptiPCSamplingStop +cuptiProfilerBeginPass +cuptiProfilerBeginSession +cuptiProfilerCounterDataImageCalculateScratchBufferSize +cuptiProfilerCounterDataImageCalculateSize +cuptiProfilerCounterDataImageInitialize +cuptiProfilerCounterDataImageInitializeScratchBuffer +cuptiProfilerDeInitialize 
+cuptiProfilerDeviceSupported +cuptiProfilerDisableProfiling +cuptiProfilerEnableProfiling +cuptiProfilerEndPass +cuptiProfilerEndSession +cuptiProfilerFlushCounterData +cuptiProfilerGetCounterAvailability +cuptiProfilerInitialize +cuptiProfilerPopRange +cuptiProfilerPushRange +cuptiProfilerSetConfig +cuptiProfilerUnsetConfig +cuptiRegisterComputeCrcCallback +cuptiSassMetricsDisable +cuptiSassMetricsEnable +cuptiSassMetricsFlushData +cuptiSassMetricsGetDataProperties +cuptiSassMetricsGetMetrics +cuptiSassMetricsGetNumOfMetrics +cuptiSassMetricsGetProperties +cuptiSassMetricsSetConfig +cuptiSassMetricsUnsetConfig +cuptiSetEventCollectionMode +cuptiSetThreadIdType +cuptiStateQuery +cuptiSubscribe +cuptiSupportedDomains +cuptiUnsubscribe diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cupti_10_0.inc b/third_party/xla/third_party/tsl/tsl/cuda/cupti_10_0.inc deleted file mode 100644 index dac0c7919342bd..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cupti_10_0.inc +++ /dev/null @@ -1,763 +0,0 @@ -// Auto-generated, do not edit. 
- -extern "C" { - -CUptiResult CUPTIAPI cuptiGetResultString(CUptiResult result, - const char **str) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUptiResult, const char **); - static auto func_ptr = LoadSymbol("cuptiGetResultString"); - if (!func_ptr) { - if (str) { - *str = "CUPTI could not be loaded or symbol could not be found."; - } - return GetSymbolNotFoundError(); - } - return func_ptr(result, str); -} - -CUptiResult CUPTIAPI cuptiGetVersion(uint32_t *version) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(version); -} - -CUptiResult CUPTIAPI cuptiSupportedDomains(size_t *domainCount, - CUpti_DomainTable *domainTable) { - using FuncPtr = CUptiResult(CUPTIAPI *)(size_t *, CUpti_DomainTable *); - static auto func_ptr = LoadSymbol("cuptiSupportedDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(domainCount, domainTable); -} - -CUptiResult CUPTIAPI cuptiSubscribe(CUpti_SubscriberHandle *subscriber, - CUpti_CallbackFunc callback, - void *userdata) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_SubscriberHandle *, - CUpti_CallbackFunc, void *); - static auto func_ptr = LoadSymbol("cuptiSubscribe"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(subscriber, callback, userdata); -} - -CUptiResult CUPTIAPI cuptiUnsubscribe(CUpti_SubscriberHandle subscriber) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_SubscriberHandle); - static auto func_ptr = LoadSymbol("cuptiUnsubscribe"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(subscriber); -} - -CUptiResult CUPTIAPI cuptiGetCallbackState(uint32_t *enable, - CUpti_SubscriberHandle subscriber, - CUpti_CallbackDomain domain, - CUpti_CallbackId cbid) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(uint32_t *, CUpti_SubscriberHandle, - CUpti_CallbackDomain, CUpti_CallbackId); - static auto func_ptr = 
LoadSymbol("cuptiGetCallbackState"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(enable, subscriber, domain, cbid); -} - -CUptiResult CUPTIAPI cuptiEnableCallback(uint32_t enable, - CUpti_SubscriberHandle subscriber, - CUpti_CallbackDomain domain, - CUpti_CallbackId cbid) { - using FuncPtr = CUptiResult(CUPTIAPI *)( - uint32_t, CUpti_SubscriberHandle, CUpti_CallbackDomain, CUpti_CallbackId); - static auto func_ptr = LoadSymbol("cuptiEnableCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(enable, subscriber, domain, cbid); -} - -CUptiResult CUPTIAPI cuptiEnableDomain(uint32_t enable, - CUpti_SubscriberHandle subscriber, - CUpti_CallbackDomain domain) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint32_t, CUpti_SubscriberHandle, - CUpti_CallbackDomain); - static auto func_ptr = LoadSymbol("cuptiEnableDomain"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(enable, subscriber, domain); -} - -CUptiResult CUPTIAPI cuptiEnableAllDomains(uint32_t enable, - CUpti_SubscriberHandle subscriber) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint32_t, CUpti_SubscriberHandle); - static auto func_ptr = LoadSymbol("cuptiEnableAllDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(enable, subscriber); -} - -CUptiResult CUPTIAPI cuptiGetCallbackName(CUpti_CallbackDomain domain, - uint32_t cbid, const char **name) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_CallbackDomain, uint32_t, const char **); - static auto func_ptr = LoadSymbol("cuptiGetCallbackName"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(domain, cbid, name); -} - -CUptiResult CUPTIAPI -cuptiSetEventCollectionMode(CUcontext context, CUpti_EventCollectionMode mode) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, CUpti_EventCollectionMode); - static auto func_ptr = LoadSymbol("cuptiSetEventCollectionMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, 
mode); -} - -CUptiResult CUPTIAPI cuptiDeviceGetAttribute(CUdevice device, - CUpti_DeviceAttribute attrib, - size_t *valueSize, void *value) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUdevice, CUpti_DeviceAttribute, - size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiDeviceGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiDeviceGetTimestamp(CUcontext context, - uint64_t *timestamp) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, uint64_t *); - static auto func_ptr = LoadSymbol("cuptiDeviceGetTimestamp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, timestamp); -} - -CUptiResult CUPTIAPI cuptiDeviceGetNumEventDomains(CUdevice device, - uint32_t *numDomains) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUdevice, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiDeviceGetNumEventDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, numDomains); -} - -CUptiResult CUPTIAPI cuptiDeviceEnumEventDomains( - CUdevice device, size_t *arraySizeBytes, CUpti_EventDomainID *domainArray) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUdevice, size_t *, CUpti_EventDomainID *); - static auto func_ptr = LoadSymbol("cuptiDeviceEnumEventDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, arraySizeBytes, domainArray); -} - -CUptiResult CUPTIAPI cuptiDeviceGetEventDomainAttribute( - CUdevice device, CUpti_EventDomainID eventDomain, - CUpti_EventDomainAttribute attrib, size_t *valueSize, void *value) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUdevice, CUpti_EventDomainID, - CUpti_EventDomainAttribute, size_t *, void *); - static auto func_ptr = - LoadSymbol("cuptiDeviceGetEventDomainAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, eventDomain, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI 
cuptiGetNumEventDomains(uint32_t *numDomains) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetNumEventDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numDomains); -} - -CUptiResult CUPTIAPI cuptiEnumEventDomains(size_t *arraySizeBytes, - CUpti_EventDomainID *domainArray) { - using FuncPtr = CUptiResult(CUPTIAPI *)(size_t *, CUpti_EventDomainID *); - static auto func_ptr = LoadSymbol("cuptiEnumEventDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(arraySizeBytes, domainArray); -} - -CUptiResult CUPTIAPI cuptiEventDomainGetAttribute( - CUpti_EventDomainID eventDomain, CUpti_EventDomainAttribute attrib, - size_t *valueSize, void *value) { - using FuncPtr = CUptiResult(CUPTIAPI *)( - CUpti_EventDomainID, CUpti_EventDomainAttribute, size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiEventDomainGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventDomain, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiEventDomainGetNumEvents( - CUpti_EventDomainID eventDomain, uint32_t *numEvents) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventDomainID, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiEventDomainGetNumEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventDomain, numEvents); -} - -CUptiResult CUPTIAPI cuptiEventDomainEnumEvents(CUpti_EventDomainID eventDomain, - size_t *arraySizeBytes, - CUpti_EventID *eventArray) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_EventDomainID, size_t *, CUpti_EventID *); - static auto func_ptr = LoadSymbol("cuptiEventDomainEnumEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventDomain, arraySizeBytes, eventArray); -} - -CUptiResult CUPTIAPI cuptiEventGetAttribute(CUpti_EventID event, - CUpti_EventAttribute attrib, - size_t *valueSize, void *value) { - using FuncPtr = CUptiResult(CUPTIAPI 
*)(CUpti_EventID, CUpti_EventAttribute, - size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiEventGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiEventGetIdFromName(CUdevice device, - const char *eventName, - CUpti_EventID *event) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUdevice, const char *, CUpti_EventID *); - static auto func_ptr = LoadSymbol("cuptiEventGetIdFromName"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, eventName, event); -} - -CUptiResult CUPTIAPI cuptiEventGroupCreate(CUcontext context, - CUpti_EventGroup *eventGroup, - uint32_t flags) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUcontext, CUpti_EventGroup *, uint32_t); - static auto func_ptr = LoadSymbol("cuptiEventGroupCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, eventGroup, flags); -} - -CUptiResult CUPTIAPI cuptiEventGroupDestroy(CUpti_EventGroup eventGroup) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup); - static auto func_ptr = LoadSymbol("cuptiEventGroupDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup); -} - -CUptiResult CUPTIAPI cuptiEventGroupGetAttribute( - CUpti_EventGroup eventGroup, CUpti_EventGroupAttribute attrib, - size_t *valueSize, void *value) { - using FuncPtr = CUptiResult(CUPTIAPI *)( - CUpti_EventGroup, CUpti_EventGroupAttribute, size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiEventGroupGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiEventGroupSetAttribute( - CUpti_EventGroup eventGroup, CUpti_EventGroupAttribute attrib, - size_t valueSize, void *value) { - using FuncPtr = CUptiResult(CUPTIAPI *)( - CUpti_EventGroup, CUpti_EventGroupAttribute, size_t, void *); - static auto func_ptr = 
LoadSymbol("cuptiEventGroupSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiEventGroupAddEvent(CUpti_EventGroup eventGroup, - CUpti_EventID event) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup, CUpti_EventID); - static auto func_ptr = LoadSymbol("cuptiEventGroupAddEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, event); -} - -CUptiResult CUPTIAPI cuptiEventGroupRemoveEvent(CUpti_EventGroup eventGroup, - CUpti_EventID event) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup, CUpti_EventID); - static auto func_ptr = LoadSymbol("cuptiEventGroupRemoveEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, event); -} - -CUptiResult CUPTIAPI -cuptiEventGroupRemoveAllEvents(CUpti_EventGroup eventGroup) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup); - static auto func_ptr = LoadSymbol("cuptiEventGroupRemoveAllEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup); -} - -CUptiResult CUPTIAPI -cuptiEventGroupResetAllEvents(CUpti_EventGroup eventGroup) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup); - static auto func_ptr = LoadSymbol("cuptiEventGroupResetAllEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup); -} - -CUptiResult CUPTIAPI cuptiEventGroupEnable(CUpti_EventGroup eventGroup) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup); - static auto func_ptr = LoadSymbol("cuptiEventGroupEnable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup); -} - -CUptiResult CUPTIAPI cuptiEventGroupDisable(CUpti_EventGroup eventGroup) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup); - static auto func_ptr = LoadSymbol("cuptiEventGroupDisable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup); -} 
- -CUptiResult CUPTIAPI cuptiEventGroupReadEvent(CUpti_EventGroup eventGroup, - CUpti_ReadEventFlags flags, - CUpti_EventID event, - size_t *eventValueBufferSizeBytes, - uint64_t *eventValueBuffer) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_EventGroup, CUpti_ReadEventFlags, - CUpti_EventID, size_t *, uint64_t *); - static auto func_ptr = LoadSymbol("cuptiEventGroupReadEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, flags, event, eventValueBufferSizeBytes, - eventValueBuffer); -} - -CUptiResult CUPTIAPI cuptiEventGroupReadAllEvents( - CUpti_EventGroup eventGroup, CUpti_ReadEventFlags flags, - size_t *eventValueBufferSizeBytes, uint64_t *eventValueBuffer, - size_t *eventIdArraySizeBytes, CUpti_EventID *eventIdArray, - size_t *numEventIdsRead) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_EventGroup, CUpti_ReadEventFlags, size_t *, - uint64_t *, size_t *, CUpti_EventID *, size_t *); - static auto func_ptr = LoadSymbol("cuptiEventGroupReadAllEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, flags, eventValueBufferSizeBytes, - eventValueBuffer, eventIdArraySizeBytes, eventIdArray, - numEventIdsRead); -} - -CUptiResult CUPTIAPI cuptiEventGroupSetsCreate( - CUcontext context, size_t eventIdArraySizeBytes, - CUpti_EventID *eventIdArray, CUpti_EventGroupSets **eventGroupPasses) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, size_t, CUpti_EventID *, - CUpti_EventGroupSets **); - static auto func_ptr = LoadSymbol("cuptiEventGroupSetsCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, eventIdArraySizeBytes, eventIdArray, - eventGroupPasses); -} - -CUptiResult CUPTIAPI -cuptiEventGroupSetsDestroy(CUpti_EventGroupSets *eventGroupSets) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroupSets *); - static auto func_ptr = LoadSymbol("cuptiEventGroupSetsDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(eventGroupSets); -} - -CUptiResult CUPTIAPI -cuptiEventGroupSetEnable(CUpti_EventGroupSet *eventGroupSet) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroupSet *); - static auto func_ptr = LoadSymbol("cuptiEventGroupSetEnable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroupSet); -} - -CUptiResult CUPTIAPI -cuptiEventGroupSetDisable(CUpti_EventGroupSet *eventGroupSet) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroupSet *); - static auto func_ptr = LoadSymbol("cuptiEventGroupSetDisable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroupSet); -} - -CUptiResult CUPTIAPI cuptiEnableKernelReplayMode(CUcontext context) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuptiEnableKernelReplayMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context); -} - -CUptiResult CUPTIAPI cuptiDisableKernelReplayMode(CUcontext context) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuptiDisableKernelReplayMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context); -} - -CUptiResult CUPTIAPI cuptiKernelReplaySubscribeUpdate( - CUpti_KernelReplayUpdateFunc updateFunc, void *customData) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_KernelReplayUpdateFunc, void *); - static auto func_ptr = - LoadSymbol("cuptiKernelReplaySubscribeUpdate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(updateFunc, customData); -} - -CUptiResult CUPTIAPI cuptiGetNumMetrics(uint32_t *numMetrics) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetNumMetrics"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numMetrics); -} - -CUptiResult CUPTIAPI cuptiEnumMetrics(size_t *arraySizeBytes, - CUpti_MetricID *metricArray) { - using FuncPtr = CUptiResult(CUPTIAPI *)(size_t *, CUpti_MetricID 
*); - static auto func_ptr = LoadSymbol("cuptiEnumMetrics"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(arraySizeBytes, metricArray); -} - -CUptiResult CUPTIAPI cuptiDeviceGetNumMetrics(CUdevice device, - uint32_t *numMetrics) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUdevice, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiDeviceGetNumMetrics"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, numMetrics); -} - -CUptiResult CUPTIAPI cuptiDeviceEnumMetrics(CUdevice device, - size_t *arraySizeBytes, - CUpti_MetricID *metricArray) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUdevice, size_t *, CUpti_MetricID *); - static auto func_ptr = LoadSymbol("cuptiDeviceEnumMetrics"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, arraySizeBytes, metricArray); -} - -CUptiResult CUPTIAPI cuptiMetricGetAttribute(CUpti_MetricID metric, - CUpti_MetricAttribute attrib, - size_t *valueSize, void *value) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_MetricID, CUpti_MetricAttribute, - size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiMetricGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiMetricGetIdFromName(CUdevice device, - const char *metricName, - CUpti_MetricID *metric) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUdevice, const char *, CUpti_MetricID *); - static auto func_ptr = LoadSymbol("cuptiMetricGetIdFromName"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, metricName, metric); -} - -CUptiResult CUPTIAPI cuptiMetricGetNumEvents(CUpti_MetricID metric, - uint32_t *numEvents) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_MetricID, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiMetricGetNumEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, numEvents); -} - -CUptiResult CUPTIAPI 
cuptiMetricEnumEvents(CUpti_MetricID metric, - size_t *eventIdArraySizeBytes, - CUpti_EventID *eventIdArray) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_MetricID, size_t *, CUpti_EventID *); - static auto func_ptr = LoadSymbol("cuptiMetricEnumEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, eventIdArraySizeBytes, eventIdArray); -} - -CUptiResult CUPTIAPI cuptiMetricGetNumProperties(CUpti_MetricID metric, - uint32_t *numProp) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_MetricID, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiMetricGetNumProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, numProp); -} - -CUptiResult CUPTIAPI -cuptiMetricEnumProperties(CUpti_MetricID metric, size_t *propIdArraySizeBytes, - CUpti_MetricPropertyID *propIdArray) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_MetricID, size_t *, - CUpti_MetricPropertyID *); - static auto func_ptr = LoadSymbol("cuptiMetricEnumProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, propIdArraySizeBytes, propIdArray); -} - -CUptiResult CUPTIAPI -cuptiMetricGetRequiredEventGroupSets(CUcontext context, CUpti_MetricID metric, - CUpti_EventGroupSets **eventGroupSets) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, CUpti_MetricID, - CUpti_EventGroupSets **); - static auto func_ptr = - LoadSymbol("cuptiMetricGetRequiredEventGroupSets"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, metric, eventGroupSets); -} - -CUptiResult CUPTIAPI cuptiMetricCreateEventGroupSets( - CUcontext context, size_t metricIdArraySizeBytes, - CUpti_MetricID *metricIdArray, CUpti_EventGroupSets **eventGroupPasses) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, size_t, CUpti_MetricID *, - CUpti_EventGroupSets **); - static auto func_ptr = LoadSymbol("cuptiMetricCreateEventGroupSets"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, 
metricIdArraySizeBytes, metricIdArray, - eventGroupPasses); -} - -CUptiResult CUPTIAPI cuptiMetricGetValue(CUdevice device, CUpti_MetricID metric, - size_t eventIdArraySizeBytes, - CUpti_EventID *eventIdArray, - size_t eventValueArraySizeBytes, - uint64_t *eventValueArray, - uint64_t timeDuration, - CUpti_MetricValue *metricValue) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUdevice, CUpti_MetricID, size_t, - CUpti_EventID *, size_t, uint64_t *, - uint64_t, CUpti_MetricValue *); - static auto func_ptr = LoadSymbol("cuptiMetricGetValue"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, metric, eventIdArraySizeBytes, eventIdArray, - eventValueArraySizeBytes, eventValueArray, timeDuration, - metricValue); -} - -CUptiResult CUPTIAPI cuptiMetricGetValue2( - CUpti_MetricID metric, size_t eventIdArraySizeBytes, - CUpti_EventID *eventIdArray, size_t eventValueArraySizeBytes, - uint64_t *eventValueArray, size_t propIdArraySizeBytes, - CUpti_MetricPropertyID *propIdArray, size_t propValueArraySizeBytes, - uint64_t *propValueArray, CUpti_MetricValue *metricValue) { - using FuncPtr = CUptiResult(CUPTIAPI *)( - CUpti_MetricID, size_t, CUpti_EventID *, size_t, uint64_t *, size_t, - CUpti_MetricPropertyID *, size_t, uint64_t *, CUpti_MetricValue *); - static auto func_ptr = LoadSymbol("cuptiMetricGetValue2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, eventIdArraySizeBytes, eventIdArray, - eventValueArraySizeBytes, eventValueArray, - propIdArraySizeBytes, propIdArray, propValueArraySizeBytes, - propValueArray, metricValue); -} - -CUptiResult CUPTIAPI cuptiGetTimestamp(uint64_t *timestamp) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint64_t *); - static auto func_ptr = LoadSymbol("cuptiGetTimestamp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(timestamp); -} - -CUptiResult CUPTIAPI cuptiGetContextId(CUcontext context, uint32_t *contextId) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, 
uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetContextId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, contextId); -} - -CUptiResult CUPTIAPI cuptiGetStreamId(CUcontext context, CUstream stream, - uint32_t *streamId) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, CUstream, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetStreamId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, stream, streamId); -} - -CUptiResult CUPTIAPI cuptiGetStreamIdEx(CUcontext context, CUstream stream, - uint8_t perThreadStream, - uint32_t *streamId) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUcontext, CUstream, uint8_t, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetStreamIdEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, stream, perThreadStream, streamId); -} - -CUptiResult CUPTIAPI cuptiGetDeviceId(CUcontext context, uint32_t *deviceId) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetDeviceId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, deviceId); -} - -CUptiResult CUPTIAPI cuptiActivityEnable(CUpti_ActivityKind kind) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_ActivityKind); - static auto func_ptr = LoadSymbol("cuptiActivityEnable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(kind); -} - -CUptiResult CUPTIAPI cuptiActivityDisable(CUpti_ActivityKind kind) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_ActivityKind); - static auto func_ptr = LoadSymbol("cuptiActivityDisable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(kind); -} - -CUptiResult CUPTIAPI cuptiActivityEnableContext(CUcontext context, - CUpti_ActivityKind kind) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, CUpti_ActivityKind); - static auto func_ptr = LoadSymbol("cuptiActivityEnableContext"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(context, kind); -} - -CUptiResult CUPTIAPI cuptiActivityDisableContext(CUcontext context, - CUpti_ActivityKind kind) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, CUpti_ActivityKind); - static auto func_ptr = LoadSymbol("cuptiActivityDisableContext"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, kind); -} - -CUptiResult CUPTIAPI cuptiActivityGetNumDroppedRecords(CUcontext context, - uint32_t streamId, - size_t *dropped) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, uint32_t, size_t *); - static auto func_ptr = - LoadSymbol("cuptiActivityGetNumDroppedRecords"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, streamId, dropped); -} - -CUptiResult CUPTIAPI cuptiActivityGetNextRecord(uint8_t *buffer, - size_t validBufferSizeBytes, - CUpti_Activity **record) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint8_t *, size_t, CUpti_Activity **); - static auto func_ptr = LoadSymbol("cuptiActivityGetNextRecord"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(buffer, validBufferSizeBytes, record); -} - -CUptiResult CUPTIAPI cuptiActivityRegisterCallbacks( - CUpti_BuffersCallbackRequestFunc funcBufferRequested, - CUpti_BuffersCallbackCompleteFunc funcBufferCompleted) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_BuffersCallbackRequestFunc, - CUpti_BuffersCallbackCompleteFunc); - static auto func_ptr = LoadSymbol("cuptiActivityRegisterCallbacks"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(funcBufferRequested, funcBufferCompleted); -} - -CUptiResult CUPTIAPI cuptiActivityFlush(CUcontext context, uint32_t streamId, - uint32_t flag) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, uint32_t, uint32_t); - static auto func_ptr = LoadSymbol("cuptiActivityFlush"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, streamId, flag); -} - -CUptiResult CUPTIAPI 
cuptiActivityFlushAll(uint32_t flag) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint32_t); - static auto func_ptr = LoadSymbol("cuptiActivityFlushAll"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flag); -} - -CUptiResult CUPTIAPI cuptiActivityGetAttribute(CUpti_ActivityAttribute attr, - size_t *valueSize, void *value) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_ActivityAttribute, size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiActivityGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attr, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiActivitySetAttribute(CUpti_ActivityAttribute attr, - size_t *valueSize, void *value) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_ActivityAttribute, size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiActivitySetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attr, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiActivityConfigureUnifiedMemoryCounter( - CUpti_ActivityUnifiedMemoryCounterConfig *config, uint32_t count) { - using FuncPtr = CUptiResult(CUPTIAPI *)( - CUpti_ActivityUnifiedMemoryCounterConfig *, uint32_t); - static auto func_ptr = - LoadSymbol("cuptiActivityConfigureUnifiedMemoryCounter"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config, count); -} - -CUptiResult CUPTIAPI -cuptiGetAutoBoostState(CUcontext context, CUpti_ActivityAutoBoostState *state) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUcontext, CUpti_ActivityAutoBoostState *); - static auto func_ptr = LoadSymbol("cuptiGetAutoBoostState"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, state); -} - -CUptiResult CUPTIAPI cuptiActivityConfigurePCSampling( - CUcontext ctx, CUpti_ActivityPCSamplingConfig *config) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUcontext, CUpti_ActivityPCSamplingConfig *); - static auto func_ptr = - 
LoadSymbol("cuptiActivityConfigurePCSampling"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx, config); -} - -CUptiResult CUPTIAPI cuptiGetLastError(void) { - using FuncPtr = CUptiResult(CUPTIAPI *)(); - static auto func_ptr = LoadSymbol("cuptiGetLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -CUptiResult CUPTIAPI cuptiSetThreadIdType(CUpti_ActivityThreadIdType type) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_ActivityThreadIdType); - static auto func_ptr = LoadSymbol("cuptiSetThreadIdType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type); -} - -CUptiResult CUPTIAPI cuptiGetThreadIdType(CUpti_ActivityThreadIdType *type) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_ActivityThreadIdType *); - static auto func_ptr = LoadSymbol("cuptiGetThreadIdType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type); -} - -CUptiResult CUPTIAPI cuptiComputeCapabilitySupported(int major, int minor, - int *support) { - using FuncPtr = CUptiResult(CUPTIAPI *)(int, int, int *); - static auto func_ptr = LoadSymbol("cuptiComputeCapabilitySupported"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(major, minor, support); -} - -CUptiResult CUPTIAPI cuptiDeviceSupported(CUdevice dev, int *support) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUdevice, int *); - static auto func_ptr = LoadSymbol("cuptiDeviceSupported"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev, support); -} - -CUptiResult CUPTIAPI cuptiFinalize(void) { - using FuncPtr = CUptiResult(CUPTIAPI *)(); - static auto func_ptr = LoadSymbol("cuptiFinalize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -CUptiResult CUPTIAPI cuptiActivityPushExternalCorrelationId( - CUpti_ExternalCorrelationKind kind, uint64_t id) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_ExternalCorrelationKind, uint64_t); - static auto func_ptr = - 
LoadSymbol("cuptiActivityPushExternalCorrelationId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(kind, id); -} - -CUptiResult CUPTIAPI cuptiActivityPopExternalCorrelationId( - CUpti_ExternalCorrelationKind kind, uint64_t *lastId) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_ExternalCorrelationKind, uint64_t *); - static auto func_ptr = - LoadSymbol("cuptiActivityPopExternalCorrelationId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(kind, lastId); -} - -CUptiResult CUPTIAPI cuptiActivityEnableLatencyTimestamps(uint8_t enable) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint8_t); - static auto func_ptr = - LoadSymbol("cuptiActivityEnableLatencyTimestamps"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(enable); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cupti_10_1.inc b/third_party/xla/third_party/tsl/tsl/cuda/cupti_10_1.inc deleted file mode 100644 index dac0c7919342bd..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cupti_10_1.inc +++ /dev/null @@ -1,763 +0,0 @@ -// Auto-generated, do not edit. 
- -extern "C" { - -CUptiResult CUPTIAPI cuptiGetResultString(CUptiResult result, - const char **str) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUptiResult, const char **); - static auto func_ptr = LoadSymbol("cuptiGetResultString"); - if (!func_ptr) { - if (str) { - *str = "CUPTI could not be loaded or symbol could not be found."; - } - return GetSymbolNotFoundError(); - } - return func_ptr(result, str); -} - -CUptiResult CUPTIAPI cuptiGetVersion(uint32_t *version) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(version); -} - -CUptiResult CUPTIAPI cuptiSupportedDomains(size_t *domainCount, - CUpti_DomainTable *domainTable) { - using FuncPtr = CUptiResult(CUPTIAPI *)(size_t *, CUpti_DomainTable *); - static auto func_ptr = LoadSymbol("cuptiSupportedDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(domainCount, domainTable); -} - -CUptiResult CUPTIAPI cuptiSubscribe(CUpti_SubscriberHandle *subscriber, - CUpti_CallbackFunc callback, - void *userdata) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_SubscriberHandle *, - CUpti_CallbackFunc, void *); - static auto func_ptr = LoadSymbol("cuptiSubscribe"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(subscriber, callback, userdata); -} - -CUptiResult CUPTIAPI cuptiUnsubscribe(CUpti_SubscriberHandle subscriber) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_SubscriberHandle); - static auto func_ptr = LoadSymbol("cuptiUnsubscribe"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(subscriber); -} - -CUptiResult CUPTIAPI cuptiGetCallbackState(uint32_t *enable, - CUpti_SubscriberHandle subscriber, - CUpti_CallbackDomain domain, - CUpti_CallbackId cbid) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(uint32_t *, CUpti_SubscriberHandle, - CUpti_CallbackDomain, CUpti_CallbackId); - static auto func_ptr = 
LoadSymbol("cuptiGetCallbackState"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(enable, subscriber, domain, cbid); -} - -CUptiResult CUPTIAPI cuptiEnableCallback(uint32_t enable, - CUpti_SubscriberHandle subscriber, - CUpti_CallbackDomain domain, - CUpti_CallbackId cbid) { - using FuncPtr = CUptiResult(CUPTIAPI *)( - uint32_t, CUpti_SubscriberHandle, CUpti_CallbackDomain, CUpti_CallbackId); - static auto func_ptr = LoadSymbol("cuptiEnableCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(enable, subscriber, domain, cbid); -} - -CUptiResult CUPTIAPI cuptiEnableDomain(uint32_t enable, - CUpti_SubscriberHandle subscriber, - CUpti_CallbackDomain domain) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint32_t, CUpti_SubscriberHandle, - CUpti_CallbackDomain); - static auto func_ptr = LoadSymbol("cuptiEnableDomain"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(enable, subscriber, domain); -} - -CUptiResult CUPTIAPI cuptiEnableAllDomains(uint32_t enable, - CUpti_SubscriberHandle subscriber) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint32_t, CUpti_SubscriberHandle); - static auto func_ptr = LoadSymbol("cuptiEnableAllDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(enable, subscriber); -} - -CUptiResult CUPTIAPI cuptiGetCallbackName(CUpti_CallbackDomain domain, - uint32_t cbid, const char **name) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_CallbackDomain, uint32_t, const char **); - static auto func_ptr = LoadSymbol("cuptiGetCallbackName"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(domain, cbid, name); -} - -CUptiResult CUPTIAPI -cuptiSetEventCollectionMode(CUcontext context, CUpti_EventCollectionMode mode) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, CUpti_EventCollectionMode); - static auto func_ptr = LoadSymbol("cuptiSetEventCollectionMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, 
mode); -} - -CUptiResult CUPTIAPI cuptiDeviceGetAttribute(CUdevice device, - CUpti_DeviceAttribute attrib, - size_t *valueSize, void *value) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUdevice, CUpti_DeviceAttribute, - size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiDeviceGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiDeviceGetTimestamp(CUcontext context, - uint64_t *timestamp) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, uint64_t *); - static auto func_ptr = LoadSymbol("cuptiDeviceGetTimestamp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, timestamp); -} - -CUptiResult CUPTIAPI cuptiDeviceGetNumEventDomains(CUdevice device, - uint32_t *numDomains) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUdevice, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiDeviceGetNumEventDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, numDomains); -} - -CUptiResult CUPTIAPI cuptiDeviceEnumEventDomains( - CUdevice device, size_t *arraySizeBytes, CUpti_EventDomainID *domainArray) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUdevice, size_t *, CUpti_EventDomainID *); - static auto func_ptr = LoadSymbol("cuptiDeviceEnumEventDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, arraySizeBytes, domainArray); -} - -CUptiResult CUPTIAPI cuptiDeviceGetEventDomainAttribute( - CUdevice device, CUpti_EventDomainID eventDomain, - CUpti_EventDomainAttribute attrib, size_t *valueSize, void *value) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUdevice, CUpti_EventDomainID, - CUpti_EventDomainAttribute, size_t *, void *); - static auto func_ptr = - LoadSymbol("cuptiDeviceGetEventDomainAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, eventDomain, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI 
cuptiGetNumEventDomains(uint32_t *numDomains) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetNumEventDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numDomains); -} - -CUptiResult CUPTIAPI cuptiEnumEventDomains(size_t *arraySizeBytes, - CUpti_EventDomainID *domainArray) { - using FuncPtr = CUptiResult(CUPTIAPI *)(size_t *, CUpti_EventDomainID *); - static auto func_ptr = LoadSymbol("cuptiEnumEventDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(arraySizeBytes, domainArray); -} - -CUptiResult CUPTIAPI cuptiEventDomainGetAttribute( - CUpti_EventDomainID eventDomain, CUpti_EventDomainAttribute attrib, - size_t *valueSize, void *value) { - using FuncPtr = CUptiResult(CUPTIAPI *)( - CUpti_EventDomainID, CUpti_EventDomainAttribute, size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiEventDomainGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventDomain, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiEventDomainGetNumEvents( - CUpti_EventDomainID eventDomain, uint32_t *numEvents) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventDomainID, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiEventDomainGetNumEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventDomain, numEvents); -} - -CUptiResult CUPTIAPI cuptiEventDomainEnumEvents(CUpti_EventDomainID eventDomain, - size_t *arraySizeBytes, - CUpti_EventID *eventArray) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_EventDomainID, size_t *, CUpti_EventID *); - static auto func_ptr = LoadSymbol("cuptiEventDomainEnumEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventDomain, arraySizeBytes, eventArray); -} - -CUptiResult CUPTIAPI cuptiEventGetAttribute(CUpti_EventID event, - CUpti_EventAttribute attrib, - size_t *valueSize, void *value) { - using FuncPtr = CUptiResult(CUPTIAPI 
*)(CUpti_EventID, CUpti_EventAttribute, - size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiEventGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiEventGetIdFromName(CUdevice device, - const char *eventName, - CUpti_EventID *event) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUdevice, const char *, CUpti_EventID *); - static auto func_ptr = LoadSymbol("cuptiEventGetIdFromName"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, eventName, event); -} - -CUptiResult CUPTIAPI cuptiEventGroupCreate(CUcontext context, - CUpti_EventGroup *eventGroup, - uint32_t flags) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUcontext, CUpti_EventGroup *, uint32_t); - static auto func_ptr = LoadSymbol("cuptiEventGroupCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, eventGroup, flags); -} - -CUptiResult CUPTIAPI cuptiEventGroupDestroy(CUpti_EventGroup eventGroup) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup); - static auto func_ptr = LoadSymbol("cuptiEventGroupDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup); -} - -CUptiResult CUPTIAPI cuptiEventGroupGetAttribute( - CUpti_EventGroup eventGroup, CUpti_EventGroupAttribute attrib, - size_t *valueSize, void *value) { - using FuncPtr = CUptiResult(CUPTIAPI *)( - CUpti_EventGroup, CUpti_EventGroupAttribute, size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiEventGroupGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiEventGroupSetAttribute( - CUpti_EventGroup eventGroup, CUpti_EventGroupAttribute attrib, - size_t valueSize, void *value) { - using FuncPtr = CUptiResult(CUPTIAPI *)( - CUpti_EventGroup, CUpti_EventGroupAttribute, size_t, void *); - static auto func_ptr = 
LoadSymbol("cuptiEventGroupSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiEventGroupAddEvent(CUpti_EventGroup eventGroup, - CUpti_EventID event) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup, CUpti_EventID); - static auto func_ptr = LoadSymbol("cuptiEventGroupAddEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, event); -} - -CUptiResult CUPTIAPI cuptiEventGroupRemoveEvent(CUpti_EventGroup eventGroup, - CUpti_EventID event) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup, CUpti_EventID); - static auto func_ptr = LoadSymbol("cuptiEventGroupRemoveEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, event); -} - -CUptiResult CUPTIAPI -cuptiEventGroupRemoveAllEvents(CUpti_EventGroup eventGroup) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup); - static auto func_ptr = LoadSymbol("cuptiEventGroupRemoveAllEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup); -} - -CUptiResult CUPTIAPI -cuptiEventGroupResetAllEvents(CUpti_EventGroup eventGroup) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup); - static auto func_ptr = LoadSymbol("cuptiEventGroupResetAllEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup); -} - -CUptiResult CUPTIAPI cuptiEventGroupEnable(CUpti_EventGroup eventGroup) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup); - static auto func_ptr = LoadSymbol("cuptiEventGroupEnable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup); -} - -CUptiResult CUPTIAPI cuptiEventGroupDisable(CUpti_EventGroup eventGroup) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup); - static auto func_ptr = LoadSymbol("cuptiEventGroupDisable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup); -} 
- -CUptiResult CUPTIAPI cuptiEventGroupReadEvent(CUpti_EventGroup eventGroup, - CUpti_ReadEventFlags flags, - CUpti_EventID event, - size_t *eventValueBufferSizeBytes, - uint64_t *eventValueBuffer) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_EventGroup, CUpti_ReadEventFlags, - CUpti_EventID, size_t *, uint64_t *); - static auto func_ptr = LoadSymbol("cuptiEventGroupReadEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, flags, event, eventValueBufferSizeBytes, - eventValueBuffer); -} - -CUptiResult CUPTIAPI cuptiEventGroupReadAllEvents( - CUpti_EventGroup eventGroup, CUpti_ReadEventFlags flags, - size_t *eventValueBufferSizeBytes, uint64_t *eventValueBuffer, - size_t *eventIdArraySizeBytes, CUpti_EventID *eventIdArray, - size_t *numEventIdsRead) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_EventGroup, CUpti_ReadEventFlags, size_t *, - uint64_t *, size_t *, CUpti_EventID *, size_t *); - static auto func_ptr = LoadSymbol("cuptiEventGroupReadAllEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, flags, eventValueBufferSizeBytes, - eventValueBuffer, eventIdArraySizeBytes, eventIdArray, - numEventIdsRead); -} - -CUptiResult CUPTIAPI cuptiEventGroupSetsCreate( - CUcontext context, size_t eventIdArraySizeBytes, - CUpti_EventID *eventIdArray, CUpti_EventGroupSets **eventGroupPasses) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, size_t, CUpti_EventID *, - CUpti_EventGroupSets **); - static auto func_ptr = LoadSymbol("cuptiEventGroupSetsCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, eventIdArraySizeBytes, eventIdArray, - eventGroupPasses); -} - -CUptiResult CUPTIAPI -cuptiEventGroupSetsDestroy(CUpti_EventGroupSets *eventGroupSets) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroupSets *); - static auto func_ptr = LoadSymbol("cuptiEventGroupSetsDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(eventGroupSets); -} - -CUptiResult CUPTIAPI -cuptiEventGroupSetEnable(CUpti_EventGroupSet *eventGroupSet) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroupSet *); - static auto func_ptr = LoadSymbol("cuptiEventGroupSetEnable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroupSet); -} - -CUptiResult CUPTIAPI -cuptiEventGroupSetDisable(CUpti_EventGroupSet *eventGroupSet) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroupSet *); - static auto func_ptr = LoadSymbol("cuptiEventGroupSetDisable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroupSet); -} - -CUptiResult CUPTIAPI cuptiEnableKernelReplayMode(CUcontext context) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuptiEnableKernelReplayMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context); -} - -CUptiResult CUPTIAPI cuptiDisableKernelReplayMode(CUcontext context) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuptiDisableKernelReplayMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context); -} - -CUptiResult CUPTIAPI cuptiKernelReplaySubscribeUpdate( - CUpti_KernelReplayUpdateFunc updateFunc, void *customData) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_KernelReplayUpdateFunc, void *); - static auto func_ptr = - LoadSymbol("cuptiKernelReplaySubscribeUpdate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(updateFunc, customData); -} - -CUptiResult CUPTIAPI cuptiGetNumMetrics(uint32_t *numMetrics) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetNumMetrics"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numMetrics); -} - -CUptiResult CUPTIAPI cuptiEnumMetrics(size_t *arraySizeBytes, - CUpti_MetricID *metricArray) { - using FuncPtr = CUptiResult(CUPTIAPI *)(size_t *, CUpti_MetricID 
*); - static auto func_ptr = LoadSymbol("cuptiEnumMetrics"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(arraySizeBytes, metricArray); -} - -CUptiResult CUPTIAPI cuptiDeviceGetNumMetrics(CUdevice device, - uint32_t *numMetrics) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUdevice, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiDeviceGetNumMetrics"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, numMetrics); -} - -CUptiResult CUPTIAPI cuptiDeviceEnumMetrics(CUdevice device, - size_t *arraySizeBytes, - CUpti_MetricID *metricArray) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUdevice, size_t *, CUpti_MetricID *); - static auto func_ptr = LoadSymbol("cuptiDeviceEnumMetrics"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, arraySizeBytes, metricArray); -} - -CUptiResult CUPTIAPI cuptiMetricGetAttribute(CUpti_MetricID metric, - CUpti_MetricAttribute attrib, - size_t *valueSize, void *value) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_MetricID, CUpti_MetricAttribute, - size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiMetricGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiMetricGetIdFromName(CUdevice device, - const char *metricName, - CUpti_MetricID *metric) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUdevice, const char *, CUpti_MetricID *); - static auto func_ptr = LoadSymbol("cuptiMetricGetIdFromName"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, metricName, metric); -} - -CUptiResult CUPTIAPI cuptiMetricGetNumEvents(CUpti_MetricID metric, - uint32_t *numEvents) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_MetricID, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiMetricGetNumEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, numEvents); -} - -CUptiResult CUPTIAPI 
cuptiMetricEnumEvents(CUpti_MetricID metric, - size_t *eventIdArraySizeBytes, - CUpti_EventID *eventIdArray) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_MetricID, size_t *, CUpti_EventID *); - static auto func_ptr = LoadSymbol("cuptiMetricEnumEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, eventIdArraySizeBytes, eventIdArray); -} - -CUptiResult CUPTIAPI cuptiMetricGetNumProperties(CUpti_MetricID metric, - uint32_t *numProp) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_MetricID, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiMetricGetNumProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, numProp); -} - -CUptiResult CUPTIAPI -cuptiMetricEnumProperties(CUpti_MetricID metric, size_t *propIdArraySizeBytes, - CUpti_MetricPropertyID *propIdArray) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_MetricID, size_t *, - CUpti_MetricPropertyID *); - static auto func_ptr = LoadSymbol("cuptiMetricEnumProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, propIdArraySizeBytes, propIdArray); -} - -CUptiResult CUPTIAPI -cuptiMetricGetRequiredEventGroupSets(CUcontext context, CUpti_MetricID metric, - CUpti_EventGroupSets **eventGroupSets) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, CUpti_MetricID, - CUpti_EventGroupSets **); - static auto func_ptr = - LoadSymbol("cuptiMetricGetRequiredEventGroupSets"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, metric, eventGroupSets); -} - -CUptiResult CUPTIAPI cuptiMetricCreateEventGroupSets( - CUcontext context, size_t metricIdArraySizeBytes, - CUpti_MetricID *metricIdArray, CUpti_EventGroupSets **eventGroupPasses) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, size_t, CUpti_MetricID *, - CUpti_EventGroupSets **); - static auto func_ptr = LoadSymbol("cuptiMetricCreateEventGroupSets"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, 
metricIdArraySizeBytes, metricIdArray, - eventGroupPasses); -} - -CUptiResult CUPTIAPI cuptiMetricGetValue(CUdevice device, CUpti_MetricID metric, - size_t eventIdArraySizeBytes, - CUpti_EventID *eventIdArray, - size_t eventValueArraySizeBytes, - uint64_t *eventValueArray, - uint64_t timeDuration, - CUpti_MetricValue *metricValue) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUdevice, CUpti_MetricID, size_t, - CUpti_EventID *, size_t, uint64_t *, - uint64_t, CUpti_MetricValue *); - static auto func_ptr = LoadSymbol("cuptiMetricGetValue"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, metric, eventIdArraySizeBytes, eventIdArray, - eventValueArraySizeBytes, eventValueArray, timeDuration, - metricValue); -} - -CUptiResult CUPTIAPI cuptiMetricGetValue2( - CUpti_MetricID metric, size_t eventIdArraySizeBytes, - CUpti_EventID *eventIdArray, size_t eventValueArraySizeBytes, - uint64_t *eventValueArray, size_t propIdArraySizeBytes, - CUpti_MetricPropertyID *propIdArray, size_t propValueArraySizeBytes, - uint64_t *propValueArray, CUpti_MetricValue *metricValue) { - using FuncPtr = CUptiResult(CUPTIAPI *)( - CUpti_MetricID, size_t, CUpti_EventID *, size_t, uint64_t *, size_t, - CUpti_MetricPropertyID *, size_t, uint64_t *, CUpti_MetricValue *); - static auto func_ptr = LoadSymbol("cuptiMetricGetValue2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, eventIdArraySizeBytes, eventIdArray, - eventValueArraySizeBytes, eventValueArray, - propIdArraySizeBytes, propIdArray, propValueArraySizeBytes, - propValueArray, metricValue); -} - -CUptiResult CUPTIAPI cuptiGetTimestamp(uint64_t *timestamp) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint64_t *); - static auto func_ptr = LoadSymbol("cuptiGetTimestamp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(timestamp); -} - -CUptiResult CUPTIAPI cuptiGetContextId(CUcontext context, uint32_t *contextId) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, 
uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetContextId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, contextId); -} - -CUptiResult CUPTIAPI cuptiGetStreamId(CUcontext context, CUstream stream, - uint32_t *streamId) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, CUstream, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetStreamId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, stream, streamId); -} - -CUptiResult CUPTIAPI cuptiGetStreamIdEx(CUcontext context, CUstream stream, - uint8_t perThreadStream, - uint32_t *streamId) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUcontext, CUstream, uint8_t, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetStreamIdEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, stream, perThreadStream, streamId); -} - -CUptiResult CUPTIAPI cuptiGetDeviceId(CUcontext context, uint32_t *deviceId) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetDeviceId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, deviceId); -} - -CUptiResult CUPTIAPI cuptiActivityEnable(CUpti_ActivityKind kind) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_ActivityKind); - static auto func_ptr = LoadSymbol("cuptiActivityEnable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(kind); -} - -CUptiResult CUPTIAPI cuptiActivityDisable(CUpti_ActivityKind kind) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_ActivityKind); - static auto func_ptr = LoadSymbol("cuptiActivityDisable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(kind); -} - -CUptiResult CUPTIAPI cuptiActivityEnableContext(CUcontext context, - CUpti_ActivityKind kind) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, CUpti_ActivityKind); - static auto func_ptr = LoadSymbol("cuptiActivityEnableContext"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(context, kind); -} - -CUptiResult CUPTIAPI cuptiActivityDisableContext(CUcontext context, - CUpti_ActivityKind kind) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, CUpti_ActivityKind); - static auto func_ptr = LoadSymbol("cuptiActivityDisableContext"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, kind); -} - -CUptiResult CUPTIAPI cuptiActivityGetNumDroppedRecords(CUcontext context, - uint32_t streamId, - size_t *dropped) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, uint32_t, size_t *); - static auto func_ptr = - LoadSymbol("cuptiActivityGetNumDroppedRecords"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, streamId, dropped); -} - -CUptiResult CUPTIAPI cuptiActivityGetNextRecord(uint8_t *buffer, - size_t validBufferSizeBytes, - CUpti_Activity **record) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint8_t *, size_t, CUpti_Activity **); - static auto func_ptr = LoadSymbol("cuptiActivityGetNextRecord"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(buffer, validBufferSizeBytes, record); -} - -CUptiResult CUPTIAPI cuptiActivityRegisterCallbacks( - CUpti_BuffersCallbackRequestFunc funcBufferRequested, - CUpti_BuffersCallbackCompleteFunc funcBufferCompleted) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_BuffersCallbackRequestFunc, - CUpti_BuffersCallbackCompleteFunc); - static auto func_ptr = LoadSymbol("cuptiActivityRegisterCallbacks"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(funcBufferRequested, funcBufferCompleted); -} - -CUptiResult CUPTIAPI cuptiActivityFlush(CUcontext context, uint32_t streamId, - uint32_t flag) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, uint32_t, uint32_t); - static auto func_ptr = LoadSymbol("cuptiActivityFlush"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, streamId, flag); -} - -CUptiResult CUPTIAPI 
cuptiActivityFlushAll(uint32_t flag) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint32_t); - static auto func_ptr = LoadSymbol("cuptiActivityFlushAll"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flag); -} - -CUptiResult CUPTIAPI cuptiActivityGetAttribute(CUpti_ActivityAttribute attr, - size_t *valueSize, void *value) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_ActivityAttribute, size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiActivityGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attr, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiActivitySetAttribute(CUpti_ActivityAttribute attr, - size_t *valueSize, void *value) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_ActivityAttribute, size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiActivitySetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attr, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiActivityConfigureUnifiedMemoryCounter( - CUpti_ActivityUnifiedMemoryCounterConfig *config, uint32_t count) { - using FuncPtr = CUptiResult(CUPTIAPI *)( - CUpti_ActivityUnifiedMemoryCounterConfig *, uint32_t); - static auto func_ptr = - LoadSymbol("cuptiActivityConfigureUnifiedMemoryCounter"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config, count); -} - -CUptiResult CUPTIAPI -cuptiGetAutoBoostState(CUcontext context, CUpti_ActivityAutoBoostState *state) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUcontext, CUpti_ActivityAutoBoostState *); - static auto func_ptr = LoadSymbol("cuptiGetAutoBoostState"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, state); -} - -CUptiResult CUPTIAPI cuptiActivityConfigurePCSampling( - CUcontext ctx, CUpti_ActivityPCSamplingConfig *config) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUcontext, CUpti_ActivityPCSamplingConfig *); - static auto func_ptr = - 
LoadSymbol("cuptiActivityConfigurePCSampling"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx, config); -} - -CUptiResult CUPTIAPI cuptiGetLastError(void) { - using FuncPtr = CUptiResult(CUPTIAPI *)(); - static auto func_ptr = LoadSymbol("cuptiGetLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -CUptiResult CUPTIAPI cuptiSetThreadIdType(CUpti_ActivityThreadIdType type) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_ActivityThreadIdType); - static auto func_ptr = LoadSymbol("cuptiSetThreadIdType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type); -} - -CUptiResult CUPTIAPI cuptiGetThreadIdType(CUpti_ActivityThreadIdType *type) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_ActivityThreadIdType *); - static auto func_ptr = LoadSymbol("cuptiGetThreadIdType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type); -} - -CUptiResult CUPTIAPI cuptiComputeCapabilitySupported(int major, int minor, - int *support) { - using FuncPtr = CUptiResult(CUPTIAPI *)(int, int, int *); - static auto func_ptr = LoadSymbol("cuptiComputeCapabilitySupported"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(major, minor, support); -} - -CUptiResult CUPTIAPI cuptiDeviceSupported(CUdevice dev, int *support) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUdevice, int *); - static auto func_ptr = LoadSymbol("cuptiDeviceSupported"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev, support); -} - -CUptiResult CUPTIAPI cuptiFinalize(void) { - using FuncPtr = CUptiResult(CUPTIAPI *)(); - static auto func_ptr = LoadSymbol("cuptiFinalize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -CUptiResult CUPTIAPI cuptiActivityPushExternalCorrelationId( - CUpti_ExternalCorrelationKind kind, uint64_t id) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_ExternalCorrelationKind, uint64_t); - static auto func_ptr = - 
LoadSymbol("cuptiActivityPushExternalCorrelationId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(kind, id); -} - -CUptiResult CUPTIAPI cuptiActivityPopExternalCorrelationId( - CUpti_ExternalCorrelationKind kind, uint64_t *lastId) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_ExternalCorrelationKind, uint64_t *); - static auto func_ptr = - LoadSymbol("cuptiActivityPopExternalCorrelationId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(kind, lastId); -} - -CUptiResult CUPTIAPI cuptiActivityEnableLatencyTimestamps(uint8_t enable) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint8_t); - static auto func_ptr = - LoadSymbol("cuptiActivityEnableLatencyTimestamps"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(enable); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cupti_10_2.inc b/third_party/xla/third_party/tsl/tsl/cuda/cupti_10_2.inc deleted file mode 100644 index dac0c7919342bd..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cupti_10_2.inc +++ /dev/null @@ -1,763 +0,0 @@ -// Auto-generated, do not edit. 
- -extern "C" { - -CUptiResult CUPTIAPI cuptiGetResultString(CUptiResult result, - const char **str) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUptiResult, const char **); - static auto func_ptr = LoadSymbol("cuptiGetResultString"); - if (!func_ptr) { - if (str) { - *str = "CUPTI could not be loaded or symbol could not be found."; - } - return GetSymbolNotFoundError(); - } - return func_ptr(result, str); -} - -CUptiResult CUPTIAPI cuptiGetVersion(uint32_t *version) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(version); -} - -CUptiResult CUPTIAPI cuptiSupportedDomains(size_t *domainCount, - CUpti_DomainTable *domainTable) { - using FuncPtr = CUptiResult(CUPTIAPI *)(size_t *, CUpti_DomainTable *); - static auto func_ptr = LoadSymbol("cuptiSupportedDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(domainCount, domainTable); -} - -CUptiResult CUPTIAPI cuptiSubscribe(CUpti_SubscriberHandle *subscriber, - CUpti_CallbackFunc callback, - void *userdata) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_SubscriberHandle *, - CUpti_CallbackFunc, void *); - static auto func_ptr = LoadSymbol("cuptiSubscribe"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(subscriber, callback, userdata); -} - -CUptiResult CUPTIAPI cuptiUnsubscribe(CUpti_SubscriberHandle subscriber) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_SubscriberHandle); - static auto func_ptr = LoadSymbol("cuptiUnsubscribe"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(subscriber); -} - -CUptiResult CUPTIAPI cuptiGetCallbackState(uint32_t *enable, - CUpti_SubscriberHandle subscriber, - CUpti_CallbackDomain domain, - CUpti_CallbackId cbid) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(uint32_t *, CUpti_SubscriberHandle, - CUpti_CallbackDomain, CUpti_CallbackId); - static auto func_ptr = 
LoadSymbol("cuptiGetCallbackState"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(enable, subscriber, domain, cbid); -} - -CUptiResult CUPTIAPI cuptiEnableCallback(uint32_t enable, - CUpti_SubscriberHandle subscriber, - CUpti_CallbackDomain domain, - CUpti_CallbackId cbid) { - using FuncPtr = CUptiResult(CUPTIAPI *)( - uint32_t, CUpti_SubscriberHandle, CUpti_CallbackDomain, CUpti_CallbackId); - static auto func_ptr = LoadSymbol("cuptiEnableCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(enable, subscriber, domain, cbid); -} - -CUptiResult CUPTIAPI cuptiEnableDomain(uint32_t enable, - CUpti_SubscriberHandle subscriber, - CUpti_CallbackDomain domain) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint32_t, CUpti_SubscriberHandle, - CUpti_CallbackDomain); - static auto func_ptr = LoadSymbol("cuptiEnableDomain"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(enable, subscriber, domain); -} - -CUptiResult CUPTIAPI cuptiEnableAllDomains(uint32_t enable, - CUpti_SubscriberHandle subscriber) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint32_t, CUpti_SubscriberHandle); - static auto func_ptr = LoadSymbol("cuptiEnableAllDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(enable, subscriber); -} - -CUptiResult CUPTIAPI cuptiGetCallbackName(CUpti_CallbackDomain domain, - uint32_t cbid, const char **name) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_CallbackDomain, uint32_t, const char **); - static auto func_ptr = LoadSymbol("cuptiGetCallbackName"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(domain, cbid, name); -} - -CUptiResult CUPTIAPI -cuptiSetEventCollectionMode(CUcontext context, CUpti_EventCollectionMode mode) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, CUpti_EventCollectionMode); - static auto func_ptr = LoadSymbol("cuptiSetEventCollectionMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, 
mode); -} - -CUptiResult CUPTIAPI cuptiDeviceGetAttribute(CUdevice device, - CUpti_DeviceAttribute attrib, - size_t *valueSize, void *value) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUdevice, CUpti_DeviceAttribute, - size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiDeviceGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiDeviceGetTimestamp(CUcontext context, - uint64_t *timestamp) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, uint64_t *); - static auto func_ptr = LoadSymbol("cuptiDeviceGetTimestamp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, timestamp); -} - -CUptiResult CUPTIAPI cuptiDeviceGetNumEventDomains(CUdevice device, - uint32_t *numDomains) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUdevice, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiDeviceGetNumEventDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, numDomains); -} - -CUptiResult CUPTIAPI cuptiDeviceEnumEventDomains( - CUdevice device, size_t *arraySizeBytes, CUpti_EventDomainID *domainArray) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUdevice, size_t *, CUpti_EventDomainID *); - static auto func_ptr = LoadSymbol("cuptiDeviceEnumEventDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, arraySizeBytes, domainArray); -} - -CUptiResult CUPTIAPI cuptiDeviceGetEventDomainAttribute( - CUdevice device, CUpti_EventDomainID eventDomain, - CUpti_EventDomainAttribute attrib, size_t *valueSize, void *value) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUdevice, CUpti_EventDomainID, - CUpti_EventDomainAttribute, size_t *, void *); - static auto func_ptr = - LoadSymbol("cuptiDeviceGetEventDomainAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, eventDomain, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI 
cuptiGetNumEventDomains(uint32_t *numDomains) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetNumEventDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numDomains); -} - -CUptiResult CUPTIAPI cuptiEnumEventDomains(size_t *arraySizeBytes, - CUpti_EventDomainID *domainArray) { - using FuncPtr = CUptiResult(CUPTIAPI *)(size_t *, CUpti_EventDomainID *); - static auto func_ptr = LoadSymbol("cuptiEnumEventDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(arraySizeBytes, domainArray); -} - -CUptiResult CUPTIAPI cuptiEventDomainGetAttribute( - CUpti_EventDomainID eventDomain, CUpti_EventDomainAttribute attrib, - size_t *valueSize, void *value) { - using FuncPtr = CUptiResult(CUPTIAPI *)( - CUpti_EventDomainID, CUpti_EventDomainAttribute, size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiEventDomainGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventDomain, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiEventDomainGetNumEvents( - CUpti_EventDomainID eventDomain, uint32_t *numEvents) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventDomainID, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiEventDomainGetNumEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventDomain, numEvents); -} - -CUptiResult CUPTIAPI cuptiEventDomainEnumEvents(CUpti_EventDomainID eventDomain, - size_t *arraySizeBytes, - CUpti_EventID *eventArray) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_EventDomainID, size_t *, CUpti_EventID *); - static auto func_ptr = LoadSymbol("cuptiEventDomainEnumEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventDomain, arraySizeBytes, eventArray); -} - -CUptiResult CUPTIAPI cuptiEventGetAttribute(CUpti_EventID event, - CUpti_EventAttribute attrib, - size_t *valueSize, void *value) { - using FuncPtr = CUptiResult(CUPTIAPI 
*)(CUpti_EventID, CUpti_EventAttribute, - size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiEventGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiEventGetIdFromName(CUdevice device, - const char *eventName, - CUpti_EventID *event) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUdevice, const char *, CUpti_EventID *); - static auto func_ptr = LoadSymbol("cuptiEventGetIdFromName"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, eventName, event); -} - -CUptiResult CUPTIAPI cuptiEventGroupCreate(CUcontext context, - CUpti_EventGroup *eventGroup, - uint32_t flags) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUcontext, CUpti_EventGroup *, uint32_t); - static auto func_ptr = LoadSymbol("cuptiEventGroupCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, eventGroup, flags); -} - -CUptiResult CUPTIAPI cuptiEventGroupDestroy(CUpti_EventGroup eventGroup) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup); - static auto func_ptr = LoadSymbol("cuptiEventGroupDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup); -} - -CUptiResult CUPTIAPI cuptiEventGroupGetAttribute( - CUpti_EventGroup eventGroup, CUpti_EventGroupAttribute attrib, - size_t *valueSize, void *value) { - using FuncPtr = CUptiResult(CUPTIAPI *)( - CUpti_EventGroup, CUpti_EventGroupAttribute, size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiEventGroupGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiEventGroupSetAttribute( - CUpti_EventGroup eventGroup, CUpti_EventGroupAttribute attrib, - size_t valueSize, void *value) { - using FuncPtr = CUptiResult(CUPTIAPI *)( - CUpti_EventGroup, CUpti_EventGroupAttribute, size_t, void *); - static auto func_ptr = 
LoadSymbol("cuptiEventGroupSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiEventGroupAddEvent(CUpti_EventGroup eventGroup, - CUpti_EventID event) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup, CUpti_EventID); - static auto func_ptr = LoadSymbol("cuptiEventGroupAddEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, event); -} - -CUptiResult CUPTIAPI cuptiEventGroupRemoveEvent(CUpti_EventGroup eventGroup, - CUpti_EventID event) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup, CUpti_EventID); - static auto func_ptr = LoadSymbol("cuptiEventGroupRemoveEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, event); -} - -CUptiResult CUPTIAPI -cuptiEventGroupRemoveAllEvents(CUpti_EventGroup eventGroup) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup); - static auto func_ptr = LoadSymbol("cuptiEventGroupRemoveAllEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup); -} - -CUptiResult CUPTIAPI -cuptiEventGroupResetAllEvents(CUpti_EventGroup eventGroup) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup); - static auto func_ptr = LoadSymbol("cuptiEventGroupResetAllEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup); -} - -CUptiResult CUPTIAPI cuptiEventGroupEnable(CUpti_EventGroup eventGroup) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup); - static auto func_ptr = LoadSymbol("cuptiEventGroupEnable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup); -} - -CUptiResult CUPTIAPI cuptiEventGroupDisable(CUpti_EventGroup eventGroup) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup); - static auto func_ptr = LoadSymbol("cuptiEventGroupDisable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup); -} 
- -CUptiResult CUPTIAPI cuptiEventGroupReadEvent(CUpti_EventGroup eventGroup, - CUpti_ReadEventFlags flags, - CUpti_EventID event, - size_t *eventValueBufferSizeBytes, - uint64_t *eventValueBuffer) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_EventGroup, CUpti_ReadEventFlags, - CUpti_EventID, size_t *, uint64_t *); - static auto func_ptr = LoadSymbol("cuptiEventGroupReadEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, flags, event, eventValueBufferSizeBytes, - eventValueBuffer); -} - -CUptiResult CUPTIAPI cuptiEventGroupReadAllEvents( - CUpti_EventGroup eventGroup, CUpti_ReadEventFlags flags, - size_t *eventValueBufferSizeBytes, uint64_t *eventValueBuffer, - size_t *eventIdArraySizeBytes, CUpti_EventID *eventIdArray, - size_t *numEventIdsRead) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_EventGroup, CUpti_ReadEventFlags, size_t *, - uint64_t *, size_t *, CUpti_EventID *, size_t *); - static auto func_ptr = LoadSymbol("cuptiEventGroupReadAllEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, flags, eventValueBufferSizeBytes, - eventValueBuffer, eventIdArraySizeBytes, eventIdArray, - numEventIdsRead); -} - -CUptiResult CUPTIAPI cuptiEventGroupSetsCreate( - CUcontext context, size_t eventIdArraySizeBytes, - CUpti_EventID *eventIdArray, CUpti_EventGroupSets **eventGroupPasses) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, size_t, CUpti_EventID *, - CUpti_EventGroupSets **); - static auto func_ptr = LoadSymbol("cuptiEventGroupSetsCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, eventIdArraySizeBytes, eventIdArray, - eventGroupPasses); -} - -CUptiResult CUPTIAPI -cuptiEventGroupSetsDestroy(CUpti_EventGroupSets *eventGroupSets) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroupSets *); - static auto func_ptr = LoadSymbol("cuptiEventGroupSetsDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(eventGroupSets); -} - -CUptiResult CUPTIAPI -cuptiEventGroupSetEnable(CUpti_EventGroupSet *eventGroupSet) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroupSet *); - static auto func_ptr = LoadSymbol("cuptiEventGroupSetEnable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroupSet); -} - -CUptiResult CUPTIAPI -cuptiEventGroupSetDisable(CUpti_EventGroupSet *eventGroupSet) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroupSet *); - static auto func_ptr = LoadSymbol("cuptiEventGroupSetDisable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroupSet); -} - -CUptiResult CUPTIAPI cuptiEnableKernelReplayMode(CUcontext context) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuptiEnableKernelReplayMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context); -} - -CUptiResult CUPTIAPI cuptiDisableKernelReplayMode(CUcontext context) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuptiDisableKernelReplayMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context); -} - -CUptiResult CUPTIAPI cuptiKernelReplaySubscribeUpdate( - CUpti_KernelReplayUpdateFunc updateFunc, void *customData) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_KernelReplayUpdateFunc, void *); - static auto func_ptr = - LoadSymbol("cuptiKernelReplaySubscribeUpdate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(updateFunc, customData); -} - -CUptiResult CUPTIAPI cuptiGetNumMetrics(uint32_t *numMetrics) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetNumMetrics"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numMetrics); -} - -CUptiResult CUPTIAPI cuptiEnumMetrics(size_t *arraySizeBytes, - CUpti_MetricID *metricArray) { - using FuncPtr = CUptiResult(CUPTIAPI *)(size_t *, CUpti_MetricID 
*); - static auto func_ptr = LoadSymbol("cuptiEnumMetrics"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(arraySizeBytes, metricArray); -} - -CUptiResult CUPTIAPI cuptiDeviceGetNumMetrics(CUdevice device, - uint32_t *numMetrics) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUdevice, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiDeviceGetNumMetrics"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, numMetrics); -} - -CUptiResult CUPTIAPI cuptiDeviceEnumMetrics(CUdevice device, - size_t *arraySizeBytes, - CUpti_MetricID *metricArray) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUdevice, size_t *, CUpti_MetricID *); - static auto func_ptr = LoadSymbol("cuptiDeviceEnumMetrics"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, arraySizeBytes, metricArray); -} - -CUptiResult CUPTIAPI cuptiMetricGetAttribute(CUpti_MetricID metric, - CUpti_MetricAttribute attrib, - size_t *valueSize, void *value) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_MetricID, CUpti_MetricAttribute, - size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiMetricGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiMetricGetIdFromName(CUdevice device, - const char *metricName, - CUpti_MetricID *metric) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUdevice, const char *, CUpti_MetricID *); - static auto func_ptr = LoadSymbol("cuptiMetricGetIdFromName"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, metricName, metric); -} - -CUptiResult CUPTIAPI cuptiMetricGetNumEvents(CUpti_MetricID metric, - uint32_t *numEvents) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_MetricID, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiMetricGetNumEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, numEvents); -} - -CUptiResult CUPTIAPI 
cuptiMetricEnumEvents(CUpti_MetricID metric, - size_t *eventIdArraySizeBytes, - CUpti_EventID *eventIdArray) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_MetricID, size_t *, CUpti_EventID *); - static auto func_ptr = LoadSymbol("cuptiMetricEnumEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, eventIdArraySizeBytes, eventIdArray); -} - -CUptiResult CUPTIAPI cuptiMetricGetNumProperties(CUpti_MetricID metric, - uint32_t *numProp) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_MetricID, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiMetricGetNumProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, numProp); -} - -CUptiResult CUPTIAPI -cuptiMetricEnumProperties(CUpti_MetricID metric, size_t *propIdArraySizeBytes, - CUpti_MetricPropertyID *propIdArray) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_MetricID, size_t *, - CUpti_MetricPropertyID *); - static auto func_ptr = LoadSymbol("cuptiMetricEnumProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, propIdArraySizeBytes, propIdArray); -} - -CUptiResult CUPTIAPI -cuptiMetricGetRequiredEventGroupSets(CUcontext context, CUpti_MetricID metric, - CUpti_EventGroupSets **eventGroupSets) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, CUpti_MetricID, - CUpti_EventGroupSets **); - static auto func_ptr = - LoadSymbol("cuptiMetricGetRequiredEventGroupSets"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, metric, eventGroupSets); -} - -CUptiResult CUPTIAPI cuptiMetricCreateEventGroupSets( - CUcontext context, size_t metricIdArraySizeBytes, - CUpti_MetricID *metricIdArray, CUpti_EventGroupSets **eventGroupPasses) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, size_t, CUpti_MetricID *, - CUpti_EventGroupSets **); - static auto func_ptr = LoadSymbol("cuptiMetricCreateEventGroupSets"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, 
metricIdArraySizeBytes, metricIdArray, - eventGroupPasses); -} - -CUptiResult CUPTIAPI cuptiMetricGetValue(CUdevice device, CUpti_MetricID metric, - size_t eventIdArraySizeBytes, - CUpti_EventID *eventIdArray, - size_t eventValueArraySizeBytes, - uint64_t *eventValueArray, - uint64_t timeDuration, - CUpti_MetricValue *metricValue) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUdevice, CUpti_MetricID, size_t, - CUpti_EventID *, size_t, uint64_t *, - uint64_t, CUpti_MetricValue *); - static auto func_ptr = LoadSymbol("cuptiMetricGetValue"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, metric, eventIdArraySizeBytes, eventIdArray, - eventValueArraySizeBytes, eventValueArray, timeDuration, - metricValue); -} - -CUptiResult CUPTIAPI cuptiMetricGetValue2( - CUpti_MetricID metric, size_t eventIdArraySizeBytes, - CUpti_EventID *eventIdArray, size_t eventValueArraySizeBytes, - uint64_t *eventValueArray, size_t propIdArraySizeBytes, - CUpti_MetricPropertyID *propIdArray, size_t propValueArraySizeBytes, - uint64_t *propValueArray, CUpti_MetricValue *metricValue) { - using FuncPtr = CUptiResult(CUPTIAPI *)( - CUpti_MetricID, size_t, CUpti_EventID *, size_t, uint64_t *, size_t, - CUpti_MetricPropertyID *, size_t, uint64_t *, CUpti_MetricValue *); - static auto func_ptr = LoadSymbol("cuptiMetricGetValue2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, eventIdArraySizeBytes, eventIdArray, - eventValueArraySizeBytes, eventValueArray, - propIdArraySizeBytes, propIdArray, propValueArraySizeBytes, - propValueArray, metricValue); -} - -CUptiResult CUPTIAPI cuptiGetTimestamp(uint64_t *timestamp) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint64_t *); - static auto func_ptr = LoadSymbol("cuptiGetTimestamp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(timestamp); -} - -CUptiResult CUPTIAPI cuptiGetContextId(CUcontext context, uint32_t *contextId) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, 
uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetContextId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, contextId); -} - -CUptiResult CUPTIAPI cuptiGetStreamId(CUcontext context, CUstream stream, - uint32_t *streamId) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, CUstream, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetStreamId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, stream, streamId); -} - -CUptiResult CUPTIAPI cuptiGetStreamIdEx(CUcontext context, CUstream stream, - uint8_t perThreadStream, - uint32_t *streamId) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUcontext, CUstream, uint8_t, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetStreamIdEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, stream, perThreadStream, streamId); -} - -CUptiResult CUPTIAPI cuptiGetDeviceId(CUcontext context, uint32_t *deviceId) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetDeviceId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, deviceId); -} - -CUptiResult CUPTIAPI cuptiActivityEnable(CUpti_ActivityKind kind) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_ActivityKind); - static auto func_ptr = LoadSymbol("cuptiActivityEnable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(kind); -} - -CUptiResult CUPTIAPI cuptiActivityDisable(CUpti_ActivityKind kind) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_ActivityKind); - static auto func_ptr = LoadSymbol("cuptiActivityDisable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(kind); -} - -CUptiResult CUPTIAPI cuptiActivityEnableContext(CUcontext context, - CUpti_ActivityKind kind) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, CUpti_ActivityKind); - static auto func_ptr = LoadSymbol("cuptiActivityEnableContext"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(context, kind); -} - -CUptiResult CUPTIAPI cuptiActivityDisableContext(CUcontext context, - CUpti_ActivityKind kind) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, CUpti_ActivityKind); - static auto func_ptr = LoadSymbol("cuptiActivityDisableContext"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, kind); -} - -CUptiResult CUPTIAPI cuptiActivityGetNumDroppedRecords(CUcontext context, - uint32_t streamId, - size_t *dropped) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, uint32_t, size_t *); - static auto func_ptr = - LoadSymbol("cuptiActivityGetNumDroppedRecords"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, streamId, dropped); -} - -CUptiResult CUPTIAPI cuptiActivityGetNextRecord(uint8_t *buffer, - size_t validBufferSizeBytes, - CUpti_Activity **record) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint8_t *, size_t, CUpti_Activity **); - static auto func_ptr = LoadSymbol("cuptiActivityGetNextRecord"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(buffer, validBufferSizeBytes, record); -} - -CUptiResult CUPTIAPI cuptiActivityRegisterCallbacks( - CUpti_BuffersCallbackRequestFunc funcBufferRequested, - CUpti_BuffersCallbackCompleteFunc funcBufferCompleted) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_BuffersCallbackRequestFunc, - CUpti_BuffersCallbackCompleteFunc); - static auto func_ptr = LoadSymbol("cuptiActivityRegisterCallbacks"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(funcBufferRequested, funcBufferCompleted); -} - -CUptiResult CUPTIAPI cuptiActivityFlush(CUcontext context, uint32_t streamId, - uint32_t flag) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, uint32_t, uint32_t); - static auto func_ptr = LoadSymbol("cuptiActivityFlush"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, streamId, flag); -} - -CUptiResult CUPTIAPI 
cuptiActivityFlushAll(uint32_t flag) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint32_t); - static auto func_ptr = LoadSymbol("cuptiActivityFlushAll"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flag); -} - -CUptiResult CUPTIAPI cuptiActivityGetAttribute(CUpti_ActivityAttribute attr, - size_t *valueSize, void *value) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_ActivityAttribute, size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiActivityGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attr, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiActivitySetAttribute(CUpti_ActivityAttribute attr, - size_t *valueSize, void *value) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_ActivityAttribute, size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiActivitySetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attr, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiActivityConfigureUnifiedMemoryCounter( - CUpti_ActivityUnifiedMemoryCounterConfig *config, uint32_t count) { - using FuncPtr = CUptiResult(CUPTIAPI *)( - CUpti_ActivityUnifiedMemoryCounterConfig *, uint32_t); - static auto func_ptr = - LoadSymbol("cuptiActivityConfigureUnifiedMemoryCounter"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config, count); -} - -CUptiResult CUPTIAPI -cuptiGetAutoBoostState(CUcontext context, CUpti_ActivityAutoBoostState *state) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUcontext, CUpti_ActivityAutoBoostState *); - static auto func_ptr = LoadSymbol("cuptiGetAutoBoostState"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, state); -} - -CUptiResult CUPTIAPI cuptiActivityConfigurePCSampling( - CUcontext ctx, CUpti_ActivityPCSamplingConfig *config) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUcontext, CUpti_ActivityPCSamplingConfig *); - static auto func_ptr = - 
LoadSymbol("cuptiActivityConfigurePCSampling"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx, config); -} - -CUptiResult CUPTIAPI cuptiGetLastError(void) { - using FuncPtr = CUptiResult(CUPTIAPI *)(); - static auto func_ptr = LoadSymbol("cuptiGetLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -CUptiResult CUPTIAPI cuptiSetThreadIdType(CUpti_ActivityThreadIdType type) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_ActivityThreadIdType); - static auto func_ptr = LoadSymbol("cuptiSetThreadIdType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type); -} - -CUptiResult CUPTIAPI cuptiGetThreadIdType(CUpti_ActivityThreadIdType *type) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_ActivityThreadIdType *); - static auto func_ptr = LoadSymbol("cuptiGetThreadIdType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type); -} - -CUptiResult CUPTIAPI cuptiComputeCapabilitySupported(int major, int minor, - int *support) { - using FuncPtr = CUptiResult(CUPTIAPI *)(int, int, int *); - static auto func_ptr = LoadSymbol("cuptiComputeCapabilitySupported"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(major, minor, support); -} - -CUptiResult CUPTIAPI cuptiDeviceSupported(CUdevice dev, int *support) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUdevice, int *); - static auto func_ptr = LoadSymbol("cuptiDeviceSupported"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev, support); -} - -CUptiResult CUPTIAPI cuptiFinalize(void) { - using FuncPtr = CUptiResult(CUPTIAPI *)(); - static auto func_ptr = LoadSymbol("cuptiFinalize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -CUptiResult CUPTIAPI cuptiActivityPushExternalCorrelationId( - CUpti_ExternalCorrelationKind kind, uint64_t id) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_ExternalCorrelationKind, uint64_t); - static auto func_ptr = - 
LoadSymbol("cuptiActivityPushExternalCorrelationId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(kind, id); -} - -CUptiResult CUPTIAPI cuptiActivityPopExternalCorrelationId( - CUpti_ExternalCorrelationKind kind, uint64_t *lastId) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_ExternalCorrelationKind, uint64_t *); - static auto func_ptr = - LoadSymbol("cuptiActivityPopExternalCorrelationId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(kind, lastId); -} - -CUptiResult CUPTIAPI cuptiActivityEnableLatencyTimestamps(uint8_t enable) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint8_t); - static auto func_ptr = - LoadSymbol("cuptiActivityEnableLatencyTimestamps"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(enable); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cupti_11_0.inc b/third_party/xla/third_party/tsl/tsl/cuda/cupti_11_0.inc deleted file mode 100644 index dac0c7919342bd..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cupti_11_0.inc +++ /dev/null @@ -1,763 +0,0 @@ -// Auto-generated, do not edit. 
- -extern "C" { - -CUptiResult CUPTIAPI cuptiGetResultString(CUptiResult result, - const char **str) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUptiResult, const char **); - static auto func_ptr = LoadSymbol("cuptiGetResultString"); - if (!func_ptr) { - if (str) { - *str = "CUPTI could not be loaded or symbol could not be found."; - } - return GetSymbolNotFoundError(); - } - return func_ptr(result, str); -} - -CUptiResult CUPTIAPI cuptiGetVersion(uint32_t *version) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(version); -} - -CUptiResult CUPTIAPI cuptiSupportedDomains(size_t *domainCount, - CUpti_DomainTable *domainTable) { - using FuncPtr = CUptiResult(CUPTIAPI *)(size_t *, CUpti_DomainTable *); - static auto func_ptr = LoadSymbol("cuptiSupportedDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(domainCount, domainTable); -} - -CUptiResult CUPTIAPI cuptiSubscribe(CUpti_SubscriberHandle *subscriber, - CUpti_CallbackFunc callback, - void *userdata) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_SubscriberHandle *, - CUpti_CallbackFunc, void *); - static auto func_ptr = LoadSymbol("cuptiSubscribe"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(subscriber, callback, userdata); -} - -CUptiResult CUPTIAPI cuptiUnsubscribe(CUpti_SubscriberHandle subscriber) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_SubscriberHandle); - static auto func_ptr = LoadSymbol("cuptiUnsubscribe"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(subscriber); -} - -CUptiResult CUPTIAPI cuptiGetCallbackState(uint32_t *enable, - CUpti_SubscriberHandle subscriber, - CUpti_CallbackDomain domain, - CUpti_CallbackId cbid) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(uint32_t *, CUpti_SubscriberHandle, - CUpti_CallbackDomain, CUpti_CallbackId); - static auto func_ptr = 
LoadSymbol("cuptiGetCallbackState"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(enable, subscriber, domain, cbid); -} - -CUptiResult CUPTIAPI cuptiEnableCallback(uint32_t enable, - CUpti_SubscriberHandle subscriber, - CUpti_CallbackDomain domain, - CUpti_CallbackId cbid) { - using FuncPtr = CUptiResult(CUPTIAPI *)( - uint32_t, CUpti_SubscriberHandle, CUpti_CallbackDomain, CUpti_CallbackId); - static auto func_ptr = LoadSymbol("cuptiEnableCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(enable, subscriber, domain, cbid); -} - -CUptiResult CUPTIAPI cuptiEnableDomain(uint32_t enable, - CUpti_SubscriberHandle subscriber, - CUpti_CallbackDomain domain) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint32_t, CUpti_SubscriberHandle, - CUpti_CallbackDomain); - static auto func_ptr = LoadSymbol("cuptiEnableDomain"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(enable, subscriber, domain); -} - -CUptiResult CUPTIAPI cuptiEnableAllDomains(uint32_t enable, - CUpti_SubscriberHandle subscriber) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint32_t, CUpti_SubscriberHandle); - static auto func_ptr = LoadSymbol("cuptiEnableAllDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(enable, subscriber); -} - -CUptiResult CUPTIAPI cuptiGetCallbackName(CUpti_CallbackDomain domain, - uint32_t cbid, const char **name) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_CallbackDomain, uint32_t, const char **); - static auto func_ptr = LoadSymbol("cuptiGetCallbackName"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(domain, cbid, name); -} - -CUptiResult CUPTIAPI -cuptiSetEventCollectionMode(CUcontext context, CUpti_EventCollectionMode mode) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, CUpti_EventCollectionMode); - static auto func_ptr = LoadSymbol("cuptiSetEventCollectionMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, 
mode); -} - -CUptiResult CUPTIAPI cuptiDeviceGetAttribute(CUdevice device, - CUpti_DeviceAttribute attrib, - size_t *valueSize, void *value) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUdevice, CUpti_DeviceAttribute, - size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiDeviceGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiDeviceGetTimestamp(CUcontext context, - uint64_t *timestamp) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, uint64_t *); - static auto func_ptr = LoadSymbol("cuptiDeviceGetTimestamp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, timestamp); -} - -CUptiResult CUPTIAPI cuptiDeviceGetNumEventDomains(CUdevice device, - uint32_t *numDomains) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUdevice, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiDeviceGetNumEventDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, numDomains); -} - -CUptiResult CUPTIAPI cuptiDeviceEnumEventDomains( - CUdevice device, size_t *arraySizeBytes, CUpti_EventDomainID *domainArray) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUdevice, size_t *, CUpti_EventDomainID *); - static auto func_ptr = LoadSymbol("cuptiDeviceEnumEventDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, arraySizeBytes, domainArray); -} - -CUptiResult CUPTIAPI cuptiDeviceGetEventDomainAttribute( - CUdevice device, CUpti_EventDomainID eventDomain, - CUpti_EventDomainAttribute attrib, size_t *valueSize, void *value) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUdevice, CUpti_EventDomainID, - CUpti_EventDomainAttribute, size_t *, void *); - static auto func_ptr = - LoadSymbol("cuptiDeviceGetEventDomainAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, eventDomain, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI 
cuptiGetNumEventDomains(uint32_t *numDomains) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetNumEventDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numDomains); -} - -CUptiResult CUPTIAPI cuptiEnumEventDomains(size_t *arraySizeBytes, - CUpti_EventDomainID *domainArray) { - using FuncPtr = CUptiResult(CUPTIAPI *)(size_t *, CUpti_EventDomainID *); - static auto func_ptr = LoadSymbol("cuptiEnumEventDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(arraySizeBytes, domainArray); -} - -CUptiResult CUPTIAPI cuptiEventDomainGetAttribute( - CUpti_EventDomainID eventDomain, CUpti_EventDomainAttribute attrib, - size_t *valueSize, void *value) { - using FuncPtr = CUptiResult(CUPTIAPI *)( - CUpti_EventDomainID, CUpti_EventDomainAttribute, size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiEventDomainGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventDomain, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiEventDomainGetNumEvents( - CUpti_EventDomainID eventDomain, uint32_t *numEvents) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventDomainID, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiEventDomainGetNumEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventDomain, numEvents); -} - -CUptiResult CUPTIAPI cuptiEventDomainEnumEvents(CUpti_EventDomainID eventDomain, - size_t *arraySizeBytes, - CUpti_EventID *eventArray) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_EventDomainID, size_t *, CUpti_EventID *); - static auto func_ptr = LoadSymbol("cuptiEventDomainEnumEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventDomain, arraySizeBytes, eventArray); -} - -CUptiResult CUPTIAPI cuptiEventGetAttribute(CUpti_EventID event, - CUpti_EventAttribute attrib, - size_t *valueSize, void *value) { - using FuncPtr = CUptiResult(CUPTIAPI 
*)(CUpti_EventID, CUpti_EventAttribute, - size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiEventGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiEventGetIdFromName(CUdevice device, - const char *eventName, - CUpti_EventID *event) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUdevice, const char *, CUpti_EventID *); - static auto func_ptr = LoadSymbol("cuptiEventGetIdFromName"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, eventName, event); -} - -CUptiResult CUPTIAPI cuptiEventGroupCreate(CUcontext context, - CUpti_EventGroup *eventGroup, - uint32_t flags) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUcontext, CUpti_EventGroup *, uint32_t); - static auto func_ptr = LoadSymbol("cuptiEventGroupCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, eventGroup, flags); -} - -CUptiResult CUPTIAPI cuptiEventGroupDestroy(CUpti_EventGroup eventGroup) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup); - static auto func_ptr = LoadSymbol("cuptiEventGroupDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup); -} - -CUptiResult CUPTIAPI cuptiEventGroupGetAttribute( - CUpti_EventGroup eventGroup, CUpti_EventGroupAttribute attrib, - size_t *valueSize, void *value) { - using FuncPtr = CUptiResult(CUPTIAPI *)( - CUpti_EventGroup, CUpti_EventGroupAttribute, size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiEventGroupGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiEventGroupSetAttribute( - CUpti_EventGroup eventGroup, CUpti_EventGroupAttribute attrib, - size_t valueSize, void *value) { - using FuncPtr = CUptiResult(CUPTIAPI *)( - CUpti_EventGroup, CUpti_EventGroupAttribute, size_t, void *); - static auto func_ptr = 
LoadSymbol("cuptiEventGroupSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiEventGroupAddEvent(CUpti_EventGroup eventGroup, - CUpti_EventID event) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup, CUpti_EventID); - static auto func_ptr = LoadSymbol("cuptiEventGroupAddEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, event); -} - -CUptiResult CUPTIAPI cuptiEventGroupRemoveEvent(CUpti_EventGroup eventGroup, - CUpti_EventID event) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup, CUpti_EventID); - static auto func_ptr = LoadSymbol("cuptiEventGroupRemoveEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, event); -} - -CUptiResult CUPTIAPI -cuptiEventGroupRemoveAllEvents(CUpti_EventGroup eventGroup) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup); - static auto func_ptr = LoadSymbol("cuptiEventGroupRemoveAllEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup); -} - -CUptiResult CUPTIAPI -cuptiEventGroupResetAllEvents(CUpti_EventGroup eventGroup) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup); - static auto func_ptr = LoadSymbol("cuptiEventGroupResetAllEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup); -} - -CUptiResult CUPTIAPI cuptiEventGroupEnable(CUpti_EventGroup eventGroup) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup); - static auto func_ptr = LoadSymbol("cuptiEventGroupEnable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup); -} - -CUptiResult CUPTIAPI cuptiEventGroupDisable(CUpti_EventGroup eventGroup) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup); - static auto func_ptr = LoadSymbol("cuptiEventGroupDisable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup); -} 
- -CUptiResult CUPTIAPI cuptiEventGroupReadEvent(CUpti_EventGroup eventGroup, - CUpti_ReadEventFlags flags, - CUpti_EventID event, - size_t *eventValueBufferSizeBytes, - uint64_t *eventValueBuffer) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_EventGroup, CUpti_ReadEventFlags, - CUpti_EventID, size_t *, uint64_t *); - static auto func_ptr = LoadSymbol("cuptiEventGroupReadEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, flags, event, eventValueBufferSizeBytes, - eventValueBuffer); -} - -CUptiResult CUPTIAPI cuptiEventGroupReadAllEvents( - CUpti_EventGroup eventGroup, CUpti_ReadEventFlags flags, - size_t *eventValueBufferSizeBytes, uint64_t *eventValueBuffer, - size_t *eventIdArraySizeBytes, CUpti_EventID *eventIdArray, - size_t *numEventIdsRead) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_EventGroup, CUpti_ReadEventFlags, size_t *, - uint64_t *, size_t *, CUpti_EventID *, size_t *); - static auto func_ptr = LoadSymbol("cuptiEventGroupReadAllEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, flags, eventValueBufferSizeBytes, - eventValueBuffer, eventIdArraySizeBytes, eventIdArray, - numEventIdsRead); -} - -CUptiResult CUPTIAPI cuptiEventGroupSetsCreate( - CUcontext context, size_t eventIdArraySizeBytes, - CUpti_EventID *eventIdArray, CUpti_EventGroupSets **eventGroupPasses) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, size_t, CUpti_EventID *, - CUpti_EventGroupSets **); - static auto func_ptr = LoadSymbol("cuptiEventGroupSetsCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, eventIdArraySizeBytes, eventIdArray, - eventGroupPasses); -} - -CUptiResult CUPTIAPI -cuptiEventGroupSetsDestroy(CUpti_EventGroupSets *eventGroupSets) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroupSets *); - static auto func_ptr = LoadSymbol("cuptiEventGroupSetsDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(eventGroupSets); -} - -CUptiResult CUPTIAPI -cuptiEventGroupSetEnable(CUpti_EventGroupSet *eventGroupSet) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroupSet *); - static auto func_ptr = LoadSymbol("cuptiEventGroupSetEnable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroupSet); -} - -CUptiResult CUPTIAPI -cuptiEventGroupSetDisable(CUpti_EventGroupSet *eventGroupSet) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroupSet *); - static auto func_ptr = LoadSymbol("cuptiEventGroupSetDisable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroupSet); -} - -CUptiResult CUPTIAPI cuptiEnableKernelReplayMode(CUcontext context) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuptiEnableKernelReplayMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context); -} - -CUptiResult CUPTIAPI cuptiDisableKernelReplayMode(CUcontext context) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuptiDisableKernelReplayMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context); -} - -CUptiResult CUPTIAPI cuptiKernelReplaySubscribeUpdate( - CUpti_KernelReplayUpdateFunc updateFunc, void *customData) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_KernelReplayUpdateFunc, void *); - static auto func_ptr = - LoadSymbol("cuptiKernelReplaySubscribeUpdate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(updateFunc, customData); -} - -CUptiResult CUPTIAPI cuptiGetNumMetrics(uint32_t *numMetrics) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetNumMetrics"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numMetrics); -} - -CUptiResult CUPTIAPI cuptiEnumMetrics(size_t *arraySizeBytes, - CUpti_MetricID *metricArray) { - using FuncPtr = CUptiResult(CUPTIAPI *)(size_t *, CUpti_MetricID 
*); - static auto func_ptr = LoadSymbol("cuptiEnumMetrics"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(arraySizeBytes, metricArray); -} - -CUptiResult CUPTIAPI cuptiDeviceGetNumMetrics(CUdevice device, - uint32_t *numMetrics) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUdevice, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiDeviceGetNumMetrics"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, numMetrics); -} - -CUptiResult CUPTIAPI cuptiDeviceEnumMetrics(CUdevice device, - size_t *arraySizeBytes, - CUpti_MetricID *metricArray) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUdevice, size_t *, CUpti_MetricID *); - static auto func_ptr = LoadSymbol("cuptiDeviceEnumMetrics"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, arraySizeBytes, metricArray); -} - -CUptiResult CUPTIAPI cuptiMetricGetAttribute(CUpti_MetricID metric, - CUpti_MetricAttribute attrib, - size_t *valueSize, void *value) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_MetricID, CUpti_MetricAttribute, - size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiMetricGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiMetricGetIdFromName(CUdevice device, - const char *metricName, - CUpti_MetricID *metric) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUdevice, const char *, CUpti_MetricID *); - static auto func_ptr = LoadSymbol("cuptiMetricGetIdFromName"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, metricName, metric); -} - -CUptiResult CUPTIAPI cuptiMetricGetNumEvents(CUpti_MetricID metric, - uint32_t *numEvents) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_MetricID, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiMetricGetNumEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, numEvents); -} - -CUptiResult CUPTIAPI 
cuptiMetricEnumEvents(CUpti_MetricID metric, - size_t *eventIdArraySizeBytes, - CUpti_EventID *eventIdArray) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_MetricID, size_t *, CUpti_EventID *); - static auto func_ptr = LoadSymbol("cuptiMetricEnumEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, eventIdArraySizeBytes, eventIdArray); -} - -CUptiResult CUPTIAPI cuptiMetricGetNumProperties(CUpti_MetricID metric, - uint32_t *numProp) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_MetricID, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiMetricGetNumProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, numProp); -} - -CUptiResult CUPTIAPI -cuptiMetricEnumProperties(CUpti_MetricID metric, size_t *propIdArraySizeBytes, - CUpti_MetricPropertyID *propIdArray) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_MetricID, size_t *, - CUpti_MetricPropertyID *); - static auto func_ptr = LoadSymbol("cuptiMetricEnumProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, propIdArraySizeBytes, propIdArray); -} - -CUptiResult CUPTIAPI -cuptiMetricGetRequiredEventGroupSets(CUcontext context, CUpti_MetricID metric, - CUpti_EventGroupSets **eventGroupSets) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, CUpti_MetricID, - CUpti_EventGroupSets **); - static auto func_ptr = - LoadSymbol("cuptiMetricGetRequiredEventGroupSets"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, metric, eventGroupSets); -} - -CUptiResult CUPTIAPI cuptiMetricCreateEventGroupSets( - CUcontext context, size_t metricIdArraySizeBytes, - CUpti_MetricID *metricIdArray, CUpti_EventGroupSets **eventGroupPasses) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, size_t, CUpti_MetricID *, - CUpti_EventGroupSets **); - static auto func_ptr = LoadSymbol("cuptiMetricCreateEventGroupSets"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, 
metricIdArraySizeBytes, metricIdArray, - eventGroupPasses); -} - -CUptiResult CUPTIAPI cuptiMetricGetValue(CUdevice device, CUpti_MetricID metric, - size_t eventIdArraySizeBytes, - CUpti_EventID *eventIdArray, - size_t eventValueArraySizeBytes, - uint64_t *eventValueArray, - uint64_t timeDuration, - CUpti_MetricValue *metricValue) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUdevice, CUpti_MetricID, size_t, - CUpti_EventID *, size_t, uint64_t *, - uint64_t, CUpti_MetricValue *); - static auto func_ptr = LoadSymbol("cuptiMetricGetValue"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, metric, eventIdArraySizeBytes, eventIdArray, - eventValueArraySizeBytes, eventValueArray, timeDuration, - metricValue); -} - -CUptiResult CUPTIAPI cuptiMetricGetValue2( - CUpti_MetricID metric, size_t eventIdArraySizeBytes, - CUpti_EventID *eventIdArray, size_t eventValueArraySizeBytes, - uint64_t *eventValueArray, size_t propIdArraySizeBytes, - CUpti_MetricPropertyID *propIdArray, size_t propValueArraySizeBytes, - uint64_t *propValueArray, CUpti_MetricValue *metricValue) { - using FuncPtr = CUptiResult(CUPTIAPI *)( - CUpti_MetricID, size_t, CUpti_EventID *, size_t, uint64_t *, size_t, - CUpti_MetricPropertyID *, size_t, uint64_t *, CUpti_MetricValue *); - static auto func_ptr = LoadSymbol("cuptiMetricGetValue2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, eventIdArraySizeBytes, eventIdArray, - eventValueArraySizeBytes, eventValueArray, - propIdArraySizeBytes, propIdArray, propValueArraySizeBytes, - propValueArray, metricValue); -} - -CUptiResult CUPTIAPI cuptiGetTimestamp(uint64_t *timestamp) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint64_t *); - static auto func_ptr = LoadSymbol("cuptiGetTimestamp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(timestamp); -} - -CUptiResult CUPTIAPI cuptiGetContextId(CUcontext context, uint32_t *contextId) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, 
uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetContextId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, contextId); -} - -CUptiResult CUPTIAPI cuptiGetStreamId(CUcontext context, CUstream stream, - uint32_t *streamId) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, CUstream, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetStreamId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, stream, streamId); -} - -CUptiResult CUPTIAPI cuptiGetStreamIdEx(CUcontext context, CUstream stream, - uint8_t perThreadStream, - uint32_t *streamId) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUcontext, CUstream, uint8_t, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetStreamIdEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, stream, perThreadStream, streamId); -} - -CUptiResult CUPTIAPI cuptiGetDeviceId(CUcontext context, uint32_t *deviceId) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetDeviceId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, deviceId); -} - -CUptiResult CUPTIAPI cuptiActivityEnable(CUpti_ActivityKind kind) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_ActivityKind); - static auto func_ptr = LoadSymbol("cuptiActivityEnable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(kind); -} - -CUptiResult CUPTIAPI cuptiActivityDisable(CUpti_ActivityKind kind) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_ActivityKind); - static auto func_ptr = LoadSymbol("cuptiActivityDisable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(kind); -} - -CUptiResult CUPTIAPI cuptiActivityEnableContext(CUcontext context, - CUpti_ActivityKind kind) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, CUpti_ActivityKind); - static auto func_ptr = LoadSymbol("cuptiActivityEnableContext"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(context, kind); -} - -CUptiResult CUPTIAPI cuptiActivityDisableContext(CUcontext context, - CUpti_ActivityKind kind) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, CUpti_ActivityKind); - static auto func_ptr = LoadSymbol("cuptiActivityDisableContext"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, kind); -} - -CUptiResult CUPTIAPI cuptiActivityGetNumDroppedRecords(CUcontext context, - uint32_t streamId, - size_t *dropped) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, uint32_t, size_t *); - static auto func_ptr = - LoadSymbol("cuptiActivityGetNumDroppedRecords"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, streamId, dropped); -} - -CUptiResult CUPTIAPI cuptiActivityGetNextRecord(uint8_t *buffer, - size_t validBufferSizeBytes, - CUpti_Activity **record) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint8_t *, size_t, CUpti_Activity **); - static auto func_ptr = LoadSymbol("cuptiActivityGetNextRecord"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(buffer, validBufferSizeBytes, record); -} - -CUptiResult CUPTIAPI cuptiActivityRegisterCallbacks( - CUpti_BuffersCallbackRequestFunc funcBufferRequested, - CUpti_BuffersCallbackCompleteFunc funcBufferCompleted) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_BuffersCallbackRequestFunc, - CUpti_BuffersCallbackCompleteFunc); - static auto func_ptr = LoadSymbol("cuptiActivityRegisterCallbacks"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(funcBufferRequested, funcBufferCompleted); -} - -CUptiResult CUPTIAPI cuptiActivityFlush(CUcontext context, uint32_t streamId, - uint32_t flag) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, uint32_t, uint32_t); - static auto func_ptr = LoadSymbol("cuptiActivityFlush"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, streamId, flag); -} - -CUptiResult CUPTIAPI 
cuptiActivityFlushAll(uint32_t flag) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint32_t); - static auto func_ptr = LoadSymbol("cuptiActivityFlushAll"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flag); -} - -CUptiResult CUPTIAPI cuptiActivityGetAttribute(CUpti_ActivityAttribute attr, - size_t *valueSize, void *value) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_ActivityAttribute, size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiActivityGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attr, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiActivitySetAttribute(CUpti_ActivityAttribute attr, - size_t *valueSize, void *value) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_ActivityAttribute, size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiActivitySetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attr, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiActivityConfigureUnifiedMemoryCounter( - CUpti_ActivityUnifiedMemoryCounterConfig *config, uint32_t count) { - using FuncPtr = CUptiResult(CUPTIAPI *)( - CUpti_ActivityUnifiedMemoryCounterConfig *, uint32_t); - static auto func_ptr = - LoadSymbol("cuptiActivityConfigureUnifiedMemoryCounter"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config, count); -} - -CUptiResult CUPTIAPI -cuptiGetAutoBoostState(CUcontext context, CUpti_ActivityAutoBoostState *state) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUcontext, CUpti_ActivityAutoBoostState *); - static auto func_ptr = LoadSymbol("cuptiGetAutoBoostState"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, state); -} - -CUptiResult CUPTIAPI cuptiActivityConfigurePCSampling( - CUcontext ctx, CUpti_ActivityPCSamplingConfig *config) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUcontext, CUpti_ActivityPCSamplingConfig *); - static auto func_ptr = - 
LoadSymbol("cuptiActivityConfigurePCSampling"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx, config); -} - -CUptiResult CUPTIAPI cuptiGetLastError(void) { - using FuncPtr = CUptiResult(CUPTIAPI *)(); - static auto func_ptr = LoadSymbol("cuptiGetLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -CUptiResult CUPTIAPI cuptiSetThreadIdType(CUpti_ActivityThreadIdType type) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_ActivityThreadIdType); - static auto func_ptr = LoadSymbol("cuptiSetThreadIdType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type); -} - -CUptiResult CUPTIAPI cuptiGetThreadIdType(CUpti_ActivityThreadIdType *type) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_ActivityThreadIdType *); - static auto func_ptr = LoadSymbol("cuptiGetThreadIdType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type); -} - -CUptiResult CUPTIAPI cuptiComputeCapabilitySupported(int major, int minor, - int *support) { - using FuncPtr = CUptiResult(CUPTIAPI *)(int, int, int *); - static auto func_ptr = LoadSymbol("cuptiComputeCapabilitySupported"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(major, minor, support); -} - -CUptiResult CUPTIAPI cuptiDeviceSupported(CUdevice dev, int *support) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUdevice, int *); - static auto func_ptr = LoadSymbol("cuptiDeviceSupported"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev, support); -} - -CUptiResult CUPTIAPI cuptiFinalize(void) { - using FuncPtr = CUptiResult(CUPTIAPI *)(); - static auto func_ptr = LoadSymbol("cuptiFinalize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -CUptiResult CUPTIAPI cuptiActivityPushExternalCorrelationId( - CUpti_ExternalCorrelationKind kind, uint64_t id) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_ExternalCorrelationKind, uint64_t); - static auto func_ptr = - 
LoadSymbol("cuptiActivityPushExternalCorrelationId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(kind, id); -} - -CUptiResult CUPTIAPI cuptiActivityPopExternalCorrelationId( - CUpti_ExternalCorrelationKind kind, uint64_t *lastId) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_ExternalCorrelationKind, uint64_t *); - static auto func_ptr = - LoadSymbol("cuptiActivityPopExternalCorrelationId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(kind, lastId); -} - -CUptiResult CUPTIAPI cuptiActivityEnableLatencyTimestamps(uint8_t enable) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint8_t); - static auto func_ptr = - LoadSymbol("cuptiActivityEnableLatencyTimestamps"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(enable); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cupti_12_0.inc b/third_party/xla/third_party/tsl/tsl/cuda/cupti_12_0.inc deleted file mode 100644 index 7396fdde55eb1c..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cupti_12_0.inc +++ /dev/null @@ -1,744 +0,0 @@ -// Auto-generated, do not edit. 
- -extern "C" { - -CUptiResult CUPTIAPI cuptiGetResultString(CUptiResult result, const char **str) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUptiResult, const char **); - static auto func_ptr = LoadSymbol("cuptiGetResultString"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(result, str); -} - -CUptiResult CUPTIAPI cuptiGetVersion(uint32_t *version) { - using FuncPtr = CUptiResult (CUPTIAPI *)(uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(version); -} - -CUptiResult CUPTIAPI cuptiSupportedDomains(size_t *domainCount, - CUpti_DomainTable *domainTable) { - using FuncPtr = CUptiResult (CUPTIAPI *)(size_t *, CUpti_DomainTable *); - static auto func_ptr = LoadSymbol("cuptiSupportedDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(domainCount, domainTable); -} - -CUptiResult CUPTIAPI cuptiSubscribe(CUpti_SubscriberHandle *subscriber, - CUpti_CallbackFunc callback, - void *userdata) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_SubscriberHandle *, CUpti_CallbackFunc, void *); - static auto func_ptr = LoadSymbol("cuptiSubscribe"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(subscriber, callback, userdata); -} - -CUptiResult CUPTIAPI cuptiUnsubscribe(CUpti_SubscriberHandle subscriber) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_SubscriberHandle); - static auto func_ptr = LoadSymbol("cuptiUnsubscribe"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(subscriber); -} - -CUptiResult CUPTIAPI cuptiGetCallbackState(uint32_t *enable, - CUpti_SubscriberHandle subscriber, - CUpti_CallbackDomain domain, - CUpti_CallbackId cbid) { - using FuncPtr = CUptiResult (CUPTIAPI *)(uint32_t *, CUpti_SubscriberHandle, CUpti_CallbackDomain, CUpti_CallbackId); - static auto func_ptr = LoadSymbol("cuptiGetCallbackState"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(enable, 
subscriber, domain, cbid); -} - -CUptiResult CUPTIAPI cuptiEnableCallback(uint32_t enable, - CUpti_SubscriberHandle subscriber, - CUpti_CallbackDomain domain, - CUpti_CallbackId cbid) { - using FuncPtr = CUptiResult (CUPTIAPI *)(uint32_t, CUpti_SubscriberHandle, CUpti_CallbackDomain, CUpti_CallbackId); - static auto func_ptr = LoadSymbol("cuptiEnableCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(enable, subscriber, domain, cbid); -} - -CUptiResult CUPTIAPI cuptiEnableDomain(uint32_t enable, - CUpti_SubscriberHandle subscriber, - CUpti_CallbackDomain domain) { - using FuncPtr = CUptiResult (CUPTIAPI *)(uint32_t, CUpti_SubscriberHandle, CUpti_CallbackDomain); - static auto func_ptr = LoadSymbol("cuptiEnableDomain"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(enable, subscriber, domain); -} - -CUptiResult CUPTIAPI cuptiEnableAllDomains(uint32_t enable, - CUpti_SubscriberHandle subscriber) { - using FuncPtr = CUptiResult (CUPTIAPI *)(uint32_t, CUpti_SubscriberHandle); - static auto func_ptr = LoadSymbol("cuptiEnableAllDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(enable, subscriber); -} - -CUptiResult CUPTIAPI cuptiGetCallbackName(CUpti_CallbackDomain domain, - uint32_t cbid, - const char **name) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_CallbackDomain, uint32_t, const char **); - static auto func_ptr = LoadSymbol("cuptiGetCallbackName"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(domain, cbid, name); -} - -CUptiResult CUPTIAPI cuptiSetEventCollectionMode(CUcontext context, - CUpti_EventCollectionMode mode) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUcontext, CUpti_EventCollectionMode); - static auto func_ptr = LoadSymbol("cuptiSetEventCollectionMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, mode); -} - -CUptiResult CUPTIAPI cuptiDeviceGetAttribute(CUdevice device, - CUpti_DeviceAttribute attrib, - size_t 
*valueSize, - void *value) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUdevice, CUpti_DeviceAttribute, size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiDeviceGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiDeviceGetTimestamp(CUcontext context, - uint64_t *timestamp) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUcontext, uint64_t *); - static auto func_ptr = LoadSymbol("cuptiDeviceGetTimestamp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, timestamp); -} - -CUptiResult CUPTIAPI cuptiDeviceGetNumEventDomains(CUdevice device, - uint32_t *numDomains) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUdevice, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiDeviceGetNumEventDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, numDomains); -} - -CUptiResult CUPTIAPI cuptiDeviceEnumEventDomains(CUdevice device, - size_t *arraySizeBytes, - CUpti_EventDomainID *domainArray) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUdevice, size_t *, CUpti_EventDomainID *); - static auto func_ptr = LoadSymbol("cuptiDeviceEnumEventDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, arraySizeBytes, domainArray); -} - -CUptiResult CUPTIAPI cuptiDeviceGetEventDomainAttribute(CUdevice device, - CUpti_EventDomainID eventDomain, - CUpti_EventDomainAttribute attrib, - size_t *valueSize, - void *value) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUdevice, CUpti_EventDomainID, CUpti_EventDomainAttribute, size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiDeviceGetEventDomainAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, eventDomain, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiGetNumEventDomains(uint32_t *numDomains) { - using FuncPtr = CUptiResult (CUPTIAPI *)(uint32_t *); - static auto func_ptr = 
LoadSymbol("cuptiGetNumEventDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numDomains); -} - -CUptiResult CUPTIAPI cuptiEnumEventDomains(size_t *arraySizeBytes, - CUpti_EventDomainID *domainArray) { - using FuncPtr = CUptiResult (CUPTIAPI *)(size_t *, CUpti_EventDomainID *); - static auto func_ptr = LoadSymbol("cuptiEnumEventDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(arraySizeBytes, domainArray); -} - -CUptiResult CUPTIAPI cuptiEventDomainGetAttribute(CUpti_EventDomainID eventDomain, - CUpti_EventDomainAttribute attrib, - size_t *valueSize, - void *value) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_EventDomainID, CUpti_EventDomainAttribute, size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiEventDomainGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventDomain, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiEventDomainGetNumEvents(CUpti_EventDomainID eventDomain, - uint32_t *numEvents) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_EventDomainID, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiEventDomainGetNumEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventDomain, numEvents); -} - -CUptiResult CUPTIAPI cuptiEventDomainEnumEvents(CUpti_EventDomainID eventDomain, - size_t *arraySizeBytes, - CUpti_EventID *eventArray) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_EventDomainID, size_t *, CUpti_EventID *); - static auto func_ptr = LoadSymbol("cuptiEventDomainEnumEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventDomain, arraySizeBytes, eventArray); -} - -CUptiResult CUPTIAPI cuptiEventGetAttribute(CUpti_EventID event, - CUpti_EventAttribute attrib, - size_t *valueSize, - void *value) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_EventID, CUpti_EventAttribute, size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiEventGetAttribute"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiEventGetIdFromName(CUdevice device, - const char *eventName, - CUpti_EventID *event) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUdevice, const char *, CUpti_EventID *); - static auto func_ptr = LoadSymbol("cuptiEventGetIdFromName"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, eventName, event); -} - -CUptiResult CUPTIAPI cuptiEventGroupCreate(CUcontext context, - CUpti_EventGroup *eventGroup, - uint32_t flags) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUcontext, CUpti_EventGroup *, uint32_t); - static auto func_ptr = LoadSymbol("cuptiEventGroupCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, eventGroup, flags); -} - -CUptiResult CUPTIAPI cuptiEventGroupDestroy(CUpti_EventGroup eventGroup) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_EventGroup); - static auto func_ptr = LoadSymbol("cuptiEventGroupDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup); -} - -CUptiResult CUPTIAPI cuptiEventGroupGetAttribute(CUpti_EventGroup eventGroup, - CUpti_EventGroupAttribute attrib, - size_t *valueSize, - void *value) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_EventGroup, CUpti_EventGroupAttribute, size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiEventGroupGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiEventGroupSetAttribute(CUpti_EventGroup eventGroup, - CUpti_EventGroupAttribute attrib, - size_t valueSize, - void *value) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_EventGroup, CUpti_EventGroupAttribute, size_t, void *); - static auto func_ptr = LoadSymbol("cuptiEventGroupSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, attrib, valueSize, value); -} - 
-CUptiResult CUPTIAPI cuptiEventGroupAddEvent(CUpti_EventGroup eventGroup, - CUpti_EventID event) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_EventGroup, CUpti_EventID); - static auto func_ptr = LoadSymbol("cuptiEventGroupAddEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, event); -} - -CUptiResult CUPTIAPI cuptiEventGroupRemoveEvent(CUpti_EventGroup eventGroup, - CUpti_EventID event) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_EventGroup, CUpti_EventID); - static auto func_ptr = LoadSymbol("cuptiEventGroupRemoveEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, event); -} - -CUptiResult CUPTIAPI cuptiEventGroupRemoveAllEvents(CUpti_EventGroup eventGroup) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_EventGroup); - static auto func_ptr = LoadSymbol("cuptiEventGroupRemoveAllEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup); -} - -CUptiResult CUPTIAPI cuptiEventGroupResetAllEvents(CUpti_EventGroup eventGroup) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_EventGroup); - static auto func_ptr = LoadSymbol("cuptiEventGroupResetAllEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup); -} - -CUptiResult CUPTIAPI cuptiEventGroupEnable(CUpti_EventGroup eventGroup) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_EventGroup); - static auto func_ptr = LoadSymbol("cuptiEventGroupEnable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup); -} - -CUptiResult CUPTIAPI cuptiEventGroupDisable(CUpti_EventGroup eventGroup) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_EventGroup); - static auto func_ptr = LoadSymbol("cuptiEventGroupDisable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup); -} - -CUptiResult CUPTIAPI cuptiEventGroupReadEvent(CUpti_EventGroup eventGroup, - CUpti_ReadEventFlags flags, - CUpti_EventID event, - size_t 
*eventValueBufferSizeBytes, - uint64_t *eventValueBuffer) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_EventGroup, CUpti_ReadEventFlags, CUpti_EventID, size_t *, uint64_t *); - static auto func_ptr = LoadSymbol("cuptiEventGroupReadEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, flags, event, eventValueBufferSizeBytes, eventValueBuffer); -} - -CUptiResult CUPTIAPI cuptiEventGroupReadAllEvents(CUpti_EventGroup eventGroup, - CUpti_ReadEventFlags flags, - size_t *eventValueBufferSizeBytes, - uint64_t *eventValueBuffer, - size_t *eventIdArraySizeBytes, - CUpti_EventID *eventIdArray, - size_t *numEventIdsRead) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_EventGroup, CUpti_ReadEventFlags, size_t *, uint64_t *, size_t *, CUpti_EventID *, size_t *); - static auto func_ptr = LoadSymbol("cuptiEventGroupReadAllEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, flags, eventValueBufferSizeBytes, eventValueBuffer, eventIdArraySizeBytes, eventIdArray, numEventIdsRead); -} - -CUptiResult CUPTIAPI cuptiEventGroupSetsCreate(CUcontext context, - size_t eventIdArraySizeBytes, - CUpti_EventID *eventIdArray, - CUpti_EventGroupSets **eventGroupPasses) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUcontext, size_t, CUpti_EventID *, CUpti_EventGroupSets **); - static auto func_ptr = LoadSymbol("cuptiEventGroupSetsCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, eventIdArraySizeBytes, eventIdArray, eventGroupPasses); -} - -CUptiResult CUPTIAPI cuptiEventGroupSetsDestroy(CUpti_EventGroupSets *eventGroupSets) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_EventGroupSets *); - static auto func_ptr = LoadSymbol("cuptiEventGroupSetsDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroupSets); -} - -CUptiResult CUPTIAPI cuptiEventGroupSetEnable(CUpti_EventGroupSet *eventGroupSet) { - using FuncPtr = CUptiResult (CUPTIAPI 
*)(CUpti_EventGroupSet *); - static auto func_ptr = LoadSymbol("cuptiEventGroupSetEnable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroupSet); -} - -CUptiResult CUPTIAPI cuptiEventGroupSetDisable(CUpti_EventGroupSet *eventGroupSet) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_EventGroupSet *); - static auto func_ptr = LoadSymbol("cuptiEventGroupSetDisable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroupSet); -} - -CUptiResult CUPTIAPI cuptiEnableKernelReplayMode(CUcontext context) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuptiEnableKernelReplayMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context); -} - -CUptiResult CUPTIAPI cuptiDisableKernelReplayMode(CUcontext context) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuptiDisableKernelReplayMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context); -} - -CUptiResult CUPTIAPI cuptiKernelReplaySubscribeUpdate(CUpti_KernelReplayUpdateFunc updateFunc, void *customData) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_KernelReplayUpdateFunc, void *); - static auto func_ptr = LoadSymbol("cuptiKernelReplaySubscribeUpdate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(updateFunc, customData); -} - -CUptiResult CUPTIAPI cuptiGetNumMetrics(uint32_t *numMetrics) { - using FuncPtr = CUptiResult (CUPTIAPI *)(uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetNumMetrics"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numMetrics); -} - -CUptiResult CUPTIAPI cuptiEnumMetrics(size_t *arraySizeBytes, - CUpti_MetricID *metricArray) { - using FuncPtr = CUptiResult (CUPTIAPI *)(size_t *, CUpti_MetricID *); - static auto func_ptr = LoadSymbol("cuptiEnumMetrics"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(arraySizeBytes, 
metricArray); -} - -CUptiResult CUPTIAPI cuptiDeviceGetNumMetrics(CUdevice device, - uint32_t *numMetrics) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUdevice, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiDeviceGetNumMetrics"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, numMetrics); -} - -CUptiResult CUPTIAPI cuptiDeviceEnumMetrics(CUdevice device, - size_t *arraySizeBytes, - CUpti_MetricID *metricArray) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUdevice, size_t *, CUpti_MetricID *); - static auto func_ptr = LoadSymbol("cuptiDeviceEnumMetrics"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, arraySizeBytes, metricArray); -} - -CUptiResult CUPTIAPI cuptiMetricGetAttribute(CUpti_MetricID metric, - CUpti_MetricAttribute attrib, - size_t *valueSize, - void *value) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_MetricID, CUpti_MetricAttribute, size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiMetricGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiMetricGetIdFromName(CUdevice device, - const char *metricName, - CUpti_MetricID *metric) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUdevice, const char *, CUpti_MetricID *); - static auto func_ptr = LoadSymbol("cuptiMetricGetIdFromName"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, metricName, metric); -} - -CUptiResult CUPTIAPI cuptiMetricGetNumEvents(CUpti_MetricID metric, - uint32_t *numEvents) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_MetricID, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiMetricGetNumEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, numEvents); -} - -CUptiResult CUPTIAPI cuptiMetricEnumEvents(CUpti_MetricID metric, - size_t *eventIdArraySizeBytes, - CUpti_EventID *eventIdArray) { - using FuncPtr = CUptiResult (CUPTIAPI 
*)(CUpti_MetricID, size_t *, CUpti_EventID *); - static auto func_ptr = LoadSymbol("cuptiMetricEnumEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, eventIdArraySizeBytes, eventIdArray); -} - -CUptiResult CUPTIAPI cuptiMetricGetNumProperties(CUpti_MetricID metric, - uint32_t *numProp) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_MetricID, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiMetricGetNumProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, numProp); -} - -CUptiResult CUPTIAPI cuptiMetricEnumProperties(CUpti_MetricID metric, - size_t *propIdArraySizeBytes, - CUpti_MetricPropertyID *propIdArray) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_MetricID, size_t *, CUpti_MetricPropertyID *); - static auto func_ptr = LoadSymbol("cuptiMetricEnumProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, propIdArraySizeBytes, propIdArray); -} - -CUptiResult CUPTIAPI cuptiMetricGetRequiredEventGroupSets(CUcontext context, - CUpti_MetricID metric, - CUpti_EventGroupSets **eventGroupSets) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUcontext, CUpti_MetricID, CUpti_EventGroupSets **); - static auto func_ptr = LoadSymbol("cuptiMetricGetRequiredEventGroupSets"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, metric, eventGroupSets); -} - -CUptiResult CUPTIAPI cuptiMetricCreateEventGroupSets(CUcontext context, - size_t metricIdArraySizeBytes, - CUpti_MetricID *metricIdArray, - CUpti_EventGroupSets **eventGroupPasses) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUcontext, size_t, CUpti_MetricID *, CUpti_EventGroupSets **); - static auto func_ptr = LoadSymbol("cuptiMetricCreateEventGroupSets"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, metricIdArraySizeBytes, metricIdArray, eventGroupPasses); -} - -CUptiResult CUPTIAPI cuptiMetricGetValue(CUdevice device, - CUpti_MetricID metric, - size_t 
eventIdArraySizeBytes, - CUpti_EventID *eventIdArray, - size_t eventValueArraySizeBytes, - uint64_t *eventValueArray, - uint64_t timeDuration, - CUpti_MetricValue *metricValue) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUdevice, CUpti_MetricID, size_t, CUpti_EventID *, size_t, uint64_t *, uint64_t, CUpti_MetricValue *); - static auto func_ptr = LoadSymbol("cuptiMetricGetValue"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, metric, eventIdArraySizeBytes, eventIdArray, eventValueArraySizeBytes, eventValueArray, timeDuration, metricValue); -} - -CUptiResult CUPTIAPI cuptiMetricGetValue2(CUpti_MetricID metric, - size_t eventIdArraySizeBytes, - CUpti_EventID *eventIdArray, - size_t eventValueArraySizeBytes, - uint64_t *eventValueArray, - size_t propIdArraySizeBytes, - CUpti_MetricPropertyID *propIdArray, - size_t propValueArraySizeBytes, - uint64_t *propValueArray, - CUpti_MetricValue *metricValue) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_MetricID, size_t, CUpti_EventID *, size_t, uint64_t *, size_t, CUpti_MetricPropertyID *, size_t, uint64_t *, CUpti_MetricValue *); - static auto func_ptr = LoadSymbol("cuptiMetricGetValue2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, eventIdArraySizeBytes, eventIdArray, eventValueArraySizeBytes, eventValueArray, propIdArraySizeBytes, propIdArray, propValueArraySizeBytes, propValueArray, metricValue); -} - -CUptiResult CUPTIAPI cuptiGetTimestamp(uint64_t *timestamp) { - using FuncPtr = CUptiResult (CUPTIAPI *)(uint64_t *); - static auto func_ptr = LoadSymbol("cuptiGetTimestamp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(timestamp); -} - -CUptiResult CUPTIAPI cuptiGetContextId(CUcontext context, uint32_t *contextId) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUcontext, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetContextId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, contextId); -} - 
-CUptiResult CUPTIAPI cuptiGetStreamId(CUcontext context, CUstream stream, uint32_t *streamId) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUcontext, CUstream, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetStreamId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, stream, streamId); -} - -CUptiResult CUPTIAPI cuptiGetStreamIdEx(CUcontext context, CUstream stream, uint8_t perThreadStream, uint32_t *streamId) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUcontext, CUstream, uint8_t, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetStreamIdEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, stream, perThreadStream, streamId); -} - -CUptiResult CUPTIAPI cuptiGetDeviceId(CUcontext context, uint32_t *deviceId) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUcontext, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetDeviceId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, deviceId); -} - -CUptiResult CUPTIAPI cuptiGetGraphNodeId(CUgraphNode node, uint64_t *nodeId) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUgraphNode, uint64_t *); - static auto func_ptr = LoadSymbol("cuptiGetGraphNodeId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, nodeId); -} - -CUptiResult CUPTIAPI cuptiGetGraphId(CUgraph graph, uint32_t *pId) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUgraph, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetGraphId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, pId); -} - -CUptiResult CUPTIAPI cuptiActivityEnable(CUpti_ActivityKind kind) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_ActivityKind); - static auto func_ptr = LoadSymbol("cuptiActivityEnable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(kind); -} - -CUptiResult CUPTIAPI cuptiActivityEnableAndDump(CUpti_ActivityKind kind) { - using FuncPtr = CUptiResult (CUPTIAPI 
*)(CUpti_ActivityKind); - static auto func_ptr = LoadSymbol("cuptiActivityEnableAndDump"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(kind); -} - -CUptiResult CUPTIAPI cuptiActivityDisable(CUpti_ActivityKind kind) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_ActivityKind); - static auto func_ptr = LoadSymbol("cuptiActivityDisable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(kind); -} - -CUptiResult CUPTIAPI cuptiActivityEnableContext(CUcontext context, CUpti_ActivityKind kind) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUcontext, CUpti_ActivityKind); - static auto func_ptr = LoadSymbol("cuptiActivityEnableContext"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, kind); -} - -CUptiResult CUPTIAPI cuptiActivityDisableContext(CUcontext context, CUpti_ActivityKind kind) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUcontext, CUpti_ActivityKind); - static auto func_ptr = LoadSymbol("cuptiActivityDisableContext"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, kind); -} - -CUptiResult CUPTIAPI cuptiActivityGetNumDroppedRecords(CUcontext context, uint32_t streamId, - size_t *dropped) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUcontext, uint32_t, size_t *); - static auto func_ptr = LoadSymbol("cuptiActivityGetNumDroppedRecords"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, streamId, dropped); -} - -CUptiResult CUPTIAPI cuptiActivityGetNextRecord(uint8_t* buffer, size_t validBufferSizeBytes, - CUpti_Activity **record) { - using FuncPtr = CUptiResult (CUPTIAPI *)(uint8_t *, size_t, CUpti_Activity **); - static auto func_ptr = LoadSymbol("cuptiActivityGetNextRecord"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(buffer, validBufferSizeBytes, record); -} - -CUptiResult CUPTIAPI cuptiActivityRegisterCallbacks(CUpti_BuffersCallbackRequestFunc funcBufferRequested, - CUpti_BuffersCallbackCompleteFunc 
funcBufferCompleted) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_BuffersCallbackRequestFunc, CUpti_BuffersCallbackCompleteFunc); - static auto func_ptr = LoadSymbol("cuptiActivityRegisterCallbacks"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(funcBufferRequested, funcBufferCompleted); -} - -CUptiResult CUPTIAPI cuptiActivityFlush(CUcontext context, uint32_t streamId, uint32_t flag) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUcontext, uint32_t, uint32_t); - static auto func_ptr = LoadSymbol("cuptiActivityFlush"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, streamId, flag); -} - -CUptiResult CUPTIAPI cuptiActivityFlushAll(uint32_t flag) { - using FuncPtr = CUptiResult (CUPTIAPI *)(uint32_t); - static auto func_ptr = LoadSymbol("cuptiActivityFlushAll"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flag); -} - -CUptiResult CUPTIAPI cuptiActivityGetAttribute(CUpti_ActivityAttribute attr, - size_t *valueSize, void* value) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_ActivityAttribute, size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiActivityGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attr, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiActivitySetAttribute(CUpti_ActivityAttribute attr, - size_t *valueSize, void* value) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_ActivityAttribute, size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiActivitySetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attr, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiActivityConfigureUnifiedMemoryCounter(CUpti_ActivityUnifiedMemoryCounterConfig *config, uint32_t count) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_ActivityUnifiedMemoryCounterConfig *, uint32_t); - static auto func_ptr = LoadSymbol("cuptiActivityConfigureUnifiedMemoryCounter"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(config, count); -} - -CUptiResult CUPTIAPI cuptiGetAutoBoostState(CUcontext context, CUpti_ActivityAutoBoostState *state) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUcontext, CUpti_ActivityAutoBoostState *); - static auto func_ptr = LoadSymbol("cuptiGetAutoBoostState"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, state); -} - -CUptiResult CUPTIAPI cuptiActivityConfigurePCSampling(CUcontext ctx, CUpti_ActivityPCSamplingConfig *config) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUcontext, CUpti_ActivityPCSamplingConfig *); - static auto func_ptr = LoadSymbol("cuptiActivityConfigurePCSampling"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx, config); -} - -CUptiResult CUPTIAPI cuptiGetLastError(void) { - using FuncPtr = CUptiResult (CUPTIAPI *)(); - static auto func_ptr = LoadSymbol("cuptiGetLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -CUptiResult CUPTIAPI cuptiSetThreadIdType(CUpti_ActivityThreadIdType type) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_ActivityThreadIdType); - static auto func_ptr = LoadSymbol("cuptiSetThreadIdType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type); -} - -CUptiResult CUPTIAPI cuptiGetThreadIdType(CUpti_ActivityThreadIdType *type) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_ActivityThreadIdType *); - static auto func_ptr = LoadSymbol("cuptiGetThreadIdType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type); -} - -CUptiResult CUPTIAPI cuptiComputeCapabilitySupported(int major, int minor, int *support) { - using FuncPtr = CUptiResult (CUPTIAPI *)(int, int, int *); - static auto func_ptr = LoadSymbol("cuptiComputeCapabilitySupported"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(major, minor, support); -} - -CUptiResult CUPTIAPI cuptiDeviceSupported(CUdevice dev, int *support) { - using FuncPtr = 
CUptiResult (CUPTIAPI *)(CUdevice, int *); - static auto func_ptr = LoadSymbol("cuptiDeviceSupported"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev, support); -} - -CUptiResult CUPTIAPI cuptiDeviceVirtualizationMode(CUdevice dev, CUpti_DeviceVirtualizationMode *mode) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUdevice, CUpti_DeviceVirtualizationMode *); - static auto func_ptr = LoadSymbol("cuptiDeviceVirtualizationMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev, mode); -} - -CUptiResult CUPTIAPI cuptiFinalize(void) { - using FuncPtr = CUptiResult (CUPTIAPI *)(); - static auto func_ptr = LoadSymbol("cuptiFinalize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -CUptiResult CUPTIAPI cuptiActivityPushExternalCorrelationId(CUpti_ExternalCorrelationKind kind, uint64_t id) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_ExternalCorrelationKind, uint64_t); - static auto func_ptr = LoadSymbol("cuptiActivityPushExternalCorrelationId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(kind, id); -} - -CUptiResult CUPTIAPI cuptiActivityPopExternalCorrelationId(CUpti_ExternalCorrelationKind kind, uint64_t *lastId) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_ExternalCorrelationKind, uint64_t *); - static auto func_ptr = LoadSymbol("cuptiActivityPopExternalCorrelationId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(kind, lastId); -} - -CUptiResult CUPTIAPI cuptiActivityEnableLatencyTimestamps(uint8_t enable) { - using FuncPtr = CUptiResult (CUPTIAPI *)(uint8_t); - static auto func_ptr = LoadSymbol("cuptiActivityEnableLatencyTimestamps"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(enable); -} - -CUptiResult CUPTIAPI cuptiActivityFlushPeriod(uint32_t time) { - using FuncPtr = CUptiResult (CUPTIAPI *)(uint32_t); - static auto func_ptr = LoadSymbol("cuptiActivityFlushPeriod"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(time); -} - -CUptiResult CUPTIAPI cuptiActivityEnableLaunchAttributes(uint8_t enable) { - using FuncPtr = CUptiResult (CUPTIAPI *)(uint8_t); - static auto func_ptr = LoadSymbol("cuptiActivityEnableLaunchAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(enable); -} - -CUptiResult CUPTIAPI cuptiActivityRegisterTimestampCallback(CUpti_TimestampCallbackFunc funcTimestamp) { - using FuncPtr = CUptiResult (CUPTIAPI *)(CUpti_TimestampCallbackFunc); - static auto func_ptr = LoadSymbol("cuptiActivityRegisterTimestampCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(funcTimestamp); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cupti_9_0.inc b/third_party/xla/third_party/tsl/tsl/cuda/cupti_9_0.inc deleted file mode 100644 index dac0c7919342bd..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cupti_9_0.inc +++ /dev/null @@ -1,763 +0,0 @@ -// Auto-generated, do not edit. 
- -extern "C" { - -CUptiResult CUPTIAPI cuptiGetResultString(CUptiResult result, - const char **str) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUptiResult, const char **); - static auto func_ptr = LoadSymbol("cuptiGetResultString"); - if (!func_ptr) { - if (str) { - *str = "CUPTI could not be loaded or symbol could not be found."; - } - return GetSymbolNotFoundError(); - } - return func_ptr(result, str); -} - -CUptiResult CUPTIAPI cuptiGetVersion(uint32_t *version) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(version); -} - -CUptiResult CUPTIAPI cuptiSupportedDomains(size_t *domainCount, - CUpti_DomainTable *domainTable) { - using FuncPtr = CUptiResult(CUPTIAPI *)(size_t *, CUpti_DomainTable *); - static auto func_ptr = LoadSymbol("cuptiSupportedDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(domainCount, domainTable); -} - -CUptiResult CUPTIAPI cuptiSubscribe(CUpti_SubscriberHandle *subscriber, - CUpti_CallbackFunc callback, - void *userdata) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_SubscriberHandle *, - CUpti_CallbackFunc, void *); - static auto func_ptr = LoadSymbol("cuptiSubscribe"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(subscriber, callback, userdata); -} - -CUptiResult CUPTIAPI cuptiUnsubscribe(CUpti_SubscriberHandle subscriber) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_SubscriberHandle); - static auto func_ptr = LoadSymbol("cuptiUnsubscribe"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(subscriber); -} - -CUptiResult CUPTIAPI cuptiGetCallbackState(uint32_t *enable, - CUpti_SubscriberHandle subscriber, - CUpti_CallbackDomain domain, - CUpti_CallbackId cbid) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(uint32_t *, CUpti_SubscriberHandle, - CUpti_CallbackDomain, CUpti_CallbackId); - static auto func_ptr = 
LoadSymbol("cuptiGetCallbackState"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(enable, subscriber, domain, cbid); -} - -CUptiResult CUPTIAPI cuptiEnableCallback(uint32_t enable, - CUpti_SubscriberHandle subscriber, - CUpti_CallbackDomain domain, - CUpti_CallbackId cbid) { - using FuncPtr = CUptiResult(CUPTIAPI *)( - uint32_t, CUpti_SubscriberHandle, CUpti_CallbackDomain, CUpti_CallbackId); - static auto func_ptr = LoadSymbol("cuptiEnableCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(enable, subscriber, domain, cbid); -} - -CUptiResult CUPTIAPI cuptiEnableDomain(uint32_t enable, - CUpti_SubscriberHandle subscriber, - CUpti_CallbackDomain domain) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint32_t, CUpti_SubscriberHandle, - CUpti_CallbackDomain); - static auto func_ptr = LoadSymbol("cuptiEnableDomain"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(enable, subscriber, domain); -} - -CUptiResult CUPTIAPI cuptiEnableAllDomains(uint32_t enable, - CUpti_SubscriberHandle subscriber) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint32_t, CUpti_SubscriberHandle); - static auto func_ptr = LoadSymbol("cuptiEnableAllDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(enable, subscriber); -} - -CUptiResult CUPTIAPI cuptiGetCallbackName(CUpti_CallbackDomain domain, - uint32_t cbid, const char **name) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_CallbackDomain, uint32_t, const char **); - static auto func_ptr = LoadSymbol("cuptiGetCallbackName"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(domain, cbid, name); -} - -CUptiResult CUPTIAPI -cuptiSetEventCollectionMode(CUcontext context, CUpti_EventCollectionMode mode) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, CUpti_EventCollectionMode); - static auto func_ptr = LoadSymbol("cuptiSetEventCollectionMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, 
mode); -} - -CUptiResult CUPTIAPI cuptiDeviceGetAttribute(CUdevice device, - CUpti_DeviceAttribute attrib, - size_t *valueSize, void *value) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUdevice, CUpti_DeviceAttribute, - size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiDeviceGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiDeviceGetTimestamp(CUcontext context, - uint64_t *timestamp) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, uint64_t *); - static auto func_ptr = LoadSymbol("cuptiDeviceGetTimestamp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, timestamp); -} - -CUptiResult CUPTIAPI cuptiDeviceGetNumEventDomains(CUdevice device, - uint32_t *numDomains) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUdevice, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiDeviceGetNumEventDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, numDomains); -} - -CUptiResult CUPTIAPI cuptiDeviceEnumEventDomains( - CUdevice device, size_t *arraySizeBytes, CUpti_EventDomainID *domainArray) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUdevice, size_t *, CUpti_EventDomainID *); - static auto func_ptr = LoadSymbol("cuptiDeviceEnumEventDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, arraySizeBytes, domainArray); -} - -CUptiResult CUPTIAPI cuptiDeviceGetEventDomainAttribute( - CUdevice device, CUpti_EventDomainID eventDomain, - CUpti_EventDomainAttribute attrib, size_t *valueSize, void *value) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUdevice, CUpti_EventDomainID, - CUpti_EventDomainAttribute, size_t *, void *); - static auto func_ptr = - LoadSymbol("cuptiDeviceGetEventDomainAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, eventDomain, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI 
cuptiGetNumEventDomains(uint32_t *numDomains) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetNumEventDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numDomains); -} - -CUptiResult CUPTIAPI cuptiEnumEventDomains(size_t *arraySizeBytes, - CUpti_EventDomainID *domainArray) { - using FuncPtr = CUptiResult(CUPTIAPI *)(size_t *, CUpti_EventDomainID *); - static auto func_ptr = LoadSymbol("cuptiEnumEventDomains"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(arraySizeBytes, domainArray); -} - -CUptiResult CUPTIAPI cuptiEventDomainGetAttribute( - CUpti_EventDomainID eventDomain, CUpti_EventDomainAttribute attrib, - size_t *valueSize, void *value) { - using FuncPtr = CUptiResult(CUPTIAPI *)( - CUpti_EventDomainID, CUpti_EventDomainAttribute, size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiEventDomainGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventDomain, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiEventDomainGetNumEvents( - CUpti_EventDomainID eventDomain, uint32_t *numEvents) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventDomainID, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiEventDomainGetNumEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventDomain, numEvents); -} - -CUptiResult CUPTIAPI cuptiEventDomainEnumEvents(CUpti_EventDomainID eventDomain, - size_t *arraySizeBytes, - CUpti_EventID *eventArray) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_EventDomainID, size_t *, CUpti_EventID *); - static auto func_ptr = LoadSymbol("cuptiEventDomainEnumEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventDomain, arraySizeBytes, eventArray); -} - -CUptiResult CUPTIAPI cuptiEventGetAttribute(CUpti_EventID event, - CUpti_EventAttribute attrib, - size_t *valueSize, void *value) { - using FuncPtr = CUptiResult(CUPTIAPI 
*)(CUpti_EventID, CUpti_EventAttribute, - size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiEventGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiEventGetIdFromName(CUdevice device, - const char *eventName, - CUpti_EventID *event) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUdevice, const char *, CUpti_EventID *); - static auto func_ptr = LoadSymbol("cuptiEventGetIdFromName"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, eventName, event); -} - -CUptiResult CUPTIAPI cuptiEventGroupCreate(CUcontext context, - CUpti_EventGroup *eventGroup, - uint32_t flags) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUcontext, CUpti_EventGroup *, uint32_t); - static auto func_ptr = LoadSymbol("cuptiEventGroupCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, eventGroup, flags); -} - -CUptiResult CUPTIAPI cuptiEventGroupDestroy(CUpti_EventGroup eventGroup) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup); - static auto func_ptr = LoadSymbol("cuptiEventGroupDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup); -} - -CUptiResult CUPTIAPI cuptiEventGroupGetAttribute( - CUpti_EventGroup eventGroup, CUpti_EventGroupAttribute attrib, - size_t *valueSize, void *value) { - using FuncPtr = CUptiResult(CUPTIAPI *)( - CUpti_EventGroup, CUpti_EventGroupAttribute, size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiEventGroupGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiEventGroupSetAttribute( - CUpti_EventGroup eventGroup, CUpti_EventGroupAttribute attrib, - size_t valueSize, void *value) { - using FuncPtr = CUptiResult(CUPTIAPI *)( - CUpti_EventGroup, CUpti_EventGroupAttribute, size_t, void *); - static auto func_ptr = 
LoadSymbol("cuptiEventGroupSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiEventGroupAddEvent(CUpti_EventGroup eventGroup, - CUpti_EventID event) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup, CUpti_EventID); - static auto func_ptr = LoadSymbol("cuptiEventGroupAddEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, event); -} - -CUptiResult CUPTIAPI cuptiEventGroupRemoveEvent(CUpti_EventGroup eventGroup, - CUpti_EventID event) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup, CUpti_EventID); - static auto func_ptr = LoadSymbol("cuptiEventGroupRemoveEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, event); -} - -CUptiResult CUPTIAPI -cuptiEventGroupRemoveAllEvents(CUpti_EventGroup eventGroup) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup); - static auto func_ptr = LoadSymbol("cuptiEventGroupRemoveAllEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup); -} - -CUptiResult CUPTIAPI -cuptiEventGroupResetAllEvents(CUpti_EventGroup eventGroup) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup); - static auto func_ptr = LoadSymbol("cuptiEventGroupResetAllEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup); -} - -CUptiResult CUPTIAPI cuptiEventGroupEnable(CUpti_EventGroup eventGroup) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup); - static auto func_ptr = LoadSymbol("cuptiEventGroupEnable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup); -} - -CUptiResult CUPTIAPI cuptiEventGroupDisable(CUpti_EventGroup eventGroup) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroup); - static auto func_ptr = LoadSymbol("cuptiEventGroupDisable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup); -} 
- -CUptiResult CUPTIAPI cuptiEventGroupReadEvent(CUpti_EventGroup eventGroup, - CUpti_ReadEventFlags flags, - CUpti_EventID event, - size_t *eventValueBufferSizeBytes, - uint64_t *eventValueBuffer) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_EventGroup, CUpti_ReadEventFlags, - CUpti_EventID, size_t *, uint64_t *); - static auto func_ptr = LoadSymbol("cuptiEventGroupReadEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, flags, event, eventValueBufferSizeBytes, - eventValueBuffer); -} - -CUptiResult CUPTIAPI cuptiEventGroupReadAllEvents( - CUpti_EventGroup eventGroup, CUpti_ReadEventFlags flags, - size_t *eventValueBufferSizeBytes, uint64_t *eventValueBuffer, - size_t *eventIdArraySizeBytes, CUpti_EventID *eventIdArray, - size_t *numEventIdsRead) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_EventGroup, CUpti_ReadEventFlags, size_t *, - uint64_t *, size_t *, CUpti_EventID *, size_t *); - static auto func_ptr = LoadSymbol("cuptiEventGroupReadAllEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroup, flags, eventValueBufferSizeBytes, - eventValueBuffer, eventIdArraySizeBytes, eventIdArray, - numEventIdsRead); -} - -CUptiResult CUPTIAPI cuptiEventGroupSetsCreate( - CUcontext context, size_t eventIdArraySizeBytes, - CUpti_EventID *eventIdArray, CUpti_EventGroupSets **eventGroupPasses) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, size_t, CUpti_EventID *, - CUpti_EventGroupSets **); - static auto func_ptr = LoadSymbol("cuptiEventGroupSetsCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, eventIdArraySizeBytes, eventIdArray, - eventGroupPasses); -} - -CUptiResult CUPTIAPI -cuptiEventGroupSetsDestroy(CUpti_EventGroupSets *eventGroupSets) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroupSets *); - static auto func_ptr = LoadSymbol("cuptiEventGroupSetsDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(eventGroupSets); -} - -CUptiResult CUPTIAPI -cuptiEventGroupSetEnable(CUpti_EventGroupSet *eventGroupSet) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroupSet *); - static auto func_ptr = LoadSymbol("cuptiEventGroupSetEnable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroupSet); -} - -CUptiResult CUPTIAPI -cuptiEventGroupSetDisable(CUpti_EventGroupSet *eventGroupSet) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_EventGroupSet *); - static auto func_ptr = LoadSymbol("cuptiEventGroupSetDisable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(eventGroupSet); -} - -CUptiResult CUPTIAPI cuptiEnableKernelReplayMode(CUcontext context) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuptiEnableKernelReplayMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context); -} - -CUptiResult CUPTIAPI cuptiDisableKernelReplayMode(CUcontext context) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext); - static auto func_ptr = LoadSymbol("cuptiDisableKernelReplayMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context); -} - -CUptiResult CUPTIAPI cuptiKernelReplaySubscribeUpdate( - CUpti_KernelReplayUpdateFunc updateFunc, void *customData) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_KernelReplayUpdateFunc, void *); - static auto func_ptr = - LoadSymbol("cuptiKernelReplaySubscribeUpdate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(updateFunc, customData); -} - -CUptiResult CUPTIAPI cuptiGetNumMetrics(uint32_t *numMetrics) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetNumMetrics"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numMetrics); -} - -CUptiResult CUPTIAPI cuptiEnumMetrics(size_t *arraySizeBytes, - CUpti_MetricID *metricArray) { - using FuncPtr = CUptiResult(CUPTIAPI *)(size_t *, CUpti_MetricID 
*); - static auto func_ptr = LoadSymbol("cuptiEnumMetrics"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(arraySizeBytes, metricArray); -} - -CUptiResult CUPTIAPI cuptiDeviceGetNumMetrics(CUdevice device, - uint32_t *numMetrics) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUdevice, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiDeviceGetNumMetrics"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, numMetrics); -} - -CUptiResult CUPTIAPI cuptiDeviceEnumMetrics(CUdevice device, - size_t *arraySizeBytes, - CUpti_MetricID *metricArray) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUdevice, size_t *, CUpti_MetricID *); - static auto func_ptr = LoadSymbol("cuptiDeviceEnumMetrics"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, arraySizeBytes, metricArray); -} - -CUptiResult CUPTIAPI cuptiMetricGetAttribute(CUpti_MetricID metric, - CUpti_MetricAttribute attrib, - size_t *valueSize, void *value) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_MetricID, CUpti_MetricAttribute, - size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiMetricGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, attrib, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiMetricGetIdFromName(CUdevice device, - const char *metricName, - CUpti_MetricID *metric) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUdevice, const char *, CUpti_MetricID *); - static auto func_ptr = LoadSymbol("cuptiMetricGetIdFromName"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, metricName, metric); -} - -CUptiResult CUPTIAPI cuptiMetricGetNumEvents(CUpti_MetricID metric, - uint32_t *numEvents) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_MetricID, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiMetricGetNumEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, numEvents); -} - -CUptiResult CUPTIAPI 
cuptiMetricEnumEvents(CUpti_MetricID metric, - size_t *eventIdArraySizeBytes, - CUpti_EventID *eventIdArray) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_MetricID, size_t *, CUpti_EventID *); - static auto func_ptr = LoadSymbol("cuptiMetricEnumEvents"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, eventIdArraySizeBytes, eventIdArray); -} - -CUptiResult CUPTIAPI cuptiMetricGetNumProperties(CUpti_MetricID metric, - uint32_t *numProp) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_MetricID, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiMetricGetNumProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, numProp); -} - -CUptiResult CUPTIAPI -cuptiMetricEnumProperties(CUpti_MetricID metric, size_t *propIdArraySizeBytes, - CUpti_MetricPropertyID *propIdArray) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_MetricID, size_t *, - CUpti_MetricPropertyID *); - static auto func_ptr = LoadSymbol("cuptiMetricEnumProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, propIdArraySizeBytes, propIdArray); -} - -CUptiResult CUPTIAPI -cuptiMetricGetRequiredEventGroupSets(CUcontext context, CUpti_MetricID metric, - CUpti_EventGroupSets **eventGroupSets) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, CUpti_MetricID, - CUpti_EventGroupSets **); - static auto func_ptr = - LoadSymbol("cuptiMetricGetRequiredEventGroupSets"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, metric, eventGroupSets); -} - -CUptiResult CUPTIAPI cuptiMetricCreateEventGroupSets( - CUcontext context, size_t metricIdArraySizeBytes, - CUpti_MetricID *metricIdArray, CUpti_EventGroupSets **eventGroupPasses) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, size_t, CUpti_MetricID *, - CUpti_EventGroupSets **); - static auto func_ptr = LoadSymbol("cuptiMetricCreateEventGroupSets"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, 
metricIdArraySizeBytes, metricIdArray, - eventGroupPasses); -} - -CUptiResult CUPTIAPI cuptiMetricGetValue(CUdevice device, CUpti_MetricID metric, - size_t eventIdArraySizeBytes, - CUpti_EventID *eventIdArray, - size_t eventValueArraySizeBytes, - uint64_t *eventValueArray, - uint64_t timeDuration, - CUpti_MetricValue *metricValue) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUdevice, CUpti_MetricID, size_t, - CUpti_EventID *, size_t, uint64_t *, - uint64_t, CUpti_MetricValue *); - static auto func_ptr = LoadSymbol("cuptiMetricGetValue"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, metric, eventIdArraySizeBytes, eventIdArray, - eventValueArraySizeBytes, eventValueArray, timeDuration, - metricValue); -} - -CUptiResult CUPTIAPI cuptiMetricGetValue2( - CUpti_MetricID metric, size_t eventIdArraySizeBytes, - CUpti_EventID *eventIdArray, size_t eventValueArraySizeBytes, - uint64_t *eventValueArray, size_t propIdArraySizeBytes, - CUpti_MetricPropertyID *propIdArray, size_t propValueArraySizeBytes, - uint64_t *propValueArray, CUpti_MetricValue *metricValue) { - using FuncPtr = CUptiResult(CUPTIAPI *)( - CUpti_MetricID, size_t, CUpti_EventID *, size_t, uint64_t *, size_t, - CUpti_MetricPropertyID *, size_t, uint64_t *, CUpti_MetricValue *); - static auto func_ptr = LoadSymbol("cuptiMetricGetValue2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(metric, eventIdArraySizeBytes, eventIdArray, - eventValueArraySizeBytes, eventValueArray, - propIdArraySizeBytes, propIdArray, propValueArraySizeBytes, - propValueArray, metricValue); -} - -CUptiResult CUPTIAPI cuptiGetTimestamp(uint64_t *timestamp) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint64_t *); - static auto func_ptr = LoadSymbol("cuptiGetTimestamp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(timestamp); -} - -CUptiResult CUPTIAPI cuptiGetContextId(CUcontext context, uint32_t *contextId) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, 
uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetContextId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, contextId); -} - -CUptiResult CUPTIAPI cuptiGetStreamId(CUcontext context, CUstream stream, - uint32_t *streamId) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, CUstream, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetStreamId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, stream, streamId); -} - -CUptiResult CUPTIAPI cuptiGetStreamIdEx(CUcontext context, CUstream stream, - uint8_t perThreadStream, - uint32_t *streamId) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUcontext, CUstream, uint8_t, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetStreamIdEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, stream, perThreadStream, streamId); -} - -CUptiResult CUPTIAPI cuptiGetDeviceId(CUcontext context, uint32_t *deviceId) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, uint32_t *); - static auto func_ptr = LoadSymbol("cuptiGetDeviceId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, deviceId); -} - -CUptiResult CUPTIAPI cuptiActivityEnable(CUpti_ActivityKind kind) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_ActivityKind); - static auto func_ptr = LoadSymbol("cuptiActivityEnable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(kind); -} - -CUptiResult CUPTIAPI cuptiActivityDisable(CUpti_ActivityKind kind) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_ActivityKind); - static auto func_ptr = LoadSymbol("cuptiActivityDisable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(kind); -} - -CUptiResult CUPTIAPI cuptiActivityEnableContext(CUcontext context, - CUpti_ActivityKind kind) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, CUpti_ActivityKind); - static auto func_ptr = LoadSymbol("cuptiActivityEnableContext"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(context, kind); -} - -CUptiResult CUPTIAPI cuptiActivityDisableContext(CUcontext context, - CUpti_ActivityKind kind) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, CUpti_ActivityKind); - static auto func_ptr = LoadSymbol("cuptiActivityDisableContext"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, kind); -} - -CUptiResult CUPTIAPI cuptiActivityGetNumDroppedRecords(CUcontext context, - uint32_t streamId, - size_t *dropped) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, uint32_t, size_t *); - static auto func_ptr = - LoadSymbol("cuptiActivityGetNumDroppedRecords"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, streamId, dropped); -} - -CUptiResult CUPTIAPI cuptiActivityGetNextRecord(uint8_t *buffer, - size_t validBufferSizeBytes, - CUpti_Activity **record) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint8_t *, size_t, CUpti_Activity **); - static auto func_ptr = LoadSymbol("cuptiActivityGetNextRecord"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(buffer, validBufferSizeBytes, record); -} - -CUptiResult CUPTIAPI cuptiActivityRegisterCallbacks( - CUpti_BuffersCallbackRequestFunc funcBufferRequested, - CUpti_BuffersCallbackCompleteFunc funcBufferCompleted) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_BuffersCallbackRequestFunc, - CUpti_BuffersCallbackCompleteFunc); - static auto func_ptr = LoadSymbol("cuptiActivityRegisterCallbacks"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(funcBufferRequested, funcBufferCompleted); -} - -CUptiResult CUPTIAPI cuptiActivityFlush(CUcontext context, uint32_t streamId, - uint32_t flag) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUcontext, uint32_t, uint32_t); - static auto func_ptr = LoadSymbol("cuptiActivityFlush"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, streamId, flag); -} - -CUptiResult CUPTIAPI 
cuptiActivityFlushAll(uint32_t flag) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint32_t); - static auto func_ptr = LoadSymbol("cuptiActivityFlushAll"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flag); -} - -CUptiResult CUPTIAPI cuptiActivityGetAttribute(CUpti_ActivityAttribute attr, - size_t *valueSize, void *value) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_ActivityAttribute, size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiActivityGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attr, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiActivitySetAttribute(CUpti_ActivityAttribute attr, - size_t *valueSize, void *value) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_ActivityAttribute, size_t *, void *); - static auto func_ptr = LoadSymbol("cuptiActivitySetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attr, valueSize, value); -} - -CUptiResult CUPTIAPI cuptiActivityConfigureUnifiedMemoryCounter( - CUpti_ActivityUnifiedMemoryCounterConfig *config, uint32_t count) { - using FuncPtr = CUptiResult(CUPTIAPI *)( - CUpti_ActivityUnifiedMemoryCounterConfig *, uint32_t); - static auto func_ptr = - LoadSymbol("cuptiActivityConfigureUnifiedMemoryCounter"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config, count); -} - -CUptiResult CUPTIAPI -cuptiGetAutoBoostState(CUcontext context, CUpti_ActivityAutoBoostState *state) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUcontext, CUpti_ActivityAutoBoostState *); - static auto func_ptr = LoadSymbol("cuptiGetAutoBoostState"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(context, state); -} - -CUptiResult CUPTIAPI cuptiActivityConfigurePCSampling( - CUcontext ctx, CUpti_ActivityPCSamplingConfig *config) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUcontext, CUpti_ActivityPCSamplingConfig *); - static auto func_ptr = - 
LoadSymbol("cuptiActivityConfigurePCSampling"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ctx, config); -} - -CUptiResult CUPTIAPI cuptiGetLastError(void) { - using FuncPtr = CUptiResult(CUPTIAPI *)(); - static auto func_ptr = LoadSymbol("cuptiGetLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -CUptiResult CUPTIAPI cuptiSetThreadIdType(CUpti_ActivityThreadIdType type) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_ActivityThreadIdType); - static auto func_ptr = LoadSymbol("cuptiSetThreadIdType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type); -} - -CUptiResult CUPTIAPI cuptiGetThreadIdType(CUpti_ActivityThreadIdType *type) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUpti_ActivityThreadIdType *); - static auto func_ptr = LoadSymbol("cuptiGetThreadIdType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type); -} - -CUptiResult CUPTIAPI cuptiComputeCapabilitySupported(int major, int minor, - int *support) { - using FuncPtr = CUptiResult(CUPTIAPI *)(int, int, int *); - static auto func_ptr = LoadSymbol("cuptiComputeCapabilitySupported"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(major, minor, support); -} - -CUptiResult CUPTIAPI cuptiDeviceSupported(CUdevice dev, int *support) { - using FuncPtr = CUptiResult(CUPTIAPI *)(CUdevice, int *); - static auto func_ptr = LoadSymbol("cuptiDeviceSupported"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dev, support); -} - -CUptiResult CUPTIAPI cuptiFinalize(void) { - using FuncPtr = CUptiResult(CUPTIAPI *)(); - static auto func_ptr = LoadSymbol("cuptiFinalize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -CUptiResult CUPTIAPI cuptiActivityPushExternalCorrelationId( - CUpti_ExternalCorrelationKind kind, uint64_t id) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_ExternalCorrelationKind, uint64_t); - static auto func_ptr = - 
LoadSymbol("cuptiActivityPushExternalCorrelationId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(kind, id); -} - -CUptiResult CUPTIAPI cuptiActivityPopExternalCorrelationId( - CUpti_ExternalCorrelationKind kind, uint64_t *lastId) { - using FuncPtr = - CUptiResult(CUPTIAPI *)(CUpti_ExternalCorrelationKind, uint64_t *); - static auto func_ptr = - LoadSymbol("cuptiActivityPopExternalCorrelationId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(kind, lastId); -} - -CUptiResult CUPTIAPI cuptiActivityEnableLatencyTimestamps(uint8_t enable) { - using FuncPtr = CUptiResult(CUPTIAPI *)(uint8_t); - static auto func_ptr = - LoadSymbol("cuptiActivityEnableLatencyTimestamps"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(enable); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cupti_stub.cc b/third_party/xla/third_party/tsl/tsl/cuda/cupti_stub.cc index 2a6e094383bfab..9e632010d83a7a 100644 --- a/third_party/xla/third_party/tsl/tsl/cuda/cupti_stub.cc +++ b/third_party/xla/third_party/tsl/tsl/cuda/cupti_stub.cc @@ -35,35 +35,38 @@ void* GetDsoHandle() { #endif } -template -T LoadSymbol(const char* symbol_name) { +void* LoadSymbol(const char* symbol_name) { void* symbol = nullptr; if (auto handle = GetDsoHandle()) { tsl::Env::Default() ->GetSymbolFromLibrary(handle, symbol_name, &symbol) .IgnoreError(); } - return reinterpret_cast(symbol); + return symbol; } -CUptiResult GetSymbolNotFoundError() { return CUPTI_ERROR_UNKNOWN; } +const char* kSymbols[] = { +#include "tsl/cuda/cupti.inc" +}; + +constexpr size_t kNumSymbols = sizeof(kSymbols) / sizeof(const char*); + } // namespace -// For now we only need one stub implementation. We will need to generate -// a new file when CUPTI breaks backwards compatibility (has not been the case -// for quite a while) or if we want to use functionality introduced in a new -// version. 
-// -// Calling a function that is not yet available in the loaded CUPTI version will -// return CUPTI_ERROR_UNKNOWN. -#if CUDA_VERSION < 10010 -#include "tsl/cuda/cupti_10_0.inc" -#elif CUDA_VERSION < 10020 -#include "tsl/cuda/cupti_10_1.inc" -#elif CUDA_VERSION < 11000 -#include "tsl/cuda/cupti_10_2.inc" -#elif CUDA_VERSION < 12000 -#include "tsl/cuda/cupti_11_0.inc" -#else -#include "tsl/cuda/cupti_12_0.inc" -#endif +extern "C" { + +static CUptiResult GetSymbolNotFoundError() { return CUPTI_ERROR_UNKNOWN; } + +extern void* _cupti_tramp_table[]; + +void _cupti_tramp_resolve(int i) { + CHECK_LE(0, i); + CHECK_LT(i, kNumSymbols); + void* p = LoadSymbol(kSymbols[i]); + if (!p) { + p = reinterpret_cast(&GetSymbolNotFoundError); + } + _cupti_tramp_table[i] = p; +} + +} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/curand_10_0.inc b/third_party/xla/third_party/tsl/tsl/cuda/curand_10_0.inc deleted file mode 100644 index 3c4c8fed67ac11..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/curand_10_0.inc +++ /dev/null @@ -1,268 +0,0 @@ -// Auto-generated, do not edit. 
- -extern "C" { - -curandStatus_t CURANDAPI curandCreateGenerator(curandGenerator_t *generator, - curandRngType_t rng_type) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t *, curandRngType_t); - static auto func_ptr = LoadSymbol("curandCreateGenerator"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, rng_type); -} - -curandStatus_t CURANDAPI curandCreateGeneratorHost(curandGenerator_t *generator, - curandRngType_t rng_type) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t *, curandRngType_t); - static auto func_ptr = LoadSymbol("curandCreateGeneratorHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, rng_type); -} - -curandStatus_t CURANDAPI curandDestroyGenerator(curandGenerator_t generator) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t); - static auto func_ptr = LoadSymbol("curandDestroyGenerator"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator); -} - -curandStatus_t CURANDAPI curandGetVersion(int *version) { - using FuncPtr = curandStatus_t(CURANDAPI *)(int *); - static auto func_ptr = LoadSymbol("curandGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(version); -} - -curandStatus_t CURANDAPI curandGetProperty(libraryPropertyType type, - int *value) { - using FuncPtr = curandStatus_t(CURANDAPI *)(libraryPropertyType, int *); - static auto func_ptr = LoadSymbol("curandGetProperty"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type, value); -} - -curandStatus_t CURANDAPI curandSetStream(curandGenerator_t generator, - cudaStream_t stream) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, cudaStream_t); - static auto func_ptr = LoadSymbol("curandSetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, stream); -} - -curandStatus_t CURANDAPI curandSetPseudoRandomGeneratorSeed( - curandGenerator_t 
generator, unsigned long long seed) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned long long); - static auto func_ptr = - LoadSymbol("curandSetPseudoRandomGeneratorSeed"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, seed); -} - -curandStatus_t CURANDAPI curandSetGeneratorOffset(curandGenerator_t generator, - unsigned long long offset) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned long long); - static auto func_ptr = LoadSymbol("curandSetGeneratorOffset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, offset); -} - -curandStatus_t CURANDAPI curandSetGeneratorOrdering(curandGenerator_t generator, - curandOrdering_t order) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, curandOrdering_t); - static auto func_ptr = LoadSymbol("curandSetGeneratorOrdering"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, order); -} - -curandStatus_t CURANDAPI curandSetQuasiRandomGeneratorDimensions( - curandGenerator_t generator, unsigned int num_dimensions) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned int); - static auto func_ptr = - LoadSymbol("curandSetQuasiRandomGeneratorDimensions"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, num_dimensions); -} - -curandStatus_t CURANDAPI curandGenerate(curandGenerator_t generator, - unsigned int *outputPtr, size_t num) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned int *, size_t); - static auto func_ptr = LoadSymbol("curandGenerate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, num); -} - -curandStatus_t CURANDAPI curandGenerateLongLong(curandGenerator_t generator, - unsigned long long *outputPtr, - size_t num) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, - unsigned long long *, size_t); - static auto 
func_ptr = LoadSymbol("curandGenerateLongLong"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, num); -} - -curandStatus_t CURANDAPI curandGenerateUniform(curandGenerator_t generator, - float *outputPtr, size_t num) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, float *, size_t); - static auto func_ptr = LoadSymbol("curandGenerateUniform"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, num); -} - -curandStatus_t CURANDAPI curandGenerateUniformDouble( - curandGenerator_t generator, double *outputPtr, size_t num) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, double *, size_t); - static auto func_ptr = LoadSymbol("curandGenerateUniformDouble"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, num); -} - -curandStatus_t CURANDAPI curandGenerateNormal(curandGenerator_t generator, - float *outputPtr, size_t n, - float mean, float stddev) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, float *, - size_t, float, float); - static auto func_ptr = LoadSymbol("curandGenerateNormal"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, n, mean, stddev); -} - -curandStatus_t CURANDAPI curandGenerateNormalDouble(curandGenerator_t generator, - double *outputPtr, size_t n, - double mean, - double stddev) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, double *, - size_t, double, double); - static auto func_ptr = LoadSymbol("curandGenerateNormalDouble"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, n, mean, stddev); -} - -curandStatus_t CURANDAPI curandGenerateLogNormal(curandGenerator_t generator, - float *outputPtr, size_t n, - float mean, float stddev) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, float *, - size_t, float, float); - static auto func_ptr = 
LoadSymbol("curandGenerateLogNormal"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, n, mean, stddev); -} - -curandStatus_t CURANDAPI -curandGenerateLogNormalDouble(curandGenerator_t generator, double *outputPtr, - size_t n, double mean, double stddev) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, double *, - size_t, double, double); - static auto func_ptr = LoadSymbol("curandGenerateLogNormalDouble"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, n, mean, stddev); -} - -curandStatus_t CURANDAPI curandCreatePoissonDistribution( - double lambda, curandDiscreteDistribution_t *discrete_distribution) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(double, curandDiscreteDistribution_t *); - static auto func_ptr = LoadSymbol("curandCreatePoissonDistribution"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(lambda, discrete_distribution); -} - -curandStatus_t CURANDAPI -curandDestroyDistribution(curandDiscreteDistribution_t discrete_distribution) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandDiscreteDistribution_t); - static auto func_ptr = LoadSymbol("curandDestroyDistribution"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(discrete_distribution); -} - -curandStatus_t CURANDAPI curandGeneratePoisson(curandGenerator_t generator, - unsigned int *outputPtr, - size_t n, double lambda) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned int *, - size_t, double); - static auto func_ptr = LoadSymbol("curandGeneratePoisson"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, n, lambda); -} - -curandStatus_t CURANDAPI curandGeneratePoissonMethod( - curandGenerator_t generator, unsigned int *outputPtr, size_t n, - double lambda, curandMethod_t method) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned int *, - size_t, double, 
curandMethod_t); - static auto func_ptr = LoadSymbol("curandGeneratePoissonMethod"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, n, lambda, method); -} - -curandStatus_t CURANDAPI curandGenerateBinomial(curandGenerator_t generator, - unsigned int *outputPtr, - size_t num, unsigned int n, - double p) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned int *, - size_t, unsigned int, double); - static auto func_ptr = LoadSymbol("curandGenerateBinomial"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, num, n, p); -} - -curandStatus_t CURANDAPI curandGenerateBinomialMethod( - curandGenerator_t generator, unsigned int *outputPtr, size_t num, - unsigned int n, double p, curandMethod_t method) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned int *, size_t, - unsigned int, double, curandMethod_t); - static auto func_ptr = LoadSymbol("curandGenerateBinomialMethod"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, num, n, p, method); -} - -curandStatus_t CURANDAPI curandGenerateSeeds(curandGenerator_t generator) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t); - static auto func_ptr = LoadSymbol("curandGenerateSeeds"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator); -} - -curandStatus_t CURANDAPI curandGetDirectionVectors32( - curandDirectionVectors32_t *vectors[], curandDirectionVectorSet_t set) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandDirectionVectors32_t *[], - curandDirectionVectorSet_t); - static auto func_ptr = LoadSymbol("curandGetDirectionVectors32"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(vectors, set); -} - -curandStatus_t CURANDAPI -curandGetScrambleConstants32(unsigned int **constants) { - using FuncPtr = curandStatus_t(CURANDAPI *)(unsigned int **); - static auto func_ptr = 
LoadSymbol("curandGetScrambleConstants32"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(constants); -} - -curandStatus_t CURANDAPI curandGetDirectionVectors64( - curandDirectionVectors64_t *vectors[], curandDirectionVectorSet_t set) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandDirectionVectors64_t *[], - curandDirectionVectorSet_t); - static auto func_ptr = LoadSymbol("curandGetDirectionVectors64"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(vectors, set); -} - -curandStatus_t CURANDAPI -curandGetScrambleConstants64(unsigned long long **constants) { - using FuncPtr = curandStatus_t(CURANDAPI *)(unsigned long long **); - static auto func_ptr = LoadSymbol("curandGetScrambleConstants64"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(constants); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/curand_10_1.inc b/third_party/xla/third_party/tsl/tsl/cuda/curand_10_1.inc deleted file mode 100644 index 3c4c8fed67ac11..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/curand_10_1.inc +++ /dev/null @@ -1,268 +0,0 @@ -// Auto-generated, do not edit. 
- -extern "C" { - -curandStatus_t CURANDAPI curandCreateGenerator(curandGenerator_t *generator, - curandRngType_t rng_type) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t *, curandRngType_t); - static auto func_ptr = LoadSymbol("curandCreateGenerator"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, rng_type); -} - -curandStatus_t CURANDAPI curandCreateGeneratorHost(curandGenerator_t *generator, - curandRngType_t rng_type) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t *, curandRngType_t); - static auto func_ptr = LoadSymbol("curandCreateGeneratorHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, rng_type); -} - -curandStatus_t CURANDAPI curandDestroyGenerator(curandGenerator_t generator) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t); - static auto func_ptr = LoadSymbol("curandDestroyGenerator"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator); -} - -curandStatus_t CURANDAPI curandGetVersion(int *version) { - using FuncPtr = curandStatus_t(CURANDAPI *)(int *); - static auto func_ptr = LoadSymbol("curandGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(version); -} - -curandStatus_t CURANDAPI curandGetProperty(libraryPropertyType type, - int *value) { - using FuncPtr = curandStatus_t(CURANDAPI *)(libraryPropertyType, int *); - static auto func_ptr = LoadSymbol("curandGetProperty"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type, value); -} - -curandStatus_t CURANDAPI curandSetStream(curandGenerator_t generator, - cudaStream_t stream) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, cudaStream_t); - static auto func_ptr = LoadSymbol("curandSetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, stream); -} - -curandStatus_t CURANDAPI curandSetPseudoRandomGeneratorSeed( - curandGenerator_t 
generator, unsigned long long seed) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned long long); - static auto func_ptr = - LoadSymbol("curandSetPseudoRandomGeneratorSeed"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, seed); -} - -curandStatus_t CURANDAPI curandSetGeneratorOffset(curandGenerator_t generator, - unsigned long long offset) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned long long); - static auto func_ptr = LoadSymbol("curandSetGeneratorOffset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, offset); -} - -curandStatus_t CURANDAPI curandSetGeneratorOrdering(curandGenerator_t generator, - curandOrdering_t order) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, curandOrdering_t); - static auto func_ptr = LoadSymbol("curandSetGeneratorOrdering"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, order); -} - -curandStatus_t CURANDAPI curandSetQuasiRandomGeneratorDimensions( - curandGenerator_t generator, unsigned int num_dimensions) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned int); - static auto func_ptr = - LoadSymbol("curandSetQuasiRandomGeneratorDimensions"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, num_dimensions); -} - -curandStatus_t CURANDAPI curandGenerate(curandGenerator_t generator, - unsigned int *outputPtr, size_t num) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned int *, size_t); - static auto func_ptr = LoadSymbol("curandGenerate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, num); -} - -curandStatus_t CURANDAPI curandGenerateLongLong(curandGenerator_t generator, - unsigned long long *outputPtr, - size_t num) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, - unsigned long long *, size_t); - static auto 
func_ptr = LoadSymbol("curandGenerateLongLong"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, num); -} - -curandStatus_t CURANDAPI curandGenerateUniform(curandGenerator_t generator, - float *outputPtr, size_t num) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, float *, size_t); - static auto func_ptr = LoadSymbol("curandGenerateUniform"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, num); -} - -curandStatus_t CURANDAPI curandGenerateUniformDouble( - curandGenerator_t generator, double *outputPtr, size_t num) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, double *, size_t); - static auto func_ptr = LoadSymbol("curandGenerateUniformDouble"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, num); -} - -curandStatus_t CURANDAPI curandGenerateNormal(curandGenerator_t generator, - float *outputPtr, size_t n, - float mean, float stddev) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, float *, - size_t, float, float); - static auto func_ptr = LoadSymbol("curandGenerateNormal"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, n, mean, stddev); -} - -curandStatus_t CURANDAPI curandGenerateNormalDouble(curandGenerator_t generator, - double *outputPtr, size_t n, - double mean, - double stddev) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, double *, - size_t, double, double); - static auto func_ptr = LoadSymbol("curandGenerateNormalDouble"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, n, mean, stddev); -} - -curandStatus_t CURANDAPI curandGenerateLogNormal(curandGenerator_t generator, - float *outputPtr, size_t n, - float mean, float stddev) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, float *, - size_t, float, float); - static auto func_ptr = 
LoadSymbol("curandGenerateLogNormal"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, n, mean, stddev); -} - -curandStatus_t CURANDAPI -curandGenerateLogNormalDouble(curandGenerator_t generator, double *outputPtr, - size_t n, double mean, double stddev) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, double *, - size_t, double, double); - static auto func_ptr = LoadSymbol("curandGenerateLogNormalDouble"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, n, mean, stddev); -} - -curandStatus_t CURANDAPI curandCreatePoissonDistribution( - double lambda, curandDiscreteDistribution_t *discrete_distribution) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(double, curandDiscreteDistribution_t *); - static auto func_ptr = LoadSymbol("curandCreatePoissonDistribution"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(lambda, discrete_distribution); -} - -curandStatus_t CURANDAPI -curandDestroyDistribution(curandDiscreteDistribution_t discrete_distribution) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandDiscreteDistribution_t); - static auto func_ptr = LoadSymbol("curandDestroyDistribution"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(discrete_distribution); -} - -curandStatus_t CURANDAPI curandGeneratePoisson(curandGenerator_t generator, - unsigned int *outputPtr, - size_t n, double lambda) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned int *, - size_t, double); - static auto func_ptr = LoadSymbol("curandGeneratePoisson"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, n, lambda); -} - -curandStatus_t CURANDAPI curandGeneratePoissonMethod( - curandGenerator_t generator, unsigned int *outputPtr, size_t n, - double lambda, curandMethod_t method) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned int *, - size_t, double, 
curandMethod_t); - static auto func_ptr = LoadSymbol("curandGeneratePoissonMethod"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, n, lambda, method); -} - -curandStatus_t CURANDAPI curandGenerateBinomial(curandGenerator_t generator, - unsigned int *outputPtr, - size_t num, unsigned int n, - double p) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned int *, - size_t, unsigned int, double); - static auto func_ptr = LoadSymbol("curandGenerateBinomial"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, num, n, p); -} - -curandStatus_t CURANDAPI curandGenerateBinomialMethod( - curandGenerator_t generator, unsigned int *outputPtr, size_t num, - unsigned int n, double p, curandMethod_t method) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned int *, size_t, - unsigned int, double, curandMethod_t); - static auto func_ptr = LoadSymbol("curandGenerateBinomialMethod"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, num, n, p, method); -} - -curandStatus_t CURANDAPI curandGenerateSeeds(curandGenerator_t generator) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t); - static auto func_ptr = LoadSymbol("curandGenerateSeeds"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator); -} - -curandStatus_t CURANDAPI curandGetDirectionVectors32( - curandDirectionVectors32_t *vectors[], curandDirectionVectorSet_t set) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandDirectionVectors32_t *[], - curandDirectionVectorSet_t); - static auto func_ptr = LoadSymbol("curandGetDirectionVectors32"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(vectors, set); -} - -curandStatus_t CURANDAPI -curandGetScrambleConstants32(unsigned int **constants) { - using FuncPtr = curandStatus_t(CURANDAPI *)(unsigned int **); - static auto func_ptr = 
LoadSymbol("curandGetScrambleConstants32"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(constants); -} - -curandStatus_t CURANDAPI curandGetDirectionVectors64( - curandDirectionVectors64_t *vectors[], curandDirectionVectorSet_t set) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandDirectionVectors64_t *[], - curandDirectionVectorSet_t); - static auto func_ptr = LoadSymbol("curandGetDirectionVectors64"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(vectors, set); -} - -curandStatus_t CURANDAPI -curandGetScrambleConstants64(unsigned long long **constants) { - using FuncPtr = curandStatus_t(CURANDAPI *)(unsigned long long **); - static auto func_ptr = LoadSymbol("curandGetScrambleConstants64"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(constants); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/curand_10_2.inc b/third_party/xla/third_party/tsl/tsl/cuda/curand_10_2.inc deleted file mode 100644 index 3c4c8fed67ac11..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/curand_10_2.inc +++ /dev/null @@ -1,268 +0,0 @@ -// Auto-generated, do not edit. 
- -extern "C" { - -curandStatus_t CURANDAPI curandCreateGenerator(curandGenerator_t *generator, - curandRngType_t rng_type) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t *, curandRngType_t); - static auto func_ptr = LoadSymbol("curandCreateGenerator"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, rng_type); -} - -curandStatus_t CURANDAPI curandCreateGeneratorHost(curandGenerator_t *generator, - curandRngType_t rng_type) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t *, curandRngType_t); - static auto func_ptr = LoadSymbol("curandCreateGeneratorHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, rng_type); -} - -curandStatus_t CURANDAPI curandDestroyGenerator(curandGenerator_t generator) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t); - static auto func_ptr = LoadSymbol("curandDestroyGenerator"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator); -} - -curandStatus_t CURANDAPI curandGetVersion(int *version) { - using FuncPtr = curandStatus_t(CURANDAPI *)(int *); - static auto func_ptr = LoadSymbol("curandGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(version); -} - -curandStatus_t CURANDAPI curandGetProperty(libraryPropertyType type, - int *value) { - using FuncPtr = curandStatus_t(CURANDAPI *)(libraryPropertyType, int *); - static auto func_ptr = LoadSymbol("curandGetProperty"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type, value); -} - -curandStatus_t CURANDAPI curandSetStream(curandGenerator_t generator, - cudaStream_t stream) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, cudaStream_t); - static auto func_ptr = LoadSymbol("curandSetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, stream); -} - -curandStatus_t CURANDAPI curandSetPseudoRandomGeneratorSeed( - curandGenerator_t 
generator, unsigned long long seed) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned long long); - static auto func_ptr = - LoadSymbol("curandSetPseudoRandomGeneratorSeed"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, seed); -} - -curandStatus_t CURANDAPI curandSetGeneratorOffset(curandGenerator_t generator, - unsigned long long offset) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned long long); - static auto func_ptr = LoadSymbol("curandSetGeneratorOffset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, offset); -} - -curandStatus_t CURANDAPI curandSetGeneratorOrdering(curandGenerator_t generator, - curandOrdering_t order) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, curandOrdering_t); - static auto func_ptr = LoadSymbol("curandSetGeneratorOrdering"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, order); -} - -curandStatus_t CURANDAPI curandSetQuasiRandomGeneratorDimensions( - curandGenerator_t generator, unsigned int num_dimensions) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned int); - static auto func_ptr = - LoadSymbol("curandSetQuasiRandomGeneratorDimensions"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, num_dimensions); -} - -curandStatus_t CURANDAPI curandGenerate(curandGenerator_t generator, - unsigned int *outputPtr, size_t num) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned int *, size_t); - static auto func_ptr = LoadSymbol("curandGenerate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, num); -} - -curandStatus_t CURANDAPI curandGenerateLongLong(curandGenerator_t generator, - unsigned long long *outputPtr, - size_t num) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, - unsigned long long *, size_t); - static auto 
func_ptr = LoadSymbol("curandGenerateLongLong"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, num); -} - -curandStatus_t CURANDAPI curandGenerateUniform(curandGenerator_t generator, - float *outputPtr, size_t num) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, float *, size_t); - static auto func_ptr = LoadSymbol("curandGenerateUniform"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, num); -} - -curandStatus_t CURANDAPI curandGenerateUniformDouble( - curandGenerator_t generator, double *outputPtr, size_t num) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, double *, size_t); - static auto func_ptr = LoadSymbol("curandGenerateUniformDouble"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, num); -} - -curandStatus_t CURANDAPI curandGenerateNormal(curandGenerator_t generator, - float *outputPtr, size_t n, - float mean, float stddev) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, float *, - size_t, float, float); - static auto func_ptr = LoadSymbol("curandGenerateNormal"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, n, mean, stddev); -} - -curandStatus_t CURANDAPI curandGenerateNormalDouble(curandGenerator_t generator, - double *outputPtr, size_t n, - double mean, - double stddev) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, double *, - size_t, double, double); - static auto func_ptr = LoadSymbol("curandGenerateNormalDouble"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, n, mean, stddev); -} - -curandStatus_t CURANDAPI curandGenerateLogNormal(curandGenerator_t generator, - float *outputPtr, size_t n, - float mean, float stddev) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, float *, - size_t, float, float); - static auto func_ptr = 
LoadSymbol("curandGenerateLogNormal"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, n, mean, stddev); -} - -curandStatus_t CURANDAPI -curandGenerateLogNormalDouble(curandGenerator_t generator, double *outputPtr, - size_t n, double mean, double stddev) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, double *, - size_t, double, double); - static auto func_ptr = LoadSymbol("curandGenerateLogNormalDouble"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, n, mean, stddev); -} - -curandStatus_t CURANDAPI curandCreatePoissonDistribution( - double lambda, curandDiscreteDistribution_t *discrete_distribution) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(double, curandDiscreteDistribution_t *); - static auto func_ptr = LoadSymbol("curandCreatePoissonDistribution"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(lambda, discrete_distribution); -} - -curandStatus_t CURANDAPI -curandDestroyDistribution(curandDiscreteDistribution_t discrete_distribution) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandDiscreteDistribution_t); - static auto func_ptr = LoadSymbol("curandDestroyDistribution"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(discrete_distribution); -} - -curandStatus_t CURANDAPI curandGeneratePoisson(curandGenerator_t generator, - unsigned int *outputPtr, - size_t n, double lambda) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned int *, - size_t, double); - static auto func_ptr = LoadSymbol("curandGeneratePoisson"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, n, lambda); -} - -curandStatus_t CURANDAPI curandGeneratePoissonMethod( - curandGenerator_t generator, unsigned int *outputPtr, size_t n, - double lambda, curandMethod_t method) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned int *, - size_t, double, 
curandMethod_t); - static auto func_ptr = LoadSymbol("curandGeneratePoissonMethod"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, n, lambda, method); -} - -curandStatus_t CURANDAPI curandGenerateBinomial(curandGenerator_t generator, - unsigned int *outputPtr, - size_t num, unsigned int n, - double p) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned int *, - size_t, unsigned int, double); - static auto func_ptr = LoadSymbol("curandGenerateBinomial"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, num, n, p); -} - -curandStatus_t CURANDAPI curandGenerateBinomialMethod( - curandGenerator_t generator, unsigned int *outputPtr, size_t num, - unsigned int n, double p, curandMethod_t method) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned int *, size_t, - unsigned int, double, curandMethod_t); - static auto func_ptr = LoadSymbol("curandGenerateBinomialMethod"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, num, n, p, method); -} - -curandStatus_t CURANDAPI curandGenerateSeeds(curandGenerator_t generator) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t); - static auto func_ptr = LoadSymbol("curandGenerateSeeds"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator); -} - -curandStatus_t CURANDAPI curandGetDirectionVectors32( - curandDirectionVectors32_t *vectors[], curandDirectionVectorSet_t set) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandDirectionVectors32_t *[], - curandDirectionVectorSet_t); - static auto func_ptr = LoadSymbol("curandGetDirectionVectors32"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(vectors, set); -} - -curandStatus_t CURANDAPI -curandGetScrambleConstants32(unsigned int **constants) { - using FuncPtr = curandStatus_t(CURANDAPI *)(unsigned int **); - static auto func_ptr = 
LoadSymbol("curandGetScrambleConstants32"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(constants); -} - -curandStatus_t CURANDAPI curandGetDirectionVectors64( - curandDirectionVectors64_t *vectors[], curandDirectionVectorSet_t set) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandDirectionVectors64_t *[], - curandDirectionVectorSet_t); - static auto func_ptr = LoadSymbol("curandGetDirectionVectors64"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(vectors, set); -} - -curandStatus_t CURANDAPI -curandGetScrambleConstants64(unsigned long long **constants) { - using FuncPtr = curandStatus_t(CURANDAPI *)(unsigned long long **); - static auto func_ptr = LoadSymbol("curandGetScrambleConstants64"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(constants); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/curand_11_0.inc b/third_party/xla/third_party/tsl/tsl/cuda/curand_11_0.inc deleted file mode 100644 index 3c4c8fed67ac11..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/curand_11_0.inc +++ /dev/null @@ -1,268 +0,0 @@ -// Auto-generated, do not edit. 
- -extern "C" { - -curandStatus_t CURANDAPI curandCreateGenerator(curandGenerator_t *generator, - curandRngType_t rng_type) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t *, curandRngType_t); - static auto func_ptr = LoadSymbol("curandCreateGenerator"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, rng_type); -} - -curandStatus_t CURANDAPI curandCreateGeneratorHost(curandGenerator_t *generator, - curandRngType_t rng_type) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t *, curandRngType_t); - static auto func_ptr = LoadSymbol("curandCreateGeneratorHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, rng_type); -} - -curandStatus_t CURANDAPI curandDestroyGenerator(curandGenerator_t generator) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t); - static auto func_ptr = LoadSymbol("curandDestroyGenerator"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator); -} - -curandStatus_t CURANDAPI curandGetVersion(int *version) { - using FuncPtr = curandStatus_t(CURANDAPI *)(int *); - static auto func_ptr = LoadSymbol("curandGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(version); -} - -curandStatus_t CURANDAPI curandGetProperty(libraryPropertyType type, - int *value) { - using FuncPtr = curandStatus_t(CURANDAPI *)(libraryPropertyType, int *); - static auto func_ptr = LoadSymbol("curandGetProperty"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type, value); -} - -curandStatus_t CURANDAPI curandSetStream(curandGenerator_t generator, - cudaStream_t stream) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, cudaStream_t); - static auto func_ptr = LoadSymbol("curandSetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, stream); -} - -curandStatus_t CURANDAPI curandSetPseudoRandomGeneratorSeed( - curandGenerator_t 
generator, unsigned long long seed) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned long long); - static auto func_ptr = - LoadSymbol("curandSetPseudoRandomGeneratorSeed"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, seed); -} - -curandStatus_t CURANDAPI curandSetGeneratorOffset(curandGenerator_t generator, - unsigned long long offset) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned long long); - static auto func_ptr = LoadSymbol("curandSetGeneratorOffset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, offset); -} - -curandStatus_t CURANDAPI curandSetGeneratorOrdering(curandGenerator_t generator, - curandOrdering_t order) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, curandOrdering_t); - static auto func_ptr = LoadSymbol("curandSetGeneratorOrdering"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, order); -} - -curandStatus_t CURANDAPI curandSetQuasiRandomGeneratorDimensions( - curandGenerator_t generator, unsigned int num_dimensions) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned int); - static auto func_ptr = - LoadSymbol("curandSetQuasiRandomGeneratorDimensions"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, num_dimensions); -} - -curandStatus_t CURANDAPI curandGenerate(curandGenerator_t generator, - unsigned int *outputPtr, size_t num) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned int *, size_t); - static auto func_ptr = LoadSymbol("curandGenerate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, num); -} - -curandStatus_t CURANDAPI curandGenerateLongLong(curandGenerator_t generator, - unsigned long long *outputPtr, - size_t num) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, - unsigned long long *, size_t); - static auto 
func_ptr = LoadSymbol("curandGenerateLongLong"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, num); -} - -curandStatus_t CURANDAPI curandGenerateUniform(curandGenerator_t generator, - float *outputPtr, size_t num) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, float *, size_t); - static auto func_ptr = LoadSymbol("curandGenerateUniform"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, num); -} - -curandStatus_t CURANDAPI curandGenerateUniformDouble( - curandGenerator_t generator, double *outputPtr, size_t num) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, double *, size_t); - static auto func_ptr = LoadSymbol("curandGenerateUniformDouble"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, num); -} - -curandStatus_t CURANDAPI curandGenerateNormal(curandGenerator_t generator, - float *outputPtr, size_t n, - float mean, float stddev) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, float *, - size_t, float, float); - static auto func_ptr = LoadSymbol("curandGenerateNormal"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, n, mean, stddev); -} - -curandStatus_t CURANDAPI curandGenerateNormalDouble(curandGenerator_t generator, - double *outputPtr, size_t n, - double mean, - double stddev) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, double *, - size_t, double, double); - static auto func_ptr = LoadSymbol("curandGenerateNormalDouble"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, n, mean, stddev); -} - -curandStatus_t CURANDAPI curandGenerateLogNormal(curandGenerator_t generator, - float *outputPtr, size_t n, - float mean, float stddev) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, float *, - size_t, float, float); - static auto func_ptr = 
LoadSymbol("curandGenerateLogNormal"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, n, mean, stddev); -} - -curandStatus_t CURANDAPI -curandGenerateLogNormalDouble(curandGenerator_t generator, double *outputPtr, - size_t n, double mean, double stddev) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, double *, - size_t, double, double); - static auto func_ptr = LoadSymbol("curandGenerateLogNormalDouble"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, n, mean, stddev); -} - -curandStatus_t CURANDAPI curandCreatePoissonDistribution( - double lambda, curandDiscreteDistribution_t *discrete_distribution) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(double, curandDiscreteDistribution_t *); - static auto func_ptr = LoadSymbol("curandCreatePoissonDistribution"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(lambda, discrete_distribution); -} - -curandStatus_t CURANDAPI -curandDestroyDistribution(curandDiscreteDistribution_t discrete_distribution) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandDiscreteDistribution_t); - static auto func_ptr = LoadSymbol("curandDestroyDistribution"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(discrete_distribution); -} - -curandStatus_t CURANDAPI curandGeneratePoisson(curandGenerator_t generator, - unsigned int *outputPtr, - size_t n, double lambda) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned int *, - size_t, double); - static auto func_ptr = LoadSymbol("curandGeneratePoisson"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, n, lambda); -} - -curandStatus_t CURANDAPI curandGeneratePoissonMethod( - curandGenerator_t generator, unsigned int *outputPtr, size_t n, - double lambda, curandMethod_t method) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned int *, - size_t, double, 
curandMethod_t); - static auto func_ptr = LoadSymbol("curandGeneratePoissonMethod"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, n, lambda, method); -} - -curandStatus_t CURANDAPI curandGenerateBinomial(curandGenerator_t generator, - unsigned int *outputPtr, - size_t num, unsigned int n, - double p) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned int *, - size_t, unsigned int, double); - static auto func_ptr = LoadSymbol("curandGenerateBinomial"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, num, n, p); -} - -curandStatus_t CURANDAPI curandGenerateBinomialMethod( - curandGenerator_t generator, unsigned int *outputPtr, size_t num, - unsigned int n, double p, curandMethod_t method) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned int *, size_t, - unsigned int, double, curandMethod_t); - static auto func_ptr = LoadSymbol("curandGenerateBinomialMethod"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, num, n, p, method); -} - -curandStatus_t CURANDAPI curandGenerateSeeds(curandGenerator_t generator) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t); - static auto func_ptr = LoadSymbol("curandGenerateSeeds"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator); -} - -curandStatus_t CURANDAPI curandGetDirectionVectors32( - curandDirectionVectors32_t *vectors[], curandDirectionVectorSet_t set) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandDirectionVectors32_t *[], - curandDirectionVectorSet_t); - static auto func_ptr = LoadSymbol("curandGetDirectionVectors32"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(vectors, set); -} - -curandStatus_t CURANDAPI -curandGetScrambleConstants32(unsigned int **constants) { - using FuncPtr = curandStatus_t(CURANDAPI *)(unsigned int **); - static auto func_ptr = 
LoadSymbol("curandGetScrambleConstants32"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(constants); -} - -curandStatus_t CURANDAPI curandGetDirectionVectors64( - curandDirectionVectors64_t *vectors[], curandDirectionVectorSet_t set) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandDirectionVectors64_t *[], - curandDirectionVectorSet_t); - static auto func_ptr = LoadSymbol("curandGetDirectionVectors64"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(vectors, set); -} - -curandStatus_t CURANDAPI -curandGetScrambleConstants64(unsigned long long **constants) { - using FuncPtr = curandStatus_t(CURANDAPI *)(unsigned long long **); - static auto func_ptr = LoadSymbol("curandGetScrambleConstants64"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(constants); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/curand_9_0.inc b/third_party/xla/third_party/tsl/tsl/cuda/curand_9_0.inc deleted file mode 100644 index 3c4c8fed67ac11..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/curand_9_0.inc +++ /dev/null @@ -1,268 +0,0 @@ -// Auto-generated, do not edit. 
- -extern "C" { - -curandStatus_t CURANDAPI curandCreateGenerator(curandGenerator_t *generator, - curandRngType_t rng_type) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t *, curandRngType_t); - static auto func_ptr = LoadSymbol("curandCreateGenerator"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, rng_type); -} - -curandStatus_t CURANDAPI curandCreateGeneratorHost(curandGenerator_t *generator, - curandRngType_t rng_type) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t *, curandRngType_t); - static auto func_ptr = LoadSymbol("curandCreateGeneratorHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, rng_type); -} - -curandStatus_t CURANDAPI curandDestroyGenerator(curandGenerator_t generator) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t); - static auto func_ptr = LoadSymbol("curandDestroyGenerator"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator); -} - -curandStatus_t CURANDAPI curandGetVersion(int *version) { - using FuncPtr = curandStatus_t(CURANDAPI *)(int *); - static auto func_ptr = LoadSymbol("curandGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(version); -} - -curandStatus_t CURANDAPI curandGetProperty(libraryPropertyType type, - int *value) { - using FuncPtr = curandStatus_t(CURANDAPI *)(libraryPropertyType, int *); - static auto func_ptr = LoadSymbol("curandGetProperty"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type, value); -} - -curandStatus_t CURANDAPI curandSetStream(curandGenerator_t generator, - cudaStream_t stream) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, cudaStream_t); - static auto func_ptr = LoadSymbol("curandSetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, stream); -} - -curandStatus_t CURANDAPI curandSetPseudoRandomGeneratorSeed( - curandGenerator_t 
generator, unsigned long long seed) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned long long); - static auto func_ptr = - LoadSymbol("curandSetPseudoRandomGeneratorSeed"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, seed); -} - -curandStatus_t CURANDAPI curandSetGeneratorOffset(curandGenerator_t generator, - unsigned long long offset) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned long long); - static auto func_ptr = LoadSymbol("curandSetGeneratorOffset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, offset); -} - -curandStatus_t CURANDAPI curandSetGeneratorOrdering(curandGenerator_t generator, - curandOrdering_t order) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, curandOrdering_t); - static auto func_ptr = LoadSymbol("curandSetGeneratorOrdering"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, order); -} - -curandStatus_t CURANDAPI curandSetQuasiRandomGeneratorDimensions( - curandGenerator_t generator, unsigned int num_dimensions) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned int); - static auto func_ptr = - LoadSymbol("curandSetQuasiRandomGeneratorDimensions"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, num_dimensions); -} - -curandStatus_t CURANDAPI curandGenerate(curandGenerator_t generator, - unsigned int *outputPtr, size_t num) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned int *, size_t); - static auto func_ptr = LoadSymbol("curandGenerate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, num); -} - -curandStatus_t CURANDAPI curandGenerateLongLong(curandGenerator_t generator, - unsigned long long *outputPtr, - size_t num) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, - unsigned long long *, size_t); - static auto 
func_ptr = LoadSymbol("curandGenerateLongLong"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, num); -} - -curandStatus_t CURANDAPI curandGenerateUniform(curandGenerator_t generator, - float *outputPtr, size_t num) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, float *, size_t); - static auto func_ptr = LoadSymbol("curandGenerateUniform"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, num); -} - -curandStatus_t CURANDAPI curandGenerateUniformDouble( - curandGenerator_t generator, double *outputPtr, size_t num) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, double *, size_t); - static auto func_ptr = LoadSymbol("curandGenerateUniformDouble"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, num); -} - -curandStatus_t CURANDAPI curandGenerateNormal(curandGenerator_t generator, - float *outputPtr, size_t n, - float mean, float stddev) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, float *, - size_t, float, float); - static auto func_ptr = LoadSymbol("curandGenerateNormal"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, n, mean, stddev); -} - -curandStatus_t CURANDAPI curandGenerateNormalDouble(curandGenerator_t generator, - double *outputPtr, size_t n, - double mean, - double stddev) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, double *, - size_t, double, double); - static auto func_ptr = LoadSymbol("curandGenerateNormalDouble"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, n, mean, stddev); -} - -curandStatus_t CURANDAPI curandGenerateLogNormal(curandGenerator_t generator, - float *outputPtr, size_t n, - float mean, float stddev) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, float *, - size_t, float, float); - static auto func_ptr = 
LoadSymbol("curandGenerateLogNormal"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, n, mean, stddev); -} - -curandStatus_t CURANDAPI -curandGenerateLogNormalDouble(curandGenerator_t generator, double *outputPtr, - size_t n, double mean, double stddev) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, double *, - size_t, double, double); - static auto func_ptr = LoadSymbol("curandGenerateLogNormalDouble"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, n, mean, stddev); -} - -curandStatus_t CURANDAPI curandCreatePoissonDistribution( - double lambda, curandDiscreteDistribution_t *discrete_distribution) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(double, curandDiscreteDistribution_t *); - static auto func_ptr = LoadSymbol("curandCreatePoissonDistribution"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(lambda, discrete_distribution); -} - -curandStatus_t CURANDAPI -curandDestroyDistribution(curandDiscreteDistribution_t discrete_distribution) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandDiscreteDistribution_t); - static auto func_ptr = LoadSymbol("curandDestroyDistribution"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(discrete_distribution); -} - -curandStatus_t CURANDAPI curandGeneratePoisson(curandGenerator_t generator, - unsigned int *outputPtr, - size_t n, double lambda) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned int *, - size_t, double); - static auto func_ptr = LoadSymbol("curandGeneratePoisson"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, n, lambda); -} - -curandStatus_t CURANDAPI curandGeneratePoissonMethod( - curandGenerator_t generator, unsigned int *outputPtr, size_t n, - double lambda, curandMethod_t method) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned int *, - size_t, double, 
curandMethod_t); - static auto func_ptr = LoadSymbol("curandGeneratePoissonMethod"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, n, lambda, method); -} - -curandStatus_t CURANDAPI curandGenerateBinomial(curandGenerator_t generator, - unsigned int *outputPtr, - size_t num, unsigned int n, - double p) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned int *, - size_t, unsigned int, double); - static auto func_ptr = LoadSymbol("curandGenerateBinomial"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, num, n, p); -} - -curandStatus_t CURANDAPI curandGenerateBinomialMethod( - curandGenerator_t generator, unsigned int *outputPtr, size_t num, - unsigned int n, double p, curandMethod_t method) { - using FuncPtr = - curandStatus_t(CURANDAPI *)(curandGenerator_t, unsigned int *, size_t, - unsigned int, double, curandMethod_t); - static auto func_ptr = LoadSymbol("curandGenerateBinomialMethod"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator, outputPtr, num, n, p, method); -} - -curandStatus_t CURANDAPI curandGenerateSeeds(curandGenerator_t generator) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandGenerator_t); - static auto func_ptr = LoadSymbol("curandGenerateSeeds"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(generator); -} - -curandStatus_t CURANDAPI curandGetDirectionVectors32( - curandDirectionVectors32_t *vectors[], curandDirectionVectorSet_t set) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandDirectionVectors32_t *[], - curandDirectionVectorSet_t); - static auto func_ptr = LoadSymbol("curandGetDirectionVectors32"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(vectors, set); -} - -curandStatus_t CURANDAPI -curandGetScrambleConstants32(unsigned int **constants) { - using FuncPtr = curandStatus_t(CURANDAPI *)(unsigned int **); - static auto func_ptr = 
LoadSymbol("curandGetScrambleConstants32"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(constants); -} - -curandStatus_t CURANDAPI curandGetDirectionVectors64( - curandDirectionVectors64_t *vectors[], curandDirectionVectorSet_t set) { - using FuncPtr = curandStatus_t(CURANDAPI *)(curandDirectionVectors64_t *[], - curandDirectionVectorSet_t); - static auto func_ptr = LoadSymbol("curandGetDirectionVectors64"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(vectors, set); -} - -curandStatus_t CURANDAPI -curandGetScrambleConstants64(unsigned long long **constants) { - using FuncPtr = curandStatus_t(CURANDAPI *)(unsigned long long **); - static auto func_ptr = LoadSymbol("curandGetScrambleConstants64"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(constants); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cusolver.symbols b/third_party/xla/third_party/tsl/tsl/cuda/cusolver.symbols new file mode 100644 index 00000000000000..f10b69198d56bb --- /dev/null +++ b/third_party/xla/third_party/tsl/tsl/cuda/cusolver.symbols @@ -0,0 +1,926 @@ +cublasCbatchAccGemm +cublasCbatchAccGemm_BufferSize +cublasCbatchGemm +cublasCbatchTrsm +cublasDbatchAccGemm +cublasDbatchAccGemm_BufferSize +cublasDbatchGemm +cublasDbatchTrsm +cublasSbatchAccGemm +cublasSbatchAccGemm_BufferSize +cublasSbatchGemm +cublasSbatchTrsm +cublasZbatchAccGemm +cublasZbatchAccGemm_BufferSize +cublasZbatchGemm +cublasZbatchTrsm +cusolverDnCCgels +cusolverDnCCgels_bufferSize +cusolverDnCCgesv +cusolverDnCCgesv_bufferSize +cusolverDnCEgels +cusolverDnCEgels_bufferSize +cusolverDnCEgesv +cusolverDnCEgesv_bufferSize +cusolverDnCKgels +cusolverDnCKgels_bufferSize +cusolverDnCKgesv +cusolverDnCKgesv_bufferSize +cusolverDnCYgels +cusolverDnCYgels_bufferSize +cusolverDnCYgesv +cusolverDnCYgesv_bufferSize +cusolverDnCaxpyHost +cusolverDnCeyeBatch +cusolverDnCgebrd +cusolverDnCgebrd_bufferSize +cusolverDnCgehd2 
+cusolverDnCgehd2_bufferSize +cusolverDnCgehrd +cusolverDnCgehrd_bufferSize +cusolverDnCgelqfHost +cusolverDnCgemmHost +cusolverDnCgeqlfHost +cusolverDnCgeqrf +cusolverDnCgeqrfHost +cusolverDnCgeqrf_ApI +cusolverDnCgeqrf_ApI_bufferSize +cusolverDnCgeqrf_bufferSize +cusolverDnCgercond +cusolverDnCgercond_bufferSize +cusolverDnCgesvd +cusolverDnCgesvd_bufferSize +cusolverDnCgesvdaStridedBatched +cusolverDnCgesvdaStridedBatched_bufferSize +cusolverDnCgesvdj +cusolverDnCgesvdjBatched +cusolverDnCgesvdjBatched_bufferSize +cusolverDnCgesvdj_bufferSize +cusolverDnCgetrf +cusolverDnCgetrf_bufferSize +cusolverDnCgetrs +cusolverDnCheevd +cusolverDnCheevd_bufferSize +cusolverDnCheevdx +cusolverDnCheevdx_bufferSize +cusolverDnCheevj +cusolverDnCheevjBatched +cusolverDnCheevjBatched_bufferSize +cusolverDnCheevj_bufferSize +cusolverDnChegst +cusolverDnChegst2 +cusolverDnChegst2_bufferSize +cusolverDnChegst_bufferSize +cusolverDnChegvd +cusolverDnChegvd_bufferSize +cusolverDnChegvdx +cusolverDnChegvdx_bufferSize +cusolverDnChegvj +cusolverDnChegvj_bufferSize +cusolverDnChemv +cusolverDnChemv_bufferSize +cusolverDnChetrd +cusolverDnChetrd_bufferSize +cusolverDnClacpyHost +cusolverDnClahr2 +cusolverDnClange +cusolverDnClangeHost +cusolverDnClange_bufferSize +cusolverDnClanhe +cusolverDnClanheHost +cusolverDnClanhe_bufferSize +cusolverDnClantrHost +cusolverDnClarfb +cusolverDnClarfgHost +cusolverDnClarft +cusolverDnClarft_bufferSize +cusolverDnClarnvHost +cusolverDnClascl +cusolverDnClasetHost +cusolverDnClaswp +cusolverDnClauum +cusolverDnClauumHost +cusolverDnClauum_bufferSize +cusolverDnCnrm2Host +cusolverDnCpolar +cusolverDnCpolar_bufferSize +cusolverDnCpotrf +cusolverDnCpotrfBatched +cusolverDnCpotrfHost +cusolverDnCpotrf_bufferSize +cusolverDnCpotri +cusolverDnCpotriHost +cusolverDnCpotri_bufferSize +cusolverDnCpotrs +cusolverDnCpotrsBatched +cusolverDnCpotrsHost +cusolverDnCreate +cusolverDnCreateGesvdjInfo +cusolverDnCreateParams +cusolverDnCreateSyevjInfo 
+cusolverDnCsscalHost +cusolverDnCsytrf +cusolverDnCsytrf_bufferSize +cusolverDnCsytri +cusolverDnCsytriHost +cusolverDnCsytri_bufferSize +cusolverDnCsytrsHost +cusolverDnCtrtriHost +cusolverDnCungbr +cusolverDnCungbrHost +cusolverDnCungbr_bufferSize +cusolverDnCunghrHost +cusolverDnCunglqHost +cusolverDnCungqr +cusolverDnCungqr_ApI +cusolverDnCungqr_ApI_bufferSize +cusolverDnCungqr_bufferSize +cusolverDnCungtr +cusolverDnCungtrHost +cusolverDnCungtr_bufferSize +cusolverDnCunmqr +cusolverDnCunmqrHost +cusolverDnCunmqr_bufferSize +cusolverDnCunmtr +cusolverDnCunmtrHost +cusolverDnCunmtr_bufferSize +cusolverDnDBgels +cusolverDnDBgels_bufferSize +cusolverDnDBgesv +cusolverDnDBgesv_bufferSize +cusolverDnDDgels +cusolverDnDDgels_bufferSize +cusolverDnDDgesv +cusolverDnDDgesv_bufferSize +cusolverDnDHgels +cusolverDnDHgels_bufferSize +cusolverDnDHgesv +cusolverDnDHgesv_bufferSize +cusolverDnDSgels +cusolverDnDSgels_bufferSize +cusolverDnDSgesv +cusolverDnDSgesv_bufferSize +cusolverDnDXgels +cusolverDnDXgels_bufferSize +cusolverDnDXgesv +cusolverDnDXgesv_bufferSize +cusolverDnDaxpyHost +cusolverDnDestroy +cusolverDnDestroyGesvdjInfo +cusolverDnDestroyParams +cusolverDnDestroySyevjInfo +cusolverDnDeyeBatch +cusolverDnDgebrd +cusolverDnDgebrd_bufferSize +cusolverDnDgehd2 +cusolverDnDgehd2_bufferSize +cusolverDnDgehrd +cusolverDnDgehrd_bufferSize +cusolverDnDgelqfHost +cusolverDnDgemmHost +cusolverDnDgeqlfHost +cusolverDnDgeqrf +cusolverDnDgeqrfHost +cusolverDnDgeqrf_ApI +cusolverDnDgeqrf_ApI_bufferSize +cusolverDnDgeqrf_bufferSize +cusolverDnDgercond +cusolverDnDgercond_bufferSize +cusolverDnDgesvd +cusolverDnDgesvd_bufferSize +cusolverDnDgesvdaStridedBatched +cusolverDnDgesvdaStridedBatched_bufferSize +cusolverDnDgesvdj +cusolverDnDgesvdjBatched +cusolverDnDgesvdjBatched_bufferSize +cusolverDnDgesvdj_bufferSize +cusolverDnDgetrf +cusolverDnDgetrf_bufferSize +cusolverDnDgetrs +cusolverDnDlacpyHost +cusolverDnDlahr2 +cusolverDnDlamchHost +cusolverDnDlange 
+cusolverDnDlangeHost +cusolverDnDlange_bufferSize +cusolverDnDlansy +cusolverDnDlansyHost +cusolverDnDlansy_bufferSize +cusolverDnDlantrHost +cusolverDnDlarfb +cusolverDnDlarfgHost +cusolverDnDlarft +cusolverDnDlarft_bufferSize +cusolverDnDlarnvHost +cusolverDnDlascl +cusolverDnDlasetHost +cusolverDnDlaswp +cusolverDnDlauum +cusolverDnDlauumHost +cusolverDnDlauum_bufferSize +cusolverDnDnrm2Host +cusolverDnDorgbr +cusolverDnDorgbrHost +cusolverDnDorgbr_bufferSize +cusolverDnDorghrHost +cusolverDnDorglqHost +cusolverDnDorgqr +cusolverDnDorgqr_ApI +cusolverDnDorgqr_ApI_bufferSize +cusolverDnDorgqr_bufferSize +cusolverDnDorgtr +cusolverDnDorgtrHost +cusolverDnDorgtr_bufferSize +cusolverDnDormqr +cusolverDnDormqrHost +cusolverDnDormqr_bufferSize +cusolverDnDormtr +cusolverDnDormtrHost +cusolverDnDormtr_bufferSize +cusolverDnDpolar +cusolverDnDpolar_bufferSize +cusolverDnDpotrf +cusolverDnDpotrfBatched +cusolverDnDpotrfHost +cusolverDnDpotrf_bufferSize +cusolverDnDpotri +cusolverDnDpotriHost +cusolverDnDpotri_bufferSize +cusolverDnDpotrs +cusolverDnDpotrsBatched +cusolverDnDpotrsHost +cusolverDnDscalHost +cusolverDnDsteqrHost +cusolverDnDsterfHost +cusolverDnDsyevd +cusolverDnDsyevd_bufferSize +cusolverDnDsyevdx +cusolverDnDsyevdx_bufferSize +cusolverDnDsyevj +cusolverDnDsyevjBatched +cusolverDnDsyevjBatched_bufferSize +cusolverDnDsyevj_bufferSize +cusolverDnDsygst +cusolverDnDsygst2 +cusolverDnDsygst2_bufferSize +cusolverDnDsygst_bufferSize +cusolverDnDsygvd +cusolverDnDsygvd_bufferSize +cusolverDnDsygvdx +cusolverDnDsygvdx_bufferSize +cusolverDnDsygvj +cusolverDnDsygvj_bufferSize +cusolverDnDsymv +cusolverDnDsymv_bufferSize +cusolverDnDsytrd +cusolverDnDsytrd_bufferSize +cusolverDnDsytrf +cusolverDnDsytrf_bufferSize +cusolverDnDsytri +cusolverDnDsytriHost +cusolverDnDsytri_bufferSize +cusolverDnDsytrsHost +cusolverDnDtrtriHost +cusolverDnGeqrf +cusolverDnGeqrf_bufferSize +cusolverDnGesvd +cusolverDnGesvd_bufferSize +cusolverDnGetDeterministicMode +cusolverDnGetStream 
+cusolverDnGetrf +cusolverDnGetrf_bufferSize +cusolverDnGetrs +cusolverDnIRSInfosCreate +cusolverDnIRSInfosDestroy +cusolverDnIRSInfosGetMaxIters +cusolverDnIRSInfosGetNiters +cusolverDnIRSInfosGetOuterNiters +cusolverDnIRSInfosGetResidualHistory +cusolverDnIRSInfosRequestResidual +cusolverDnIRSParamsCreate +cusolverDnIRSParamsDestroy +cusolverDnIRSParamsDisableFallback +cusolverDnIRSParamsEnableFallback +cusolverDnIRSParamsGetMaxIters +cusolverDnIRSParamsSetMaxIters +cusolverDnIRSParamsSetMaxItersInner +cusolverDnIRSParamsSetRefinementSolver +cusolverDnIRSParamsSetSolverLowestPrecision +cusolverDnIRSParamsSetSolverMainPrecision +cusolverDnIRSParamsSetSolverPrecisions +cusolverDnIRSParamsSetTol +cusolverDnIRSParamsSetTolInner +cusolverDnIRSXgels +cusolverDnIRSXgels_bufferSize +cusolverDnIRSXgesv +cusolverDnIRSXgesv_bufferSize +cusolverDnPotrf +cusolverDnPotrf_bufferSize +cusolverDnPotrs +cusolverDnSBgels +cusolverDnSBgels_bufferSize +cusolverDnSBgesv +cusolverDnSBgesv_bufferSize +cusolverDnSHgels +cusolverDnSHgels_bufferSize +cusolverDnSHgesv +cusolverDnSHgesv_bufferSize +cusolverDnSSgels +cusolverDnSSgels_bufferSize +cusolverDnSSgesv +cusolverDnSSgesv_bufferSize +cusolverDnSXgels +cusolverDnSXgels_bufferSize +cusolverDnSXgesv +cusolverDnSXgesv_bufferSize +cusolverDnSaxpyHost +cusolverDnSetAdvOptions +cusolverDnSetDeterministicMode +cusolverDnSetStream +cusolverDnSeyeBatch +cusolverDnSgebrd +cusolverDnSgebrd_bufferSize +cusolverDnSgehd2 +cusolverDnSgehd2_bufferSize +cusolverDnSgehrd +cusolverDnSgehrd_bufferSize +cusolverDnSgelqfHost +cusolverDnSgemmHost +cusolverDnSgeqlfHost +cusolverDnSgeqrf +cusolverDnSgeqrfHost +cusolverDnSgeqrf_ApI +cusolverDnSgeqrf_ApI_bufferSize +cusolverDnSgeqrf_bufferSize +cusolverDnSgercond +cusolverDnSgercond_bufferSize +cusolverDnSgesvd +cusolverDnSgesvd_bufferSize +cusolverDnSgesvdaStridedBatched +cusolverDnSgesvdaStridedBatched_bufferSize +cusolverDnSgesvdj +cusolverDnSgesvdjBatched +cusolverDnSgesvdjBatched_bufferSize 
+cusolverDnSgesvdj_bufferSize +cusolverDnSgetrf +cusolverDnSgetrf_bufferSize +cusolverDnSgetrs +cusolverDnSlacpyHost +cusolverDnSlahr2 +cusolverDnSlamchHost +cusolverDnSlange +cusolverDnSlangeHost +cusolverDnSlange_bufferSize +cusolverDnSlansy +cusolverDnSlansyHost +cusolverDnSlansy_bufferSize +cusolverDnSlantrHost +cusolverDnSlarfb +cusolverDnSlarfgHost +cusolverDnSlarft +cusolverDnSlarft_bufferSize +cusolverDnSlarnvHost +cusolverDnSlascl +cusolverDnSlasetHost +cusolverDnSlaswp +cusolverDnSlauum +cusolverDnSlauumHost +cusolverDnSlauum_bufferSize +cusolverDnSnrm2Host +cusolverDnSorgbr +cusolverDnSorgbrHost +cusolverDnSorgbr_bufferSize +cusolverDnSorghrHost +cusolverDnSorglqHost +cusolverDnSorgqr +cusolverDnSorgqr_ApI +cusolverDnSorgqr_ApI_bufferSize +cusolverDnSorgqr_bufferSize +cusolverDnSorgtr +cusolverDnSorgtrHost +cusolverDnSorgtr_bufferSize +cusolverDnSormqr +cusolverDnSormqrHost +cusolverDnSormqr_bufferSize +cusolverDnSormtr +cusolverDnSormtrHost +cusolverDnSormtr_bufferSize +cusolverDnSpolar +cusolverDnSpolar_bufferSize +cusolverDnSpotrf +cusolverDnSpotrfBatched +cusolverDnSpotrfHost +cusolverDnSpotrf_bufferSize +cusolverDnSpotri +cusolverDnSpotriHost +cusolverDnSpotri_bufferSize +cusolverDnSpotrs +cusolverDnSpotrsBatched +cusolverDnSpotrsHost +cusolverDnSscalHost +cusolverDnSsteqrHost +cusolverDnSsterfHost +cusolverDnSsyevd +cusolverDnSsyevd_bufferSize +cusolverDnSsyevdx +cusolverDnSsyevdx_bufferSize +cusolverDnSsyevj +cusolverDnSsyevjBatched +cusolverDnSsyevjBatched_bufferSize +cusolverDnSsyevj_bufferSize +cusolverDnSsygst +cusolverDnSsygst2 +cusolverDnSsygst2_bufferSize +cusolverDnSsygst_bufferSize +cusolverDnSsygvd +cusolverDnSsygvd_bufferSize +cusolverDnSsygvdx +cusolverDnSsygvdx_bufferSize +cusolverDnSsygvj +cusolverDnSsygvj_bufferSize +cusolverDnSsymv +cusolverDnSsymv_bufferSize +cusolverDnSsytrd +cusolverDnSsytrd_bufferSize +cusolverDnSsytrf +cusolverDnSsytrf_bufferSize +cusolverDnSsytri +cusolverDnSsytriHost +cusolverDnSsytri_bufferSize 
+cusolverDnSsytrsHost +cusolverDnStrtriHost +cusolverDnSyevd +cusolverDnSyevd_bufferSize +cusolverDnSyevdx +cusolverDnSyevdx_bufferSize +cusolverDnXfillRandNormalOnDevice +cusolverDnXfillRandNormalOnHost +cusolverDnXgeqrf +cusolverDnXgeqrf_bufferSize +cusolverDnXgesvd +cusolverDnXgesvd_bufferSize +cusolverDnXgesvdjGetResidual +cusolverDnXgesvdjGetSweeps +cusolverDnXgesvdjSetMaxSweeps +cusolverDnXgesvdjSetSortEig +cusolverDnXgesvdjSetTolerance +cusolverDnXgesvdp +cusolverDnXgesvdp_bufferSize +cusolverDnXgesvdr +cusolverDnXgesvdr_bufferSize +cusolverDnXgetrf +cusolverDnXgetrf_bufferSize +cusolverDnXgetrs +cusolverDnXpotrf +cusolverDnXpotrf_bufferSize +cusolverDnXpotrs +cusolverDnXsyevd +cusolverDnXsyevd_bufferSize +cusolverDnXsyevdx +cusolverDnXsyevdx_bufferSize +cusolverDnXsyevjGetResidual +cusolverDnXsyevjGetSweeps +cusolverDnXsyevjSetMaxSweeps +cusolverDnXsyevjSetSortEig +cusolverDnXsyevjSetTolerance +cusolverDnXsytrs +cusolverDnXsytrs_bufferSize +cusolverDnXtrtri +cusolverDnXtrtri_bufferSize +cusolverDnZCgels +cusolverDnZCgels_bufferSize +cusolverDnZCgesv +cusolverDnZCgesv_bufferSize +cusolverDnZEgels +cusolverDnZEgels_bufferSize +cusolverDnZEgesv +cusolverDnZEgesv_bufferSize +cusolverDnZKgels +cusolverDnZKgels_bufferSize +cusolverDnZKgesv +cusolverDnZKgesv_bufferSize +cusolverDnZYgels +cusolverDnZYgels_bufferSize +cusolverDnZYgesv +cusolverDnZYgesv_bufferSize +cusolverDnZZgels +cusolverDnZZgels_bufferSize +cusolverDnZZgesv +cusolverDnZZgesv_bufferSize +cusolverDnZaxpyHost +cusolverDnZdscalHost +cusolverDnZeyeBatch +cusolverDnZgebrd +cusolverDnZgebrd_bufferSize +cusolverDnZgehd2 +cusolverDnZgehd2_bufferSize +cusolverDnZgehrd +cusolverDnZgehrd_bufferSize +cusolverDnZgelqfHost +cusolverDnZgemmHost +cusolverDnZgeqlfHost +cusolverDnZgeqrf +cusolverDnZgeqrfHost +cusolverDnZgeqrf_ApI +cusolverDnZgeqrf_ApI_bufferSize +cusolverDnZgeqrf_bufferSize +cusolverDnZgercond +cusolverDnZgercond_bufferSize +cusolverDnZgesvd +cusolverDnZgesvd_bufferSize 
+cusolverDnZgesvdaStridedBatched +cusolverDnZgesvdaStridedBatched_bufferSize +cusolverDnZgesvdj +cusolverDnZgesvdjBatched +cusolverDnZgesvdjBatched_bufferSize +cusolverDnZgesvdj_bufferSize +cusolverDnZgetrf +cusolverDnZgetrf_bufferSize +cusolverDnZgetrs +cusolverDnZheevd +cusolverDnZheevd_bufferSize +cusolverDnZheevdx +cusolverDnZheevdx_bufferSize +cusolverDnZheevj +cusolverDnZheevjBatched +cusolverDnZheevjBatched_bufferSize +cusolverDnZheevj_bufferSize +cusolverDnZhegst +cusolverDnZhegst2 +cusolverDnZhegst2_bufferSize +cusolverDnZhegst_bufferSize +cusolverDnZhegvd +cusolverDnZhegvd_bufferSize +cusolverDnZhegvdx +cusolverDnZhegvdx_bufferSize +cusolverDnZhegvj +cusolverDnZhegvj_bufferSize +cusolverDnZhemv +cusolverDnZhemv_bufferSize +cusolverDnZhetrd +cusolverDnZhetrd_bufferSize +cusolverDnZlacpyHost +cusolverDnZlahr2 +cusolverDnZlange +cusolverDnZlangeHost +cusolverDnZlange_bufferSize +cusolverDnZlanhe +cusolverDnZlanheHost +cusolverDnZlanhe_bufferSize +cusolverDnZlantrHost +cusolverDnZlarfb +cusolverDnZlarfgHost +cusolverDnZlarft +cusolverDnZlarft_bufferSize +cusolverDnZlarnvHost +cusolverDnZlascl +cusolverDnZlasetHost +cusolverDnZlaswp +cusolverDnZlauum +cusolverDnZlauumHost +cusolverDnZlauum_bufferSize +cusolverDnZnrm2Host +cusolverDnZpolar +cusolverDnZpolar_bufferSize +cusolverDnZpotrf +cusolverDnZpotrfBatched +cusolverDnZpotrfHost +cusolverDnZpotrf_bufferSize +cusolverDnZpotri +cusolverDnZpotriHost +cusolverDnZpotri_bufferSize +cusolverDnZpotrs +cusolverDnZpotrsBatched +cusolverDnZpotrsHost +cusolverDnZsytrf +cusolverDnZsytrf_bufferSize +cusolverDnZsytri +cusolverDnZsytriHost +cusolverDnZsytri_bufferSize +cusolverDnZsytrsHost +cusolverDnZtrtriHost +cusolverDnZungbr +cusolverDnZungbrHost +cusolverDnZungbr_bufferSize +cusolverDnZunghrHost +cusolverDnZunglqHost +cusolverDnZungqr +cusolverDnZungqr_ApI +cusolverDnZungqr_ApI_bufferSize +cusolverDnZungqr_bufferSize +cusolverDnZungtr +cusolverDnZungtrHost +cusolverDnZungtr_bufferSize +cusolverDnZunmqr 
+cusolverDnZunmqrHost +cusolverDnZunmqr_bufferSize +cusolverDnZunmtr +cusolverDnZunmtrHost +cusolverDnZunmtr_bufferSize +cusolverGetProperty +cusolverGetVersion +cusolverIidentityHost +cusolverRfAccessBundledFactorsDevice +cusolverRfAnalyze +cusolverRfBatchAnalyze +cusolverRfBatchRefactor +cusolverRfBatchResetValues +cusolverRfBatchSetupHost +cusolverRfBatchSolve +cusolverRfBatchZeroPivot +cusolverRfCreate +cusolverRfDestroy +cusolverRfExtractBundledFactorsHost +cusolverRfExtractSplitFactorsHost +cusolverRfGetAlgs +cusolverRfGetMatrixFormat +cusolverRfGetNumericBoostReport +cusolverRfGetNumericProperties +cusolverRfGetResetValuesFastMode +cusolverRfRefactor +cusolverRfResetValues +cusolverRfSetAlgs +cusolverRfSetMatrixFormat +cusolverRfSetNumericProperties +cusolverRfSetResetValuesFastMode +cusolverRfSetupDevice +cusolverRfSetupHost +cusolverRfSetupM +cusolverRfSolve +cusolverSpCcsrbtfHost +cusolverSpCcsrcholBufferInfo +cusolverSpCcsrcholBufferInfoHost +cusolverSpCcsrcholDiag +cusolverSpCcsrcholFactor +cusolverSpCcsrcholFactorHost +cusolverSpCcsrcholSolve +cusolverSpCcsrcholSolveHost +cusolverSpCcsrcholZeroPivot +cusolverSpCcsrcholZeroPivotHost +cusolverSpCcsreigsHost +cusolverSpCcsreigvsi +cusolverSpCcsreigvsiHost +cusolverSpCcsrlsqvqrHost +cusolverSpCcsrlsvchol +cusolverSpCcsrlsvcholHost +cusolverSpCcsrlsvluHost +cusolverSpCcsrlsvqr +cusolverSpCcsrlsvqrHost +cusolverSpCcsrluAnalysisHost +cusolverSpCcsrluBufferInfoHost +cusolverSpCcsrluExtractBTFHost +cusolverSpCcsrluExtractHost +cusolverSpCcsrluExtractMHost +cusolverSpCcsrluFactorHost +cusolverSpCcsrluSolveHost +cusolverSpCcsrluZeroPivotHost +cusolverSpCcsrlucondHost +cusolverSpCcsrqrBufferInfo +cusolverSpCcsrqrBufferInfoBatched +cusolverSpCcsrqrBufferInfoHost +cusolverSpCcsrqrFactor +cusolverSpCcsrqrFactorHost +cusolverSpCcsrqrSetup +cusolverSpCcsrqrSetupHost +cusolverSpCcsrqrSolve +cusolverSpCcsrqrSolveHost +cusolverSpCcsrqrZeroPivot +cusolverSpCcsrqrZeroPivotHost +cusolverSpCcsrqrrcond 
+cusolverSpCcsrqrsvBatched +cusolverSpCcsrsubm2denseHost +cusolverSpCcsrzfdHost +cusolverSpCreate +cusolverSpCreateCsrcholInfo +cusolverSpCreateCsrcholInfoHost +cusolverSpCreateCsrluInfoHost +cusolverSpCreateCsrqrInfo +cusolverSpCreateCsrqrInfoHost +cusolverSpCreateGluInfo +cusolverSpCreateSnluInfo +cusolverSpCsnluBuffersize +cusolverSpCsnluFactor +cusolverSpCsnluSolve +cusolverSpCsymgthr +cusolverSpCsymgthrHost +cusolverSpDcsrbtfHost +cusolverSpDcsrcholBufferInfo +cusolverSpDcsrcholBufferInfoHost +cusolverSpDcsrcholDiag +cusolverSpDcsrcholFactor +cusolverSpDcsrcholFactorHost +cusolverSpDcsrcholSolve +cusolverSpDcsrcholSolveHost +cusolverSpDcsrcholZeroPivot +cusolverSpDcsrcholZeroPivotHost +cusolverSpDcsreigsHost +cusolverSpDcsreigvsi +cusolverSpDcsreigvsiHost +cusolverSpDcsrlsqvqrHost +cusolverSpDcsrlsvchol +cusolverSpDcsrlsvcholHost +cusolverSpDcsrlsvluHost +cusolverSpDcsrlsvqr +cusolverSpDcsrlsvqrHost +cusolverSpDcsrluAnalysisHost +cusolverSpDcsrluBufferInfoHost +cusolverSpDcsrluExtractBTFHost +cusolverSpDcsrluExtractHost +cusolverSpDcsrluExtractMHost +cusolverSpDcsrluFactorHost +cusolverSpDcsrluSolveHost +cusolverSpDcsrluZeroPivotHost +cusolverSpDcsrlucondHost +cusolverSpDcsrqrBufferInfo +cusolverSpDcsrqrBufferInfoBatched +cusolverSpDcsrqrBufferInfoHost +cusolverSpDcsrqrFactor +cusolverSpDcsrqrFactorHost +cusolverSpDcsrqrSetup +cusolverSpDcsrqrSetupHost +cusolverSpDcsrqrSolve +cusolverSpDcsrqrSolveHost +cusolverSpDcsrqrZeroPivot +cusolverSpDcsrqrZeroPivotHost +cusolverSpDcsrqrrcond +cusolverSpDcsrqrsvBatched +cusolverSpDcsrsubm2denseHost +cusolverSpDcsrzfdHost +cusolverSpDestroy +cusolverSpDestroyCsrcholInfo +cusolverSpDestroyCsrcholInfoHost +cusolverSpDestroyCsrluInfoHost +cusolverSpDestroyCsrqrInfo +cusolverSpDestroyCsrqrInfoHost +cusolverSpDestroyGluInfo +cusolverSpDestroySnluInfo +cusolverSpDgluAnalysis +cusolverSpDgluBufferSize +cusolverSpDgluExtractMHost +cusolverSpDgluFactor +cusolverSpDgluNumericBoost +cusolverSpDgluReset +cusolverSpDgluSetup 
+cusolverSpDgluSetupByTile +cusolverSpDgluSolve +cusolverSpDnrminf +cusolverSpDsnluBuffersize +cusolverSpDsnluFactor +cusolverSpDsnluSolve +cusolverSpDsymgthr +cusolverSpDsymgthrHost +cusolverSpGetStream +cusolverSpLcsrqrBufferInfo +cusolverSpLcsrqrFactor +cusolverSpLcsrqrSetup +cusolverSpLcsrqrSolve +cusolverSpLcsrqrSolveRefine +cusolverSpLcsrqrZeroPivot +cusolverSpLcsrqrrcond +cusolverSpQcsrqrBufferInfo +cusolverSpQcsrqrFactor +cusolverSpQcsrqrSetup +cusolverSpQcsrqrSolve +cusolverSpQcsrqrSolveRefine +cusolverSpQcsrqrZeroPivot +cusolverSpQcsrqrrcond +cusolverSpScsrbtfHost +cusolverSpScsrcholBufferInfo +cusolverSpScsrcholBufferInfoHost +cusolverSpScsrcholDiag +cusolverSpScsrcholFactor +cusolverSpScsrcholFactorHost +cusolverSpScsrcholSolve +cusolverSpScsrcholSolveHost +cusolverSpScsrcholZeroPivot +cusolverSpScsrcholZeroPivotHost +cusolverSpScsreigsHost +cusolverSpScsreigvsi +cusolverSpScsreigvsiHost +cusolverSpScsrlsqvqrHost +cusolverSpScsrlsvchol +cusolverSpScsrlsvcholHost +cusolverSpScsrlsvluHost +cusolverSpScsrlsvqr +cusolverSpScsrlsvqrHost +cusolverSpScsrluAnalysisHost +cusolverSpScsrluBufferInfoHost +cusolverSpScsrluExtractBTFHost +cusolverSpScsrluExtractHost +cusolverSpScsrluExtractMHost +cusolverSpScsrluFactorHost +cusolverSpScsrluSolveHost +cusolverSpScsrluZeroPivotHost +cusolverSpScsrlucondHost +cusolverSpScsrqrBufferInfo +cusolverSpScsrqrBufferInfoBatched +cusolverSpScsrqrBufferInfoHost +cusolverSpScsrqrFactor +cusolverSpScsrqrFactorHost +cusolverSpScsrqrSetup +cusolverSpScsrqrSetupHost +cusolverSpScsrqrSolve +cusolverSpScsrqrSolveHost +cusolverSpScsrqrZeroPivot +cusolverSpScsrqrZeroPivotHost +cusolverSpScsrqrrcond +cusolverSpScsrqrsvBatched +cusolverSpScsrsubm2denseHost +cusolverSpScsrzfdHost +cusolverSpSetStream +cusolverSpSsnluBuffersize +cusolverSpSsnluFactor +cusolverSpSsnluSolve +cusolverSpSsymgthr +cusolverSpSsymgthrHost +cusolverSpXcsr2cscHost +cusolverSpXcsr2csc_bufferSizeHost +cusolverSpXcsrcholAnalysis +cusolverSpXcsrcholAnalysisHost 
+cusolverSpXcsrissymHost +cusolverSpXcsrluAnalysisHost +cusolverSpXcsrluConfigHost +cusolverSpXcsrluNnzHost +cusolverSpXcsrluNnzMHost +cusolverSpXcsrmetisndHost +cusolverSpXcsrnsepHost +cusolverSpXcsrpermHost +cusolverSpXcsrperm_bufferSizeHost +cusolverSpXcsrqrAnalysis +cusolverSpXcsrqrAnalysisBatched +cusolverSpXcsrqrAnalysisHost +cusolverSpXcsrqrConfigInfo +cusolverSpXcsrsubmHost +cusolverSpXcsrsubmNnzHost +cusolverSpXcsrsymamdHost +cusolverSpXcsrsymmdqHost +cusolverSpXcsrsympermHost +cusolverSpXcsrsymperm_bufferSizeHost +cusolverSpXcsrsymrcmHost +cusolverSpXgluZeroPivot +cusolverSpXsnluAnalysis +cusolverSpXsnluFlops +cusolverSpXsnluReport +cusolverSpXsnluResizeSupernodeBy +cusolverSpXsnluSetReorder +cusolverSpZcsrbtfHost +cusolverSpZcsrcholBufferInfo +cusolverSpZcsrcholBufferInfoHost +cusolverSpZcsrcholDiag +cusolverSpZcsrcholFactor +cusolverSpZcsrcholFactorHost +cusolverSpZcsrcholSolve +cusolverSpZcsrcholSolveHost +cusolverSpZcsrcholZeroPivot +cusolverSpZcsrcholZeroPivotHost +cusolverSpZcsreigsHost +cusolverSpZcsreigvsi +cusolverSpZcsreigvsiHost +cusolverSpZcsrlsqvqrHost +cusolverSpZcsrlsvchol +cusolverSpZcsrlsvcholHost +cusolverSpZcsrlsvluHost +cusolverSpZcsrlsvqr +cusolverSpZcsrlsvqrHost +cusolverSpZcsrluAnalysisHost +cusolverSpZcsrluBufferInfoHost +cusolverSpZcsrluExtractBTFHost +cusolverSpZcsrluExtractHost +cusolverSpZcsrluExtractMHost +cusolverSpZcsrluFactorHost +cusolverSpZcsrluSolveHost +cusolverSpZcsrluZeroPivotHost +cusolverSpZcsrlucondHost +cusolverSpZcsrqrBufferInfo +cusolverSpZcsrqrBufferInfoBatched +cusolverSpZcsrqrBufferInfoHost +cusolverSpZcsrqrFactor +cusolverSpZcsrqrFactorHost +cusolverSpZcsrqrSetup +cusolverSpZcsrqrSetupHost +cusolverSpZcsrqrSolve +cusolverSpZcsrqrSolveHost +cusolverSpZcsrqrZeroPivot +cusolverSpZcsrqrZeroPivotHost +cusolverSpZcsrqrrcond +cusolverSpZcsrqrsvBatched +cusolverSpZcsrsubm2denseHost +cusolverSpZcsrzfdHost +cusolverSpZsnluBuffersize +cusolverSpZsnluFactor +cusolverSpZsnluSolve +cusolverSpZsymgthr 
+cusolverSpZsymgthrHost +cusolverXcsrqr_dump +pegasusCcsr2dense +pegasusCcsrmvHost +pegasusDcsr2dense +pegasusDcsrmvHost +pegasusScsr2dense +pegasusScsrmvHost +pegasusXcoo2csr +pegasusXcooStableSortByRow +pegasusXcooStableSort_bufferSizeExt +pegasusXcsr2coo +pegasusZcsr2dense +pegasusZcsrmvHost diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cusolver_dense_10_0.inc b/third_party/xla/third_party/tsl/tsl/cuda/cusolver_dense_10_0.inc deleted file mode 100644 index d01117faeb2439..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cusolver_dense_10_0.inc +++ /dev/null @@ -1,2283 +0,0 @@ -// Auto-generated, do not edit. - -extern "C" { - -cusolverStatus_t CUSOLVERAPI cusolverGetProperty(libraryPropertyType type, - int *value) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(libraryPropertyType, int *); - static auto func_ptr = LoadSymbol("cusolverGetProperty"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type, value); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCreate(cusolverDnHandle_t *handle) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t *); - static auto func_ptr = LoadSymbol("cusolverDnCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDestroy(cusolverDnHandle_t handle) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t); - static auto func_ptr = LoadSymbol("cusolverDnDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSetStream(cusolverDnHandle_t handle, - cudaStream_t streamId) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cusolverDnSetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnGetStream(cusolverDnHandle_t handle, - cudaStream_t *streamId) 
{ - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cudaStream_t *); - static auto func_ptr = LoadSymbol("cusolverDnGetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnSpotrf_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, float *A, int lda, int *Lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSpotrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, Lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnDpotrf_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, double *A, int lda, int *Lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDpotrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, Lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCpotrf_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, cuComplex *A, int lda, int *Lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCpotrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, Lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZpotrf_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, cuDoubleComplex *A, int lda, int *Lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZpotrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, uplo, n, A, lda, Lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSpotrf(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - float *A, int lda, - float *Workspace, int Lwork, - int *devInfo) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - float *, int, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSpotrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, Workspace, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDpotrf(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - double *A, int lda, - double *Workspace, int Lwork, - int *devInfo) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - double *, int, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDpotrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, Workspace, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCpotrf(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuComplex *A, int lda, - cuComplex *Workspace, int Lwork, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, cuComplex *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCpotrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, Workspace, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZpotrf(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuDoubleComplex *A, int lda, - cuDoubleComplex *Workspace, - int Lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, - cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZpotrf"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, Workspace, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSpotrs(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - int nrhs, const float *A, int lda, - float *B, int ldb, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const float *, int, - float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSpotrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, B, ldb, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDpotrs(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - int nrhs, const double *A, - int lda, double *B, int ldb, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const double *, int, - double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDpotrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, B, ldb, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCpotrs(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - int nrhs, const cuComplex *A, - int lda, cuComplex *B, int ldb, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const cuComplex *, int, - cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCpotrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, B, ldb, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZpotrs(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - int nrhs, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *B, int ldb, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const cuDoubleComplex *, - int, cuDoubleComplex *, int, 
int *); - static auto func_ptr = LoadSymbol("cusolverDnZpotrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, B, ldb, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSpotrfBatched(cusolverDnHandle_t handle, - cublasFillMode_t uplo, - int n, float *Aarray[], - int lda, int *infoArray, - int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, float *[], int, int *, int); - static auto func_ptr = LoadSymbol("cusolverDnSpotrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, Aarray, lda, infoArray, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDpotrfBatched(cusolverDnHandle_t handle, - cublasFillMode_t uplo, - int n, double *Aarray[], - int lda, int *infoArray, - int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, double *[], int, int *, int); - static auto func_ptr = LoadSymbol("cusolverDnDpotrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, Aarray, lda, infoArray, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCpotrfBatched(cusolverDnHandle_t handle, - cublasFillMode_t uplo, - int n, cuComplex *Aarray[], - int lda, int *infoArray, - int batchSize) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - cuComplex *[], int, int *, int); - static auto func_ptr = LoadSymbol("cusolverDnCpotrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, Aarray, lda, infoArray, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZpotrfBatched( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, - cuDoubleComplex *Aarray[], int lda, int *infoArray, int batchSize) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - cuDoubleComplex *[], int, int *, int); - 
static auto func_ptr = LoadSymbol("cusolverDnZpotrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, Aarray, lda, infoArray, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSpotrsBatched( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, - int nrhs, /* only support rhs = 1*/ - float *A[], int lda, float *B[], int ldb, int *d_info, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, float *[], int, float *[], - int, int *, int); - static auto func_ptr = LoadSymbol("cusolverDnSpotrsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, B, ldb, d_info, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDpotrsBatched( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, - int nrhs, /* only support rhs = 1*/ - double *A[], int lda, double *B[], int ldb, int *d_info, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, double *[], int, - double *[], int, int *, int); - static auto func_ptr = LoadSymbol("cusolverDnDpotrsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, B, ldb, d_info, batchSize); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCpotrsBatched(cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, - int nrhs, /* only support rhs = 1*/ - cuComplex *A[], int lda, cuComplex *B[], int ldb, - int *d_info, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, cuComplex *[], int, - cuComplex *[], int, int *, int); - static auto func_ptr = LoadSymbol("cusolverDnCpotrsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, B, ldb, d_info, batchSize); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZpotrsBatched(cusolverDnHandle_t handle, 
cublasFillMode_t uplo, int n, - int nrhs, /* only support rhs = 1*/ - cuDoubleComplex *A[], int lda, cuDoubleComplex *B[], - int ldb, int *d_info, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, cuDoubleComplex *[], int, - cuDoubleComplex *[], int, int *, int); - static auto func_ptr = LoadSymbol("cusolverDnZpotrsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, B, ldb, d_info, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgetrf_bufferSize( - cusolverDnHandle_t handle, int m, int n, float *A, int lda, int *Lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgetrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, Lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgetrf_bufferSize( - cusolverDnHandle_t handle, int m, int n, double *A, int lda, int *Lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgetrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, Lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCgetrf_bufferSize(cusolverDnHandle_t handle, int m, int n, - cuComplex *A, int lda, int *Lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCgetrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, Lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZgetrf_bufferSize(cusolverDnHandle_t handle, int m, int n, - cuDoubleComplex *A, int lda, int *Lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, 
cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgetrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, Lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgetrf(cusolverDnHandle_t handle, int m, - int n, float *A, int lda, - float *Workspace, int *devIpiv, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, float *, int, float *, int *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgetrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, Workspace, devIpiv, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgetrf(cusolverDnHandle_t handle, int m, - int n, double *A, int lda, - double *Workspace, int *devIpiv, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, double *, int, double *, int *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgetrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, Workspace, devIpiv, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgetrf(cusolverDnHandle_t handle, int m, - int n, cuComplex *A, int lda, - cuComplex *Workspace, - int *devIpiv, int *devInfo) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, cuComplex *, - int, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusolverDnCgetrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, Workspace, devIpiv, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgetrf(cusolverDnHandle_t handle, int m, - int n, cuDoubleComplex *A, - int lda, - cuDoubleComplex *Workspace, - int *devIpiv, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, cuDoubleComplex *, int, cuDoubleComplex *, - int *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgetrf"); 
- if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, Workspace, devIpiv, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSlaswp(cusolverDnHandle_t handle, int n, - float *A, int lda, int k1, int k2, - const int *devIpiv, int incx) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, float *, int, int, int, const int *, int); - static auto func_ptr = LoadSymbol("cusolverDnSlaswp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, k1, k2, devIpiv, incx); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDlaswp(cusolverDnHandle_t handle, int n, - double *A, int lda, int k1, - int k2, const int *devIpiv, - int incx) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, double *, int, int, int, const int *, int); - static auto func_ptr = LoadSymbol("cusolverDnDlaswp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, k1, k2, devIpiv, incx); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnClaswp(cusolverDnHandle_t handle, int n, - cuComplex *A, int lda, int k1, - int k2, const int *devIpiv, - int incx) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, cuComplex *, int, int, int, const int *, int); - static auto func_ptr = LoadSymbol("cusolverDnClaswp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, k1, k2, devIpiv, incx); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZlaswp(cusolverDnHandle_t handle, int n, - cuDoubleComplex *A, int lda, - int k1, int k2, - const int *devIpiv, int incx) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, - cuDoubleComplex *, int, int, - int, const int *, int); - static auto func_ptr = LoadSymbol("cusolverDnZlaswp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, k1, k2, devIpiv, incx); -} - -cusolverStatus_t CUSOLVERAPI 
cusolverDnSgetrs(cusolverDnHandle_t handle, - cublasOperation_t trans, int n, - int nrhs, const float *A, int lda, - const int *devIpiv, float *B, - int ldb, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasOperation_t, int, int, const float *, int, - const int *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgetrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, n, nrhs, A, lda, devIpiv, B, ldb, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgetrs(cusolverDnHandle_t handle, - cublasOperation_t trans, int n, - int nrhs, const double *A, - int lda, const int *devIpiv, - double *B, int ldb, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasOperation_t, int, int, const double *, int, - const int *, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgetrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, n, nrhs, A, lda, devIpiv, B, ldb, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgetrs(cusolverDnHandle_t handle, - cublasOperation_t trans, int n, - int nrhs, const cuComplex *A, - int lda, const int *devIpiv, - cuComplex *B, int ldb, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasOperation_t, int, int, const cuComplex *, int, - const int *, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCgetrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, n, nrhs, A, lda, devIpiv, B, ldb, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgetrs( - cusolverDnHandle_t handle, cublasOperation_t trans, int n, int nrhs, - const cuDoubleComplex *A, int lda, const int *devIpiv, cuDoubleComplex *B, - int ldb, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasOperation_t, int, int, const cuDoubleComplex *, 
- int, const int *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgetrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, n, nrhs, A, lda, devIpiv, B, ldb, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgeqrf_bufferSize( - cusolverDnHandle_t handle, int m, int n, float *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgeqrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgeqrf_bufferSize( - cusolverDnHandle_t handle, int m, int n, double *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgeqrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCgeqrf_bufferSize(cusolverDnHandle_t handle, int m, int n, - cuComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCgeqrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZgeqrf_bufferSize(cusolverDnHandle_t handle, int m, int n, - cuDoubleComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgeqrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgeqrf(cusolverDnHandle_t handle, int m, - int 
n, float *A, int lda, - float *TAU, float *Workspace, - int Lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, float *, int, float *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgeqrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, TAU, Workspace, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgeqrf(cusolverDnHandle_t handle, int m, - int n, double *A, int lda, - double *TAU, double *Workspace, - int Lwork, int *devInfo) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, double *, - int, double *, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgeqrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, TAU, Workspace, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgeqrf(cusolverDnHandle_t handle, int m, - int n, cuComplex *A, int lda, - cuComplex *TAU, - cuComplex *Workspace, int Lwork, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - cuComplex *, int, cuComplex *, - cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCgeqrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, TAU, Workspace, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgeqrf(cusolverDnHandle_t handle, int m, - int n, cuDoubleComplex *A, - int lda, cuDoubleComplex *TAU, - cuDoubleComplex *Workspace, - int Lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, cuDoubleComplex *, int, cuDoubleComplex *, - cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgeqrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, TAU, Workspace, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI 
cusolverDnSorgqr_bufferSize( - cusolverDnHandle_t handle, int m, int n, int k, const float *A, int lda, - const float *tau, int *lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int, - const float *, int, const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSorgqr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDorgqr_bufferSize( - cusolverDnHandle_t handle, int m, int n, int k, const double *A, int lda, - const double *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - int, const double *, int, - const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDorgqr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCungqr_bufferSize( - cusolverDnHandle_t handle, int m, int n, int k, const cuComplex *A, int lda, - const cuComplex *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - int, const cuComplex *, int, - const cuComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverDnCungqr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZungqr_bufferSize( - cusolverDnHandle_t handle, int m, int n, int k, const cuDoubleComplex *A, - int lda, const cuDoubleComplex *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZungqr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI 
cusolverDnSorgqr(cusolverDnHandle_t handle, int m, - int n, int k, float *A, int lda, - const float *tau, float *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, int, float *, int, const float *, float *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSorgqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDorgqr(cusolverDnHandle_t handle, int m, - int n, int k, double *A, int lda, - const double *tau, double *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, int, double *, int, const double *, - double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDorgqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCungqr(cusolverDnHandle_t handle, int m, - int n, int k, cuComplex *A, - int lda, const cuComplex *tau, - cuComplex *work, int lwork, - int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, int, cuComplex *, int, const cuComplex *, - cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCungqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZungqr( - cusolverDnHandle_t handle, int m, int n, int k, cuDoubleComplex *A, int lda, - const cuDoubleComplex *tau, cuDoubleComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, int, cuDoubleComplex *, int, - const cuDoubleComplex *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZungqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, 
m, n, k, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSormqr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, - int m, int n, int k, const float *A, int lda, const float *tau, - const float *C, int ldc, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasOperation_t, int, int, int, - const float *, int, const float *, const float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSormqr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDormqr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, - int m, int n, int k, const double *A, int lda, const double *tau, - const double *C, int ldc, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasOperation_t, int, int, int, - const double *, int, const double *, const double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDormqr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCunmqr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, - int m, int n, int k, const cuComplex *A, int lda, const cuComplex *tau, - const cuComplex *C, int ldc, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasOperation_t, int, int, int, - const cuComplex *, int, const cuComplex *, const cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCunmqr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, lwork); -} - -cusolverStatus_t 
CUSOLVERAPI cusolverDnZunmqr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, - int m, int n, int k, const cuDoubleComplex *A, int lda, - const cuDoubleComplex *tau, const cuDoubleComplex *C, int ldc, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasOperation_t, int, int, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZunmqr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSormqr( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, - int m, int n, int k, const float *A, int lda, const float *tau, float *C, - int ldc, float *work, int lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasOperation_t, int, int, int, - const float *, int, const float *, float *, int, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSormqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, work, - lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDormqr( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, - int m, int n, int k, const double *A, int lda, const double *tau, double *C, - int ldc, double *work, int lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasOperation_t, int, int, int, - const double *, int, const double *, double *, int, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDormqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, work, - lwork, devInfo); -} - 
-cusolverStatus_t CUSOLVERAPI cusolverDnCunmqr( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, - int m, int n, int k, const cuComplex *A, int lda, const cuComplex *tau, - cuComplex *C, int ldc, cuComplex *work, int lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasOperation_t, int, int, int, - const cuComplex *, int, const cuComplex *, cuComplex *, int, cuComplex *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCunmqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, work, - lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZunmqr( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, - int m, int n, int k, const cuDoubleComplex *A, int lda, - const cuDoubleComplex *tau, cuDoubleComplex *C, int ldc, - cuDoubleComplex *work, int lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasOperation_t, int, int, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZunmqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, work, - lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsytrf_bufferSize( - cusolverDnHandle_t handle, int n, float *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, - float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsytrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsytrf_bufferSize( - cusolverDnHandle_t handle, int n, double *A, int lda, int *lwork) { - using FuncPtr = 
cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, - double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsytrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCsytrf_bufferSize( - cusolverDnHandle_t handle, int n, cuComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, - cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCsytrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZsytrf_bufferSize( - cusolverDnHandle_t handle, int n, cuDoubleComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZsytrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsytrf(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - float *A, int lda, int *ipiv, - float *work, int lwork, - int *info) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - float *, int, int *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsytrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsytrf(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - double *A, int lda, int *ipiv, - double *work, int lwork, - int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, double *, int, int *, double *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsytrf"); - if (!func_ptr) return GetSymbolNotFoundError(); 
- return func_ptr(handle, uplo, n, A, lda, ipiv, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCsytrf(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuComplex *A, int lda, int *ipiv, - cuComplex *work, int lwork, - int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, int *, - cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCsytrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZsytrf(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuDoubleComplex *A, int lda, - int *ipiv, cuDoubleComplex *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, int *, - cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZsytrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgebrd_bufferSize( - cusolverDnHandle_t handle, int m, int n, int *Lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgebrd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, Lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgebrd_bufferSize( - cusolverDnHandle_t handle, int m, int n, int *Lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgebrd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, Lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgebrd_bufferSize( - cusolverDnHandle_t handle, int m, int n, int *Lwork) { - using 
FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCgebrd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, Lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgebrd_bufferSize( - cusolverDnHandle_t handle, int m, int n, int *Lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgebrd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, Lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgebrd(cusolverDnHandle_t handle, int m, - int n, float *A, int lda, - float *D, float *E, float *TAUQ, - float *TAUP, float *Work, - int Lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, float *, int, float *, float *, float *, - float *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgebrd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, D, E, TAUQ, TAUP, Work, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgebrd(cusolverDnHandle_t handle, int m, - int n, double *A, int lda, - double *D, double *E, - double *TAUQ, double *TAUP, - double *Work, int Lwork, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, double *, int, double *, double *, double *, - double *, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgebrd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, D, E, TAUQ, TAUP, Work, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgebrd(cusolverDnHandle_t handle, int m, - int n, cuComplex *A, int lda, - float *D, float *E, - cuComplex *TAUQ, cuComplex *TAUP, - cuComplex *Work, int Lwork, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( 
- cusolverDnHandle_t, int, int, cuComplex *, int, float *, float *, - cuComplex *, cuComplex *, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCgebrd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, D, E, TAUQ, TAUP, Work, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgebrd( - cusolverDnHandle_t handle, int m, int n, cuDoubleComplex *A, int lda, - double *D, double *E, cuDoubleComplex *TAUQ, cuDoubleComplex *TAUP, - cuDoubleComplex *Work, int Lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, cuDoubleComplex *, int, double *, double *, - cuDoubleComplex *, cuDoubleComplex *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgebrd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, D, E, TAUQ, TAUP, Work, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSorgbr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, int m, int n, int k, - const float *A, int lda, const float *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, int, int, int, const float *, int, - const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSorgbr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, m, n, k, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDorgbr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, int m, int n, int k, - const double *A, int lda, const double *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, int, int, int, const double *, int, - const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDorgbr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, m, n, k, A, lda, tau, 
lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCungbr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, int m, int n, int k, - const cuComplex *A, int lda, const cuComplex *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, int, int, int, const cuComplex *, - int, const cuComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverDnCungbr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, m, n, k, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZungbr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, int m, int n, int k, - const cuDoubleComplex *A, int lda, const cuDoubleComplex *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, int, int, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZungbr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, m, n, k, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSorgbr(cusolverDnHandle_t handle, - cublasSideMode_t side, int m, - int n, int k, float *A, int lda, - const float *tau, float *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, int, int, int, float *, int, - const float *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSorgbr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, m, n, k, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDorgbr(cusolverDnHandle_t handle, - cublasSideMode_t side, int m, - int n, int k, double *A, int lda, - const double *tau, double *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, int, int, int, double *, int, - const 
double *, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDorgbr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, m, n, k, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCungbr(cusolverDnHandle_t handle, - cublasSideMode_t side, int m, - int n, int k, cuComplex *A, - int lda, const cuComplex *tau, - cuComplex *work, int lwork, - int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, int, int, int, cuComplex *, int, - const cuComplex *, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCungbr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, m, n, k, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZungbr(cusolverDnHandle_t handle, cublasSideMode_t side, int m, int n, - int k, cuDoubleComplex *A, int lda, const cuDoubleComplex *tau, - cuDoubleComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, int, int, int, cuDoubleComplex *, - int, const cuDoubleComplex *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZungbr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, m, n, k, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsytrd_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, const float *A, - int lda, const float *d, const float *e, const float *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, const float *, int, - const float *, const float *, const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsytrd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, d, e, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI 
cusolverDnDsytrd_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, const double *A, - int lda, const double *d, const double *e, const double *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, const double *, int, - const double *, const double *, const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsytrd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, d, e, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnChetrd_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, const cuComplex *A, - int lda, const float *d, const float *e, const cuComplex *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, const cuComplex *, int, - const float *, const float *, const cuComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverDnChetrd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, d, e, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZhetrd_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, - const cuDoubleComplex *A, int lda, const double *d, const double *e, - const cuDoubleComplex *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, int, - const double *, const double *, const cuDoubleComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZhetrd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, d, e, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsytrd(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - float *A, int lda, float *d, - float *e, float *tau, float *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - 
cusolverDnHandle_t, cublasFillMode_t, int, float *, int, float *, float *, - float *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsytrd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, d, e, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsytrd( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, double *A, int lda, - double *d, double *e, double *tau, double *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, double *, int, double *, - double *, double *, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsytrd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, d, e, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnChetrd(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuComplex *A, int lda, float *d, - float *e, cuComplex *tau, - cuComplex *work, int lwork, - int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, float *, - float *, cuComplex *, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnChetrd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, d, e, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZhetrd( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, cuDoubleComplex *A, - int lda, double *d, double *e, cuDoubleComplex *tau, cuDoubleComplex *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, - double *, double *, cuDoubleComplex *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZhetrd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, d, 
e, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSorgtr_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, const float *A, - int lda, const float *tau, int *lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - const float *, int, const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSorgtr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDorgtr_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, const double *A, - int lda, const double *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, const double *, int, - const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDorgtr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCungtr_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, const cuComplex *A, - int lda, const cuComplex *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, const cuComplex *, int, - const cuComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverDnCungtr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZungtr_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, - const cuDoubleComplex *A, int lda, const cuDoubleComplex *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZungtr_bufferSize"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSorgtr(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - float *A, int lda, - const float *tau, float *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, float *, int, const float *, - float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSorgtr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDorgtr(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - double *A, int lda, - const double *tau, double *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, double *, int, const double *, - double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDorgtr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCungtr( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, cuComplex *A, - int lda, const cuComplex *tau, cuComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, - const cuComplex *, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCungtr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZungtr(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuDoubleComplex *A, int lda, - const cuDoubleComplex *tau, - cuDoubleComplex *work, int lwork, - int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, - 
const cuDoubleComplex *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZungtr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSormtr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, int m, int n, const float *A, int lda, - const float *tau, const float *C, int ldc, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - int, int, const float *, int, const float *, const float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSormtr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDormtr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, int m, int n, const double *A, int lda, - const double *tau, const double *C, int ldc, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - int, int, const double *, int, const double *, const double *, int, - int *); - static auto func_ptr = LoadSymbol("cusolverDnDormtr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCunmtr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, int m, int n, const cuComplex *A, int lda, - const cuComplex *tau, const cuComplex *C, int ldc, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - int, int, const 
cuComplex *, int, const cuComplex *, const cuComplex *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCunmtr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZunmtr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, int m, int n, const cuDoubleComplex *A, int lda, - const cuDoubleComplex *tau, const cuDoubleComplex *C, int ldc, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - int, int, const cuDoubleComplex *, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZunmtr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSormtr( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, int m, int n, float *A, int lda, float *tau, - float *C, int ldc, float *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - int, int, float *, int, float *, float *, int, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSormtr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, work, - lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDormtr( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, int m, int n, double *A, int lda, double *tau, - double *C, int ldc, double *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, 
cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - int, int, double *, int, double *, double *, int, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDormtr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, work, - lwork, info); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCunmtr(cusolverDnHandle_t handle, cublasSideMode_t side, - cublasFillMode_t uplo, cublasOperation_t trans, int m, int n, - cuComplex *A, int lda, cuComplex *tau, cuComplex *C, int ldc, - cuComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - int, int, cuComplex *, int, cuComplex *, cuComplex *, int, cuComplex *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCunmtr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, work, - lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZunmtr( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, int m, int n, cuDoubleComplex *A, int lda, - cuDoubleComplex *tau, cuDoubleComplex *C, int ldc, cuDoubleComplex *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - int, int, cuDoubleComplex *, int, cuDoubleComplex *, cuDoubleComplex *, - int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZunmtr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, work, - lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgesvd_bufferSize( - cusolverDnHandle_t handle, int m, int n, int *lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int *); - static auto func_ptr 
= LoadSymbol("cusolverDnSgesvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgesvd_bufferSize( - cusolverDnHandle_t handle, int m, int n, int *lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgesvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgesvd_bufferSize( - cusolverDnHandle_t handle, int m, int n, int *lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCgesvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgesvd_bufferSize( - cusolverDnHandle_t handle, int m, int n, int *lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgesvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgesvd( - cusolverDnHandle_t handle, signed char jobu, signed char jobvt, int m, - int n, float *A, int lda, float *S, float *U, int ldu, float *VT, int ldvt, - float *work, int lwork, float *rwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, signed char, signed char, int, int, float *, int, - float *, float *, int, float *, int, float *, int, float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgesvd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobu, jobvt, m, n, A, lda, S, U, ldu, VT, ldvt, work, - lwork, rwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgesvd( - cusolverDnHandle_t handle, signed char 
jobu, signed char jobvt, int m, - int n, double *A, int lda, double *S, double *U, int ldu, double *VT, - int ldvt, double *work, int lwork, double *rwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, signed char, signed char, int, int, double *, int, - double *, double *, int, double *, int, double *, int, double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgesvd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobu, jobvt, m, n, A, lda, S, U, ldu, VT, ldvt, work, - lwork, rwork, info); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCgesvd(cusolverDnHandle_t handle, signed char jobu, signed char jobvt, - int m, int n, cuComplex *A, int lda, float *S, cuComplex *U, - int ldu, cuComplex *VT, int ldvt, cuComplex *work, int lwork, - float *rwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, signed char, signed char, int, int, cuComplex *, int, - float *, cuComplex *, int, cuComplex *, int, cuComplex *, int, float *, - int *); - static auto func_ptr = LoadSymbol("cusolverDnCgesvd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobu, jobvt, m, n, A, lda, S, U, ldu, VT, ldvt, work, - lwork, rwork, info); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZgesvd(cusolverDnHandle_t handle, signed char jobu, signed char jobvt, - int m, int n, cuDoubleComplex *A, int lda, double *S, - cuDoubleComplex *U, int ldu, cuDoubleComplex *VT, int ldvt, - cuDoubleComplex *work, int lwork, double *rwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, signed char, signed char, int, int, cuDoubleComplex *, - int, double *, cuDoubleComplex *, int, cuDoubleComplex *, int, - cuDoubleComplex *, int, double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgesvd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobu, jobvt, m, n, A, lda, S, U, ldu, VT, ldvt, work, - 
lwork, rwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsyevd_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const float *A, int lda, const float *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const float *, int, const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsyevd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsyevd_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const double *A, int lda, const double *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const double *, int, const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsyevd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCheevd_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const cuComplex *A, int lda, const float *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const cuComplex *, int, const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnCheevd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZheevd_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const cuDoubleComplex *A, int lda, const double *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const 
cuDoubleComplex *, int, const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZheevd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsyevd( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, float *A, int lda, float *W, float *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, float *, - int, float *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsyevd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsyevd( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, double *A, int lda, double *W, double *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, double *, - int, double *, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsyevd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCheevd(cusolverDnHandle_t handle, - cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, - cuComplex *A, int lda, float *W, - cuComplex *work, int lwork, - int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, cuComplex *, - int, float *, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCheevd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZheevd(cusolverDnHandle_t handle, - cusolverEigMode_t jobz, - 
cublasFillMode_t uplo, int n, - cuDoubleComplex *A, int lda, - double *W, cuDoubleComplex *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - cuDoubleComplex *, int, double *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZheevd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsygvd_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, const float *A, int lda, const float *B, - int ldb, const float *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, const float *, int, const float *, int, - const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsygvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsygvd_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, const double *A, int lda, const double *B, - int ldb, const double *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, const double *, int, const double *, int, - const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsygvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnChegvd_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, const cuComplex *A, int lda, - 
const cuComplex *B, int ldb, const float *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, const cuComplex *, int, const cuComplex *, int, - const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnChegvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZhegvd_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, const cuDoubleComplex *A, int lda, - const cuDoubleComplex *B, int ldb, const double *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZhegvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsygvd( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, float *A, int lda, float *B, int ldb, - float *W, float *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, float *, int, float *, int, float *, float *, int, - int *); - static auto func_ptr = LoadSymbol("cusolverDnSsygvd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, - info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsygvd( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, double *A, int lda, double *B, int ldb, 
- double *W, double *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, double *, int, double *, int, double *, double *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsygvd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, - info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnChegvd( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, cuComplex *A, int lda, cuComplex *B, int ldb, - float *W, cuComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, cuComplex *, int, cuComplex *, int, float *, - cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnChegvd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, - info); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZhegvd(cusolverDnHandle_t handle, cusolverEigType_t itype, - cusolverEigMode_t jobz, cublasFillMode_t uplo, int n, - cuDoubleComplex *A, int lda, cuDoubleComplex *B, int ldb, - double *W, cuDoubleComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int, - double *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZhegvd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, - info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCreateSyevjInfo(syevjInfo_t *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(syevjInfo_t *); - static auto func_ptr = 
LoadSymbol("cusolverDnCreateSyevjInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDestroySyevjInfo(syevjInfo_t info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnDestroySyevjInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXsyevjSetTolerance(syevjInfo_t info, - double tolerance) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(syevjInfo_t, double); - static auto func_ptr = LoadSymbol("cusolverDnXsyevjSetTolerance"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, tolerance); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXsyevjSetMaxSweeps(syevjInfo_t info, - int max_sweeps) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(syevjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnXsyevjSetMaxSweeps"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, max_sweeps); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXsyevjSetSortEig(syevjInfo_t info, - int sort_eig) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(syevjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnXsyevjSetSortEig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, sort_eig); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXsyevjGetResidual( - cusolverDnHandle_t handle, syevjInfo_t info, double *residual) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, - syevjInfo_t, double *); - static auto func_ptr = LoadSymbol("cusolverDnXsyevjGetResidual"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, residual); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXsyevjGetSweeps( - cusolverDnHandle_t handle, syevjInfo_t info, int *executed_sweeps) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, syevjInfo_t, int *); - 
static auto func_ptr = LoadSymbol("cusolverDnXsyevjGetSweeps"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, executed_sweeps); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsyevjBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const float *A, int lda, const float *W, int *lwork, - syevjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const float *, int, const float *, int *, syevjInfo_t, int); - static auto func_ptr = - LoadSymbol("cusolverDnSsyevjBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork, params, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsyevjBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const double *A, int lda, const double *W, int *lwork, - syevjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const double *, int, const double *, int *, syevjInfo_t, int); - static auto func_ptr = - LoadSymbol("cusolverDnDsyevjBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork, params, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCheevjBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const cuComplex *A, int lda, const float *W, int *lwork, - syevjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const cuComplex *, int, const float *, int *, syevjInfo_t, int); - static auto func_ptr = - LoadSymbol("cusolverDnCheevjBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork, params, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZheevjBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const cuDoubleComplex *A, int lda, const double *W, int *lwork, - syevjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const cuDoubleComplex *, int, const double *, int *, syevjInfo_t, int); - static auto func_ptr = - LoadSymbol("cusolverDnZheevjBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork, params, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsyevjBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, float *A, int lda, float *W, float *work, int lwork, int *info, - syevjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, float *, - int, float *, float *, int, int *, syevjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnSsyevjBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params, - batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsyevjBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, double *A, int lda, double *W, double *work, int lwork, int *info, - syevjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, double *, - int, double *, double *, int, int *, syevjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnDsyevjBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params, - 
batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCheevjBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, cuComplex *A, int lda, float *W, cuComplex *work, int lwork, - int *info, syevjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, cuComplex *, - int, float *, cuComplex *, int, int *, syevjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnCheevjBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params, - batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZheevjBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, cuDoubleComplex *A, int lda, double *W, cuDoubleComplex *work, - int lwork, int *info, syevjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - cuDoubleComplex *, int, double *, cuDoubleComplex *, int, int *, - syevjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnZheevjBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params, - batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsyevj_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const float *A, int lda, const float *W, int *lwork, - syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const float *, int, const float *, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnSsyevj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsyevj_bufferSize( - 
cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const double *A, int lda, const double *W, int *lwork, - syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const double *, int, const double *, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnDsyevj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCheevj_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const cuComplex *A, int lda, const float *W, int *lwork, - syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const cuComplex *, int, const float *, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnCheevj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZheevj_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const cuDoubleComplex *A, int lda, const double *W, int *lwork, - syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const cuDoubleComplex *, int, const double *, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnZheevj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsyevj(cusolverDnHandle_t handle, - cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, - float *A, int lda, float *W, - float *work, int lwork, int *info, - syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI 
*)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, float *, - int, float *, float *, int, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnSsyevj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsyevj(cusolverDnHandle_t handle, - cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, - double *A, int lda, double *W, - double *work, int lwork, - int *info, syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, double *, - int, double *, double *, int, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnDsyevj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCheevj(cusolverDnHandle_t handle, - cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, - cuComplex *A, int lda, float *W, - cuComplex *work, int lwork, - int *info, syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, cuComplex *, - int, float *, cuComplex *, int, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnCheevj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZheevj( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, cuDoubleComplex *A, int lda, double *W, cuDoubleComplex *work, - int lwork, int *info, syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - cuDoubleComplex *, int, double *, cuDoubleComplex *, int, int *, - syevjInfo_t); - static auto func_ptr = 
LoadSymbol("cusolverDnZheevj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsygvj_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, const float *A, int lda, const float *B, - int ldb, const float *W, int *lwork, syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, const float *, int, const float *, int, - const float *, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnSsygvj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork, - params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsygvj_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, const double *A, int lda, const double *B, - int ldb, const double *W, int *lwork, syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, const double *, int, const double *, int, - const double *, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnDsygvj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork, - params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnChegvj_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, const cuComplex *A, int lda, - const cuComplex *B, int ldb, const float *W, int *lwork, - syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, const cuComplex *, int, 
const cuComplex *, int, - const float *, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnChegvj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork, - params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZhegvj_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, const cuDoubleComplex *A, int lda, - const cuDoubleComplex *B, int ldb, const double *W, int *lwork, - syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const double *, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnZhegvj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork, - params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsygvj( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, float *A, int lda, float *B, int ldb, - float *W, float *work, int lwork, int *info, syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, float *, int, float *, int, float *, float *, int, - int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnSsygvj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, - info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsygvj( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, double *A, int lda, double *B, int ldb, - double *W, double *work, int lwork, int *info, syevjInfo_t params) { - using FuncPtr = 
cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, double *, int, double *, int, double *, double *, - int, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnDsygvj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, - info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnChegvj( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, cuComplex *A, int lda, cuComplex *B, int ldb, - float *W, cuComplex *work, int lwork, int *info, syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, cuComplex *, int, cuComplex *, int, float *, - cuComplex *, int, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnChegvj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, - info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZhegvj( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, cuDoubleComplex *A, int lda, - cuDoubleComplex *B, int ldb, double *W, cuDoubleComplex *work, int lwork, - int *info, syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int, - double *, cuDoubleComplex *, int, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnZhegvj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, - info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCreateGesvdjInfo(gesvdjInfo_t *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI 
*)(gesvdjInfo_t *); - static auto func_ptr = LoadSymbol("cusolverDnCreateGesvdjInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDestroyGesvdjInfo(gesvdjInfo_t info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnDestroyGesvdjInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXgesvdjSetTolerance(gesvdjInfo_t info, - double tolerance) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(gesvdjInfo_t, double); - static auto func_ptr = LoadSymbol("cusolverDnXgesvdjSetTolerance"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, tolerance); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXgesvdjSetMaxSweeps(gesvdjInfo_t info, - int max_sweeps) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(gesvdjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnXgesvdjSetMaxSweeps"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, max_sweeps); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXgesvdjSetSortEig(gesvdjInfo_t info, - int sort_svd) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(gesvdjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnXgesvdjSetSortEig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, sort_svd); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXgesvdjGetResidual( - cusolverDnHandle_t handle, gesvdjInfo_t info, double *residual) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, - gesvdjInfo_t, double *); - static auto func_ptr = LoadSymbol("cusolverDnXgesvdjGetResidual"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, residual); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXgesvdjGetSweeps( - cusolverDnHandle_t handle, gesvdjInfo_t info, int *executed_sweeps) { - using FuncPtr = - 
cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, gesvdjInfo_t, int *); - static auto func_ptr = LoadSymbol("cusolverDnXgesvdjGetSweeps"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, executed_sweeps); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgesvdjBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int m, int n, - const float *A, int lda, const float *S, const float *U, int ldu, - const float *V, int ldv, int *lwork, gesvdjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, const float *, int, - const float *, const float *, int, const float *, int, int *, - gesvdjInfo_t, int); - static auto func_ptr = - LoadSymbol("cusolverDnSgesvdjBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, lwork, params, - batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgesvdjBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int m, int n, - const double *A, int lda, const double *S, const double *U, int ldu, - const double *V, int ldv, int *lwork, gesvdjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, const double *, int, - const double *, const double *, int, const double *, int, int *, - gesvdjInfo_t, int); - static auto func_ptr = - LoadSymbol("cusolverDnDgesvdjBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, lwork, params, - batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgesvdjBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int m, int n, - const cuComplex *A, int lda, const float *S, const cuComplex *U, int ldu, - const cuComplex *V, int ldv, int *lwork, gesvdjInfo_t params, - int batchSize) { - using FuncPtr = 
cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, const cuComplex *, int, - const float *, const cuComplex *, int, const cuComplex *, int, int *, - gesvdjInfo_t, int); - static auto func_ptr = - LoadSymbol("cusolverDnCgesvdjBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, lwork, params, - batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgesvdjBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int m, int n, - const cuDoubleComplex *A, int lda, const double *S, - const cuDoubleComplex *U, int ldu, const cuDoubleComplex *V, int ldv, - int *lwork, gesvdjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, const cuDoubleComplex *, - int, const double *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, int *, gesvdjInfo_t, int); - static auto func_ptr = - LoadSymbol("cusolverDnZgesvdjBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, lwork, params, - batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgesvdjBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int m, int n, float *A, - int lda, float *S, float *U, int ldu, float *V, int ldv, float *work, - int lwork, int *info, gesvdjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, float *, int, float *, - float *, int, float *, int, float *, int, int *, gesvdjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnSgesvdjBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, - info, params, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgesvdjBatched( - cusolverDnHandle_t handle, cusolverEigMode_t 
jobz, int m, int n, double *A, - int lda, double *S, double *U, int ldu, double *V, int ldv, double *work, - int lwork, int *info, gesvdjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, double *, int, double *, - double *, int, double *, int, double *, int, int *, gesvdjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnDgesvdjBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, - info, params, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgesvdjBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int m, int n, - cuComplex *A, int lda, float *S, cuComplex *U, int ldu, cuComplex *V, - int ldv, cuComplex *work, int lwork, int *info, gesvdjInfo_t params, - int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, cuComplex *, int, - float *, cuComplex *, int, cuComplex *, int, cuComplex *, int, int *, - gesvdjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnCgesvdjBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, - info, params, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgesvdjBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int m, int n, - cuDoubleComplex *A, int lda, double *S, cuDoubleComplex *U, int ldu, - cuDoubleComplex *V, int ldv, cuDoubleComplex *work, int lwork, int *info, - gesvdjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, cuDoubleComplex *, int, - double *, cuDoubleComplex *, int, cuDoubleComplex *, int, - cuDoubleComplex *, int, int *, gesvdjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnZgesvdjBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, - info, params, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgesvdj_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, - const float *A, int lda, const float *S, const float *U, int ldu, - const float *V, int ldv, int *lwork, gesvdjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, const float *, int, - const float *, const float *, int, const float *, int, int *, - gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnSgesvdj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, lwork, - params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgesvdj_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, - const double *A, int lda, const double *S, const double *U, int ldu, - const double *V, int ldv, int *lwork, gesvdjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, const double *, int, - const double *, const double *, int, const double *, int, int *, - gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnDgesvdj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, lwork, - params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgesvdj_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, - const cuComplex *A, int lda, const float *S, const cuComplex *U, int ldu, - const cuComplex *V, int ldv, int *lwork, gesvdjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, const cuComplex *, - int, const float *, const cuComplex *, int, const cuComplex *, int, int *, - gesvdjInfo_t); - static auto 
func_ptr = LoadSymbol("cusolverDnCgesvdj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, lwork, - params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgesvdj_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, - const cuDoubleComplex *A, int lda, const double *S, - const cuDoubleComplex *U, int ldu, const cuDoubleComplex *V, int ldv, - int *lwork, gesvdjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, - const cuDoubleComplex *, int, const double *, const cuDoubleComplex *, - int, const cuDoubleComplex *, int, int *, gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnZgesvdj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, lwork, - params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgesvdj( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, - float *A, int lda, float *S, float *U, int ldu, float *V, int ldv, - float *work, int lwork, int *info, gesvdjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, float *, int, - float *, float *, int, float *, int, float *, int, int *, gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnSgesvdj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, work, - lwork, info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgesvdj( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, - double *A, int lda, double *S, double *U, int ldu, double *V, int ldv, - double *work, int lwork, int *info, gesvdjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, double *, int, - 
double *, double *, int, double *, int, double *, int, int *, - gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnDgesvdj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, work, - lwork, info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgesvdj( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, - cuComplex *A, int lda, float *S, cuComplex *U, int ldu, cuComplex *V, - int ldv, cuComplex *work, int lwork, int *info, gesvdjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, cuComplex *, int, - float *, cuComplex *, int, cuComplex *, int, cuComplex *, int, int *, - gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnCgesvdj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, work, - lwork, info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgesvdj( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, - cuDoubleComplex *A, int lda, double *S, cuDoubleComplex *U, int ldu, - cuDoubleComplex *V, int ldv, cuDoubleComplex *work, int lwork, int *info, - gesvdjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, cuDoubleComplex *, - int, double *, cuDoubleComplex *, int, cuDoubleComplex *, int, - cuDoubleComplex *, int, int *, gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnZgesvdj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, work, - lwork, info, params); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cusolver_dense_10_1.inc b/third_party/xla/third_party/tsl/tsl/cuda/cusolver_dense_10_1.inc deleted file mode 100644 index d247958143a299..00000000000000 --- 
a/third_party/xla/third_party/tsl/tsl/cuda/cusolver_dense_10_1.inc +++ /dev/null @@ -1,3139 +0,0 @@ -// Auto-generated, do not edit. - -extern "C" { - -cusolverStatus_t CUSOLVERAPI cusolverGetProperty(libraryPropertyType type, - int *value) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(libraryPropertyType, int *); - static auto func_ptr = LoadSymbol("cusolverGetProperty"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type, value); -} - -cusolverStatus_t CUSOLVERAPI cusolverGetVersion(int *version) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(int *); - static auto func_ptr = LoadSymbol("cusolverGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(version); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCreate(cusolverDnHandle_t *handle) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t *); - static auto func_ptr = LoadSymbol("cusolverDnCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDestroy(cusolverDnHandle_t handle) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t); - static auto func_ptr = LoadSymbol("cusolverDnDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSetStream(cusolverDnHandle_t handle, - cudaStream_t streamId) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cusolverDnSetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnGetStream(cusolverDnHandle_t handle, - cudaStream_t *streamId) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cudaStream_t *); - static auto func_ptr = LoadSymbol("cusolverDnGetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - 
-cusolverStatus_t CUSOLVERAPI -cusolverDnSpotrf_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, float *A, int lda, int *Lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSpotrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, Lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnDpotrf_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, double *A, int lda, int *Lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDpotrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, Lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCpotrf_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, cuComplex *A, int lda, int *Lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCpotrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, Lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZpotrf_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, cuDoubleComplex *A, int lda, int *Lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZpotrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, Lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSpotrf(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - float *A, int lda, - float *Workspace, int Lwork, - int *devInfo) { - using FuncPtr = - 
cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - float *, int, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSpotrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, Workspace, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDpotrf(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - double *A, int lda, - double *Workspace, int Lwork, - int *devInfo) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - double *, int, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDpotrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, Workspace, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCpotrf(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuComplex *A, int lda, - cuComplex *Workspace, int Lwork, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, cuComplex *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCpotrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, Workspace, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZpotrf(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuDoubleComplex *A, int lda, - cuDoubleComplex *Workspace, - int Lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, - cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZpotrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, Workspace, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSpotrs(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - int nrhs, const float *A, int lda, - float *B, int ldb, 
int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const float *, int, - float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSpotrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, B, ldb, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDpotrs(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - int nrhs, const double *A, - int lda, double *B, int ldb, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const double *, int, - double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDpotrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, B, ldb, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCpotrs(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - int nrhs, const cuComplex *A, - int lda, cuComplex *B, int ldb, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const cuComplex *, int, - cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCpotrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, B, ldb, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZpotrs(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - int nrhs, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *B, int ldb, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const cuDoubleComplex *, - int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZpotrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, B, ldb, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSpotrfBatched(cusolverDnHandle_t 
handle, - cublasFillMode_t uplo, - int n, float *Aarray[], - int lda, int *infoArray, - int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, float *[], int, int *, int); - static auto func_ptr = LoadSymbol("cusolverDnSpotrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, Aarray, lda, infoArray, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDpotrfBatched(cusolverDnHandle_t handle, - cublasFillMode_t uplo, - int n, double *Aarray[], - int lda, int *infoArray, - int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, double *[], int, int *, int); - static auto func_ptr = LoadSymbol("cusolverDnDpotrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, Aarray, lda, infoArray, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCpotrfBatched(cusolverDnHandle_t handle, - cublasFillMode_t uplo, - int n, cuComplex *Aarray[], - int lda, int *infoArray, - int batchSize) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - cuComplex *[], int, int *, int); - static auto func_ptr = LoadSymbol("cusolverDnCpotrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, Aarray, lda, infoArray, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZpotrfBatched( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, - cuDoubleComplex *Aarray[], int lda, int *infoArray, int batchSize) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - cuDoubleComplex *[], int, int *, int); - static auto func_ptr = LoadSymbol("cusolverDnZpotrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, Aarray, lda, infoArray, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSpotrsBatched( - 
cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, - int nrhs, /* only support rhs = 1*/ - float *A[], int lda, float *B[], int ldb, int *d_info, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, float *[], int, float *[], - int, int *, int); - static auto func_ptr = LoadSymbol("cusolverDnSpotrsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, B, ldb, d_info, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDpotrsBatched( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, - int nrhs, /* only support rhs = 1*/ - double *A[], int lda, double *B[], int ldb, int *d_info, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, double *[], int, - double *[], int, int *, int); - static auto func_ptr = LoadSymbol("cusolverDnDpotrsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, B, ldb, d_info, batchSize); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCpotrsBatched(cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, - int nrhs, /* only support rhs = 1*/ - cuComplex *A[], int lda, cuComplex *B[], int ldb, - int *d_info, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, cuComplex *[], int, - cuComplex *[], int, int *, int); - static auto func_ptr = LoadSymbol("cusolverDnCpotrsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, B, ldb, d_info, batchSize); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZpotrsBatched(cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, - int nrhs, /* only support rhs = 1*/ - cuDoubleComplex *A[], int lda, cuDoubleComplex *B[], - int ldb, int *d_info, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, 
cublasFillMode_t, int, int, cuDoubleComplex *[], int, - cuDoubleComplex *[], int, int *, int); - static auto func_ptr = LoadSymbol("cusolverDnZpotrsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, B, ldb, d_info, batchSize); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnSpotri_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, float *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSpotri_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnDpotri_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, double *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDpotri_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCpotri_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, cuComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCpotri_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZpotri_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, cuDoubleComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZpotri_bufferSize"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSpotri(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - float *A, int lda, float *work, - int lwork, int *devInfo) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - float *, int, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSpotri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDpotri(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - double *A, int lda, double *work, - int lwork, int *devInfo) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - double *, int, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDpotri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCpotri(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuComplex *A, int lda, - cuComplex *work, int lwork, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, cuComplex *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCpotri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZpotri(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuDoubleComplex *A, int lda, - cuDoubleComplex *work, int lwork, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, - cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZpotri"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnStrtri_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, cublasDiagType_t diag, - int n, float *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, cublasDiagType_t, int, float *, int, - int *); - static auto func_ptr = LoadSymbol("cusolverDnStrtri_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, diag, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDtrtri_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, cublasDiagType_t diag, - int n, double *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, cublasDiagType_t, int, double *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDtrtri_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, diag, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCtrtri_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, cublasDiagType_t diag, - int n, cuComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, cublasDiagType_t, int, cuComplex *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCtrtri_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, diag, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZtrtri_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, cublasDiagType_t diag, - int n, cuDoubleComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, cublasDiagType_t, int, - cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZtrtri_bufferSize"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, diag, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnStrtri(cusolverDnHandle_t handle, - cublasFillMode_t uplo, - cublasDiagType_t diag, int n, - float *A, int lda, float *work, - int lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, cublasDiagType_t, int, float *, int, - float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnStrtri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, diag, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDtrtri(cusolverDnHandle_t handle, - cublasFillMode_t uplo, - cublasDiagType_t diag, int n, - double *A, int lda, double *work, - int lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, cublasDiagType_t, int, double *, - int, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDtrtri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, diag, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCtrtri( - cusolverDnHandle_t handle, cublasFillMode_t uplo, cublasDiagType_t diag, - int n, cuComplex *A, int lda, cuComplex *work, int lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, cublasDiagType_t, int, cuComplex *, - int, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCtrtri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, diag, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZtrtri(cusolverDnHandle_t handle, - cublasFillMode_t uplo, - cublasDiagType_t diag, int n, - cuDoubleComplex *A, int lda, - cuDoubleComplex *work, int lwork, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - 
cusolverDnHandle_t, cublasFillMode_t, cublasDiagType_t, int, - cuDoubleComplex *, int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZtrtri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, diag, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnSlauum_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, float *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSlauum_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnDlauum_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, double *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDlauum_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnClauum_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, cuComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnClauum_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZlauum_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, cuDoubleComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZlauum_bufferSize"); 
- if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSlauum(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - float *A, int lda, float *work, - int lwork, int *devInfo) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - float *, int, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSlauum"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDlauum(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - double *A, int lda, double *work, - int lwork, int *devInfo) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - double *, int, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDlauum"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnClauum(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuComplex *A, int lda, - cuComplex *work, int lwork, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, cuComplex *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnClauum"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZlauum(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuDoubleComplex *A, int lda, - cuDoubleComplex *work, int lwork, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, - cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZlauum"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgetrf_bufferSize( - cusolverDnHandle_t handle, int m, int n, float *A, int lda, int *Lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgetrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, Lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgetrf_bufferSize( - cusolverDnHandle_t handle, int m, int n, double *A, int lda, int *Lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgetrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, Lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCgetrf_bufferSize(cusolverDnHandle_t handle, int m, int n, - cuComplex *A, int lda, int *Lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCgetrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, Lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZgetrf_bufferSize(cusolverDnHandle_t handle, int m, int n, - cuDoubleComplex *A, int lda, int *Lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgetrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, Lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgetrf(cusolverDnHandle_t handle, int m, - int n, float *A, int lda, - float *Workspace, int *devIpiv, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - 
cusolverDnHandle_t, int, int, float *, int, float *, int *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgetrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, Workspace, devIpiv, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgetrf(cusolverDnHandle_t handle, int m, - int n, double *A, int lda, - double *Workspace, int *devIpiv, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, double *, int, double *, int *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgetrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, Workspace, devIpiv, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgetrf(cusolverDnHandle_t handle, int m, - int n, cuComplex *A, int lda, - cuComplex *Workspace, - int *devIpiv, int *devInfo) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, cuComplex *, - int, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusolverDnCgetrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, Workspace, devIpiv, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgetrf(cusolverDnHandle_t handle, int m, - int n, cuDoubleComplex *A, - int lda, - cuDoubleComplex *Workspace, - int *devIpiv, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, cuDoubleComplex *, int, cuDoubleComplex *, - int *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgetrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, Workspace, devIpiv, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSlaswp(cusolverDnHandle_t handle, int n, - float *A, int lda, int k1, int k2, - const int *devIpiv, int incx) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, float *, int, int, int, const int *, int); - static auto 
func_ptr = LoadSymbol("cusolverDnSlaswp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, k1, k2, devIpiv, incx); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDlaswp(cusolverDnHandle_t handle, int n, - double *A, int lda, int k1, - int k2, const int *devIpiv, - int incx) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, double *, int, int, int, const int *, int); - static auto func_ptr = LoadSymbol("cusolverDnDlaswp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, k1, k2, devIpiv, incx); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnClaswp(cusolverDnHandle_t handle, int n, - cuComplex *A, int lda, int k1, - int k2, const int *devIpiv, - int incx) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, cuComplex *, int, int, int, const int *, int); - static auto func_ptr = LoadSymbol("cusolverDnClaswp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, k1, k2, devIpiv, incx); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZlaswp(cusolverDnHandle_t handle, int n, - cuDoubleComplex *A, int lda, - int k1, int k2, - const int *devIpiv, int incx) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, - cuDoubleComplex *, int, int, - int, const int *, int); - static auto func_ptr = LoadSymbol("cusolverDnZlaswp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, k1, k2, devIpiv, incx); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgetrs(cusolverDnHandle_t handle, - cublasOperation_t trans, int n, - int nrhs, const float *A, int lda, - const int *devIpiv, float *B, - int ldb, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasOperation_t, int, int, const float *, int, - const int *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgetrs"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, trans, n, nrhs, A, lda, devIpiv, B, ldb, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgetrs(cusolverDnHandle_t handle, - cublasOperation_t trans, int n, - int nrhs, const double *A, - int lda, const int *devIpiv, - double *B, int ldb, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasOperation_t, int, int, const double *, int, - const int *, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgetrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, n, nrhs, A, lda, devIpiv, B, ldb, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgetrs(cusolverDnHandle_t handle, - cublasOperation_t trans, int n, - int nrhs, const cuComplex *A, - int lda, const int *devIpiv, - cuComplex *B, int ldb, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasOperation_t, int, int, const cuComplex *, int, - const int *, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCgetrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, n, nrhs, A, lda, devIpiv, B, ldb, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgetrs( - cusolverDnHandle_t handle, cublasOperation_t trans, int n, int nrhs, - const cuDoubleComplex *A, int lda, const int *devIpiv, cuDoubleComplex *B, - int ldb, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasOperation_t, int, int, const cuDoubleComplex *, - int, const int *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgetrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, n, nrhs, A, lda, devIpiv, B, ldb, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgeqrf_bufferSize( - cusolverDnHandle_t handle, int m, int n, float *A, int lda, int *lwork) { - using FuncPtr = 
cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgeqrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgeqrf_bufferSize( - cusolverDnHandle_t handle, int m, int n, double *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgeqrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCgeqrf_bufferSize(cusolverDnHandle_t handle, int m, int n, - cuComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCgeqrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZgeqrf_bufferSize(cusolverDnHandle_t handle, int m, int n, - cuDoubleComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgeqrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgeqrf(cusolverDnHandle_t handle, int m, - int n, float *A, int lda, - float *TAU, float *Workspace, - int Lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, float *, int, float *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgeqrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, TAU, Workspace, Lwork, devInfo); -} 
- -cusolverStatus_t CUSOLVERAPI cusolverDnDgeqrf(cusolverDnHandle_t handle, int m, - int n, double *A, int lda, - double *TAU, double *Workspace, - int Lwork, int *devInfo) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, double *, - int, double *, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgeqrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, TAU, Workspace, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgeqrf(cusolverDnHandle_t handle, int m, - int n, cuComplex *A, int lda, - cuComplex *TAU, - cuComplex *Workspace, int Lwork, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - cuComplex *, int, cuComplex *, - cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCgeqrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, TAU, Workspace, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgeqrf(cusolverDnHandle_t handle, int m, - int n, cuDoubleComplex *A, - int lda, cuDoubleComplex *TAU, - cuDoubleComplex *Workspace, - int Lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, cuDoubleComplex *, int, cuDoubleComplex *, - cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgeqrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, TAU, Workspace, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSorgqr_bufferSize( - cusolverDnHandle_t handle, int m, int n, int k, const float *A, int lda, - const float *tau, int *lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int, - const float *, int, const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSorgqr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, 
k, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDorgqr_bufferSize( - cusolverDnHandle_t handle, int m, int n, int k, const double *A, int lda, - const double *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - int, const double *, int, - const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDorgqr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCungqr_bufferSize( - cusolverDnHandle_t handle, int m, int n, int k, const cuComplex *A, int lda, - const cuComplex *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - int, const cuComplex *, int, - const cuComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverDnCungqr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZungqr_bufferSize( - cusolverDnHandle_t handle, int m, int n, int k, const cuDoubleComplex *A, - int lda, const cuDoubleComplex *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZungqr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSorgqr(cusolverDnHandle_t handle, int m, - int n, int k, float *A, int lda, - const float *tau, float *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, int, float *, int, const float *, float *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSorgqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, A, lda, tau, 
work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDorgqr(cusolverDnHandle_t handle, int m, - int n, int k, double *A, int lda, - const double *tau, double *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, int, double *, int, const double *, - double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDorgqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCungqr(cusolverDnHandle_t handle, int m, - int n, int k, cuComplex *A, - int lda, const cuComplex *tau, - cuComplex *work, int lwork, - int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, int, cuComplex *, int, const cuComplex *, - cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCungqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZungqr( - cusolverDnHandle_t handle, int m, int n, int k, cuDoubleComplex *A, int lda, - const cuDoubleComplex *tau, cuDoubleComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, int, cuDoubleComplex *, int, - const cuDoubleComplex *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZungqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSormqr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, - int m, int n, int k, const float *A, int lda, const float *tau, - const float *C, int ldc, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasOperation_t, int, int, int, - const float *, int, const 
float *, const float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSormqr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDormqr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, - int m, int n, int k, const double *A, int lda, const double *tau, - const double *C, int ldc, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasOperation_t, int, int, int, - const double *, int, const double *, const double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDormqr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCunmqr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, - int m, int n, int k, const cuComplex *A, int lda, const cuComplex *tau, - const cuComplex *C, int ldc, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasOperation_t, int, int, int, - const cuComplex *, int, const cuComplex *, const cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCunmqr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZunmqr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, - int m, int n, int k, const cuDoubleComplex *A, int lda, - const cuDoubleComplex *tau, const cuDoubleComplex *C, int ldc, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasOperation_t, int, int, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, - const 
cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZunmqr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSormqr( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, - int m, int n, int k, const float *A, int lda, const float *tau, float *C, - int ldc, float *work, int lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasOperation_t, int, int, int, - const float *, int, const float *, float *, int, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSormqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, work, - lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDormqr( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, - int m, int n, int k, const double *A, int lda, const double *tau, double *C, - int ldc, double *work, int lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasOperation_t, int, int, int, - const double *, int, const double *, double *, int, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDormqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, work, - lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCunmqr( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, - int m, int n, int k, const cuComplex *A, int lda, const cuComplex *tau, - cuComplex *C, int ldc, cuComplex *work, int lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasOperation_t, int, int, int, - const cuComplex *, int, const cuComplex *, cuComplex 
*, int, cuComplex *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCunmqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, work, - lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZunmqr( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, - int m, int n, int k, const cuDoubleComplex *A, int lda, - const cuDoubleComplex *tau, cuDoubleComplex *C, int ldc, - cuDoubleComplex *work, int lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasOperation_t, int, int, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZunmqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, work, - lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsytrf_bufferSize( - cusolverDnHandle_t handle, int n, float *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, - float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsytrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsytrf_bufferSize( - cusolverDnHandle_t handle, int n, double *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, - double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsytrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCsytrf_bufferSize( - cusolverDnHandle_t handle, int n, cuComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, - cuComplex *, 
int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCsytrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZsytrf_bufferSize( - cusolverDnHandle_t handle, int n, cuDoubleComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZsytrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsytrf(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - float *A, int lda, int *ipiv, - float *work, int lwork, - int *info) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - float *, int, int *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsytrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsytrf(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - double *A, int lda, int *ipiv, - double *work, int lwork, - int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, double *, int, int *, double *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsytrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCsytrf(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuComplex *A, int lda, int *ipiv, - cuComplex *work, int lwork, - int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, int *, - cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCsytrf"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZsytrf(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuDoubleComplex *A, int lda, - int *ipiv, cuDoubleComplex *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, int *, - cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZsytrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsytrs_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, int nrhs, - const float *A, int lda, const int *ipiv, float *B, int ldb, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const float *, int, - const int *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsytrs_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, ipiv, B, ldb, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsytrs_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, int nrhs, - const double *A, int lda, const int *ipiv, double *B, int ldb, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const double *, int, - const int *, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsytrs_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, ipiv, B, ldb, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCsytrs_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, int nrhs, - const cuComplex *A, int lda, const int *ipiv, cuComplex *B, int ldb, - int *lwork) { - 
using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const cuComplex *, int, - const int *, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCsytrs_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, ipiv, B, ldb, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZsytrs_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, int nrhs, - const cuDoubleComplex *A, int lda, const int *ipiv, cuDoubleComplex *B, - int ldb, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const cuDoubleComplex *, - int, const int *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZsytrs_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, ipiv, B, ldb, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsytrs(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - int nrhs, const float *A, int lda, - const int *ipiv, float *B, - int ldb, float *work, int lwork, - int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const float *, int, - const int *, float *, int, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsytrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, ipiv, B, ldb, work, lwork, - info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsytrs(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - int nrhs, const double *A, - int lda, const int *ipiv, - double *B, int ldb, double *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const double *, int, - const int *, double *, int, double *, int, int *); - static auto func_ptr = 
LoadSymbol("cusolverDnDsytrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, ipiv, B, ldb, work, lwork, - info); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCsytrs(cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, - int nrhs, const cuComplex *A, int lda, const int *ipiv, - cuComplex *B, int ldb, cuComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const cuComplex *, int, - const int *, cuComplex *, int, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCsytrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, ipiv, B, ldb, work, lwork, - info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZsytrs( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, int nrhs, - const cuDoubleComplex *A, int lda, const int *ipiv, cuDoubleComplex *B, - int ldb, cuDoubleComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const cuDoubleComplex *, - int, const int *, cuDoubleComplex *, int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZsytrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, ipiv, B, ldb, work, lwork, - info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsytri_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, float *A, int lda, - const int *ipiv, int *lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - float *, int, const int *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsytri_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsytri_bufferSize( - cusolverDnHandle_t handle, 
cublasFillMode_t uplo, int n, double *A, int lda, - const int *ipiv, int *lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - double *, int, const int *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsytri_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCsytri_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, cuComplex *A, - int lda, const int *ipiv, int *lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - cuComplex *, int, const int *, int *); - static auto func_ptr = LoadSymbol("cusolverDnCsytri_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZsytri_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, cuDoubleComplex *A, - int lda, const int *ipiv, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, - const int *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZsytri_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsytri(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - float *A, int lda, - const int *ipiv, float *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, float *, int, const int *, - float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsytri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsytri(cusolverDnHandle_t handle, - cublasFillMode_t 
uplo, int n, - double *A, int lda, - const int *ipiv, double *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, double *, int, const int *, - double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsytri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCsytri(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuComplex *A, int lda, - const int *ipiv, cuComplex *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, const int *, - cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCsytri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZsytri( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, cuDoubleComplex *A, - int lda, const int *ipiv, cuDoubleComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, - const int *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZsytri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgebrd_bufferSize( - cusolverDnHandle_t handle, int m, int n, int *Lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgebrd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, Lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgebrd_bufferSize( - cusolverDnHandle_t handle, int m, int n, int *Lwork) { - using 
FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgebrd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, Lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgebrd_bufferSize( - cusolverDnHandle_t handle, int m, int n, int *Lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCgebrd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, Lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgebrd_bufferSize( - cusolverDnHandle_t handle, int m, int n, int *Lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgebrd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, Lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgebrd(cusolverDnHandle_t handle, int m, - int n, float *A, int lda, - float *D, float *E, float *TAUQ, - float *TAUP, float *Work, - int Lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, float *, int, float *, float *, float *, - float *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgebrd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, D, E, TAUQ, TAUP, Work, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgebrd(cusolverDnHandle_t handle, int m, - int n, double *A, int lda, - double *D, double *E, - double *TAUQ, double *TAUP, - double *Work, int Lwork, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, double *, int, double *, double *, double *, - double *, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgebrd"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, D, E, TAUQ, TAUP, Work, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgebrd(cusolverDnHandle_t handle, int m, - int n, cuComplex *A, int lda, - float *D, float *E, - cuComplex *TAUQ, cuComplex *TAUP, - cuComplex *Work, int Lwork, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, cuComplex *, int, float *, float *, - cuComplex *, cuComplex *, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCgebrd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, D, E, TAUQ, TAUP, Work, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgebrd( - cusolverDnHandle_t handle, int m, int n, cuDoubleComplex *A, int lda, - double *D, double *E, cuDoubleComplex *TAUQ, cuDoubleComplex *TAUP, - cuDoubleComplex *Work, int Lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, cuDoubleComplex *, int, double *, double *, - cuDoubleComplex *, cuDoubleComplex *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgebrd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, D, E, TAUQ, TAUP, Work, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSorgbr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, int m, int n, int k, - const float *A, int lda, const float *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, int, int, int, const float *, int, - const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSorgbr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, m, n, k, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDorgbr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, int m, int n, int k, - const 
double *A, int lda, const double *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, int, int, int, const double *, int, - const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDorgbr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, m, n, k, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCungbr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, int m, int n, int k, - const cuComplex *A, int lda, const cuComplex *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, int, int, int, const cuComplex *, - int, const cuComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverDnCungbr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, m, n, k, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZungbr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, int m, int n, int k, - const cuDoubleComplex *A, int lda, const cuDoubleComplex *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, int, int, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZungbr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, m, n, k, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSorgbr(cusolverDnHandle_t handle, - cublasSideMode_t side, int m, - int n, int k, float *A, int lda, - const float *tau, float *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, int, int, int, float *, int, - const float *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSorgbr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, 
m, n, k, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDorgbr(cusolverDnHandle_t handle, - cublasSideMode_t side, int m, - int n, int k, double *A, int lda, - const double *tau, double *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, int, int, int, double *, int, - const double *, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDorgbr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, m, n, k, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCungbr(cusolverDnHandle_t handle, - cublasSideMode_t side, int m, - int n, int k, cuComplex *A, - int lda, const cuComplex *tau, - cuComplex *work, int lwork, - int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, int, int, int, cuComplex *, int, - const cuComplex *, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCungbr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, m, n, k, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZungbr(cusolverDnHandle_t handle, cublasSideMode_t side, int m, int n, - int k, cuDoubleComplex *A, int lda, const cuDoubleComplex *tau, - cuDoubleComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, int, int, int, cuDoubleComplex *, - int, const cuDoubleComplex *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZungbr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, m, n, k, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsytrd_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, const float *A, - int lda, const float *d, const float *e, const float *tau, int *lwork) { - using FuncPtr = 
cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, const float *, int, - const float *, const float *, const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsytrd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, d, e, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsytrd_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, const double *A, - int lda, const double *d, const double *e, const double *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, const double *, int, - const double *, const double *, const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsytrd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, d, e, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnChetrd_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, const cuComplex *A, - int lda, const float *d, const float *e, const cuComplex *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, const cuComplex *, int, - const float *, const float *, const cuComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverDnChetrd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, d, e, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZhetrd_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, - const cuDoubleComplex *A, int lda, const double *d, const double *e, - const cuDoubleComplex *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, int, - const double *, const double *, const cuDoubleComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZhetrd_bufferSize"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, d, e, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsytrd(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - float *A, int lda, float *d, - float *e, float *tau, float *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, float *, int, float *, float *, - float *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsytrd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, d, e, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsytrd( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, double *A, int lda, - double *d, double *e, double *tau, double *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, double *, int, double *, - double *, double *, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsytrd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, d, e, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnChetrd(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuComplex *A, int lda, float *d, - float *e, cuComplex *tau, - cuComplex *work, int lwork, - int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, float *, - float *, cuComplex *, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnChetrd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, d, e, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZhetrd( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, cuDoubleComplex *A, - int lda, double *d, double *e, cuDoubleComplex *tau, cuDoubleComplex *work, - int 
lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, - double *, double *, cuDoubleComplex *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZhetrd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, d, e, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSorgtr_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, const float *A, - int lda, const float *tau, int *lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - const float *, int, const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSorgtr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDorgtr_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, const double *A, - int lda, const double *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, const double *, int, - const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDorgtr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCungtr_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, const cuComplex *A, - int lda, const cuComplex *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, const cuComplex *, int, - const cuComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverDnCungtr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZungtr_bufferSize( - cusolverDnHandle_t handle, 
cublasFillMode_t uplo, int n, - const cuDoubleComplex *A, int lda, const cuDoubleComplex *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZungtr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSorgtr(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - float *A, int lda, - const float *tau, float *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, float *, int, const float *, - float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSorgtr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDorgtr(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - double *A, int lda, - const double *tau, double *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, double *, int, const double *, - double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDorgtr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCungtr( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, cuComplex *A, - int lda, const cuComplex *tau, cuComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, - const cuComplex *, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCungtr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, tau, 
work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZungtr(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuDoubleComplex *A, int lda, - const cuDoubleComplex *tau, - cuDoubleComplex *work, int lwork, - int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, - const cuDoubleComplex *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZungtr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSormtr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, int m, int n, const float *A, int lda, - const float *tau, const float *C, int ldc, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - int, int, const float *, int, const float *, const float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSormtr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDormtr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, int m, int n, const double *A, int lda, - const double *tau, const double *C, int ldc, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - int, int, const double *, int, const double *, const double *, int, - int *); - static auto func_ptr = LoadSymbol("cusolverDnDormtr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCunmtr_bufferSize( 
- cusolverDnHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, int m, int n, const cuComplex *A, int lda, - const cuComplex *tau, const cuComplex *C, int ldc, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - int, int, const cuComplex *, int, const cuComplex *, const cuComplex *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCunmtr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZunmtr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, int m, int n, const cuDoubleComplex *A, int lda, - const cuDoubleComplex *tau, const cuDoubleComplex *C, int ldc, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - int, int, const cuDoubleComplex *, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZunmtr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSormtr( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, int m, int n, float *A, int lda, float *tau, - float *C, int ldc, float *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - int, int, float *, int, float *, float *, int, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSormtr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, work, - 
lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDormtr( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, int m, int n, double *A, int lda, double *tau, - double *C, int ldc, double *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - int, int, double *, int, double *, double *, int, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDormtr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, work, - lwork, info); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCunmtr(cusolverDnHandle_t handle, cublasSideMode_t side, - cublasFillMode_t uplo, cublasOperation_t trans, int m, int n, - cuComplex *A, int lda, cuComplex *tau, cuComplex *C, int ldc, - cuComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - int, int, cuComplex *, int, cuComplex *, cuComplex *, int, cuComplex *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCunmtr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, work, - lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZunmtr( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, int m, int n, cuDoubleComplex *A, int lda, - cuDoubleComplex *tau, cuDoubleComplex *C, int ldc, cuDoubleComplex *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - int, int, cuDoubleComplex *, int, cuDoubleComplex *, cuDoubleComplex *, - int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZunmtr"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, work, - lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgesvd_bufferSize( - cusolverDnHandle_t handle, int m, int n, int *lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgesvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgesvd_bufferSize( - cusolverDnHandle_t handle, int m, int n, int *lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgesvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgesvd_bufferSize( - cusolverDnHandle_t handle, int m, int n, int *lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCgesvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgesvd_bufferSize( - cusolverDnHandle_t handle, int m, int n, int *lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgesvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgesvd( - cusolverDnHandle_t handle, signed char jobu, signed char jobvt, int m, - int n, float *A, int lda, float *S, float *U, int ldu, float *VT, int ldvt, - float *work, int lwork, float *rwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, signed char, signed char, int, int, float *, int, - float *, float *, 
int, float *, int, float *, int, float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgesvd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobu, jobvt, m, n, A, lda, S, U, ldu, VT, ldvt, work, - lwork, rwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgesvd( - cusolverDnHandle_t handle, signed char jobu, signed char jobvt, int m, - int n, double *A, int lda, double *S, double *U, int ldu, double *VT, - int ldvt, double *work, int lwork, double *rwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, signed char, signed char, int, int, double *, int, - double *, double *, int, double *, int, double *, int, double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgesvd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobu, jobvt, m, n, A, lda, S, U, ldu, VT, ldvt, work, - lwork, rwork, info); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCgesvd(cusolverDnHandle_t handle, signed char jobu, signed char jobvt, - int m, int n, cuComplex *A, int lda, float *S, cuComplex *U, - int ldu, cuComplex *VT, int ldvt, cuComplex *work, int lwork, - float *rwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, signed char, signed char, int, int, cuComplex *, int, - float *, cuComplex *, int, cuComplex *, int, cuComplex *, int, float *, - int *); - static auto func_ptr = LoadSymbol("cusolverDnCgesvd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobu, jobvt, m, n, A, lda, S, U, ldu, VT, ldvt, work, - lwork, rwork, info); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZgesvd(cusolverDnHandle_t handle, signed char jobu, signed char jobvt, - int m, int n, cuDoubleComplex *A, int lda, double *S, - cuDoubleComplex *U, int ldu, cuDoubleComplex *VT, int ldvt, - cuDoubleComplex *work, int lwork, double *rwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, 
signed char, signed char, int, int, cuDoubleComplex *, - int, double *, cuDoubleComplex *, int, cuDoubleComplex *, int, - cuDoubleComplex *, int, double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgesvd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobu, jobvt, m, n, A, lda, S, U, ldu, VT, ldvt, work, - lwork, rwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsyevd_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const float *A, int lda, const float *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const float *, int, const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsyevd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsyevd_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const double *A, int lda, const double *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const double *, int, const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsyevd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCheevd_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const cuComplex *A, int lda, const float *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const cuComplex *, int, const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnCheevd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, 
lda, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZheevd_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const cuDoubleComplex *A, int lda, const double *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const cuDoubleComplex *, int, const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZheevd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsyevd( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, float *A, int lda, float *W, float *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, float *, - int, float *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsyevd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsyevd( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, double *A, int lda, double *W, double *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, double *, - int, double *, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsyevd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCheevd(cusolverDnHandle_t handle, - cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, - cuComplex *A, int lda, float *W, - cuComplex *work, int lwork, - int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, 
cublasFillMode_t, int, cuComplex *, - int, float *, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCheevd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZheevd(cusolverDnHandle_t handle, - cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, - cuDoubleComplex *A, int lda, - double *W, cuDoubleComplex *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - cuDoubleComplex *, int, double *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZheevd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsyevdx_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cusolverEigRange_t range, - cublasFillMode_t uplo, int n, const float *A, int lda, float vl, float vu, - int il, int iu, int *meig, const float *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cusolverEigRange_t, - cublasFillMode_t, int, const float *, int, float, float, int, int, int *, - const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsyevdx_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, range, uplo, n, A, lda, vl, vu, il, iu, meig, W, - lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsyevdx_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cusolverEigRange_t range, - cublasFillMode_t uplo, int n, const double *A, int lda, double vl, - double vu, int il, int iu, int *meig, const double *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cusolverEigRange_t, - cublasFillMode_t, int, const double *, int, 
double, double, int, int, - int *, const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsyevdx_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, range, uplo, n, A, lda, vl, vu, il, iu, meig, W, - lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCheevdx_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cusolverEigRange_t range, - cublasFillMode_t uplo, int n, const cuComplex *A, int lda, float vl, - float vu, int il, int iu, int *meig, const float *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cusolverEigRange_t, - cublasFillMode_t, int, const cuComplex *, int, float, float, int, int, - int *, const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnCheevdx_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, range, uplo, n, A, lda, vl, vu, il, iu, meig, W, - lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZheevdx_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cusolverEigRange_t range, - cublasFillMode_t uplo, int n, const cuDoubleComplex *A, int lda, double vl, - double vu, int il, int iu, int *meig, const double *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cusolverEigRange_t, - cublasFillMode_t, int, const cuDoubleComplex *, int, double, double, int, - int, int *, const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZheevdx_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, range, uplo, n, A, lda, vl, vu, il, iu, meig, W, - lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsyevdx( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cusolverEigRange_t range, - cublasFillMode_t uplo, int n, float *A, int lda, float vl, float vu, int il, - int iu, int *meig, float *W, float *work, int lwork, int *info) { - using 
FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cusolverEigRange_t, - cublasFillMode_t, int, float *, int, float, float, int, int, int *, - float *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsyevdx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, range, uplo, n, A, lda, vl, vu, il, iu, meig, W, - work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsyevdx( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cusolverEigRange_t range, - cublasFillMode_t uplo, int n, double *A, int lda, double vl, double vu, - int il, int iu, int *meig, double *W, double *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cusolverEigRange_t, - cublasFillMode_t, int, double *, int, double, double, int, int, int *, - double *, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsyevdx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, range, uplo, n, A, lda, vl, vu, il, iu, meig, W, - work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCheevdx(cusolverDnHandle_t handle, cusolverEigMode_t jobz, - cusolverEigRange_t range, cublasFillMode_t uplo, int n, - cuComplex *A, int lda, float vl, float vu, int il, int iu, - int *meig, float *W, cuComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cusolverEigRange_t, - cublasFillMode_t, int, cuComplex *, int, float, float, int, int, int *, - float *, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCheevdx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, range, uplo, n, A, lda, vl, vu, il, iu, meig, W, - work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZheevdx( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cusolverEigRange_t range, - 
cublasFillMode_t uplo, int n, cuDoubleComplex *A, int lda, double vl, - double vu, int il, int iu, int *meig, double *W, cuDoubleComplex *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cusolverEigRange_t, - cublasFillMode_t, int, cuDoubleComplex *, int, double, double, int, int, - int *, double *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZheevdx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, range, uplo, n, A, lda, vl, vu, il, iu, meig, W, - work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsygvdx_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cusolverEigRange_t range, cublasFillMode_t uplo, int n, const float *A, - int lda, const float *B, int ldb, float vl, float vu, int il, int iu, - int *meig, const float *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cusolverEigRange_t, cublasFillMode_t, int, const float *, int, - const float *, int, float, float, int, int, int *, const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsygvdx_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, - il, iu, meig, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsygvdx_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cusolverEigRange_t range, cublasFillMode_t uplo, int n, const double *A, - int lda, const double *B, int ldb, double vl, double vu, int il, int iu, - int *meig, const double *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cusolverEigRange_t, cublasFillMode_t, int, const double *, int, - const double *, int, double, double, int, int, int *, 
const double *, - int *); - static auto func_ptr = LoadSymbol("cusolverDnDsygvdx_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, - il, iu, meig, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnChegvdx_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cusolverEigRange_t range, cublasFillMode_t uplo, int n, const cuComplex *A, - int lda, const cuComplex *B, int ldb, float vl, float vu, int il, int iu, - int *meig, const float *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cusolverEigRange_t, cublasFillMode_t, int, const cuComplex *, int, - const cuComplex *, int, float, float, int, int, int *, const float *, - int *); - static auto func_ptr = LoadSymbol("cusolverDnChegvdx_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, - il, iu, meig, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZhegvdx_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cusolverEigRange_t range, cublasFillMode_t uplo, int n, - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - double vl, double vu, int il, int iu, int *meig, const double *W, - int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cusolverEigRange_t, cublasFillMode_t, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, double, double, int, int, int *, - const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZhegvdx_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, - il, iu, meig, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsygvdx( - 
cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cusolverEigRange_t range, cublasFillMode_t uplo, int n, float *A, int lda, - float *B, int ldb, float vl, float vu, int il, int iu, int *meig, float *W, - float *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cusolverEigRange_t, cublasFillMode_t, int, float *, int, float *, int, - float, float, int, int, int *, float *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsygvdx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, - il, iu, meig, W, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsygvdx( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cusolverEigRange_t range, cublasFillMode_t uplo, int n, double *A, int lda, - double *B, int ldb, double vl, double vu, int il, int iu, int *meig, - double *W, double *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cusolverEigRange_t, cublasFillMode_t, int, double *, int, double *, int, - double, double, int, int, int *, double *, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsygvdx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, - il, iu, meig, W, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnChegvdx( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cusolverEigRange_t range, cublasFillMode_t uplo, int n, cuComplex *A, - int lda, cuComplex *B, int ldb, float vl, float vu, int il, int iu, - int *meig, float *W, cuComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, 
cusolverEigMode_t, - cusolverEigRange_t, cublasFillMode_t, int, cuComplex *, int, cuComplex *, - int, float, float, int, int, int *, float *, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnChegvdx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, - il, iu, meig, W, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZhegvdx( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cusolverEigRange_t range, cublasFillMode_t uplo, int n, cuDoubleComplex *A, - int lda, cuDoubleComplex *B, int ldb, double vl, double vu, int il, int iu, - int *meig, double *W, cuDoubleComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cusolverEigRange_t, cublasFillMode_t, int, cuDoubleComplex *, int, - cuDoubleComplex *, int, double, double, int, int, int *, double *, - cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZhegvdx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, - il, iu, meig, W, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsygvd_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, const float *A, int lda, const float *B, - int ldb, const float *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, const float *, int, const float *, int, - const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsygvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsygvd_bufferSize( - cusolverDnHandle_t 
handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, const double *A, int lda, const double *B, - int ldb, const double *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, const double *, int, const double *, int, - const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsygvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnChegvd_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, const cuComplex *A, int lda, - const cuComplex *B, int ldb, const float *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, const cuComplex *, int, const cuComplex *, int, - const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnChegvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZhegvd_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, const cuDoubleComplex *A, int lda, - const cuDoubleComplex *B, int ldb, const double *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZhegvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsygvd( - 
cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, float *A, int lda, float *B, int ldb, - float *W, float *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, float *, int, float *, int, float *, float *, int, - int *); - static auto func_ptr = LoadSymbol("cusolverDnSsygvd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, - info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsygvd( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, double *A, int lda, double *B, int ldb, - double *W, double *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, double *, int, double *, int, double *, double *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsygvd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, - info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnChegvd( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, cuComplex *A, int lda, cuComplex *B, int ldb, - float *W, cuComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, cuComplex *, int, cuComplex *, int, float *, - cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnChegvd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, - info); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZhegvd(cusolverDnHandle_t handle, 
cusolverEigType_t itype, - cusolverEigMode_t jobz, cublasFillMode_t uplo, int n, - cuDoubleComplex *A, int lda, cuDoubleComplex *B, int ldb, - double *W, cuDoubleComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int, - double *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZhegvd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, - info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCreateSyevjInfo(syevjInfo_t *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(syevjInfo_t *); - static auto func_ptr = LoadSymbol("cusolverDnCreateSyevjInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDestroySyevjInfo(syevjInfo_t info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnDestroySyevjInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXsyevjSetTolerance(syevjInfo_t info, - double tolerance) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(syevjInfo_t, double); - static auto func_ptr = LoadSymbol("cusolverDnXsyevjSetTolerance"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, tolerance); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXsyevjSetMaxSweeps(syevjInfo_t info, - int max_sweeps) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(syevjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnXsyevjSetMaxSweeps"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, max_sweeps); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXsyevjSetSortEig(syevjInfo_t info, - int sort_eig) { - using FuncPtr = 
cusolverStatus_t(CUSOLVERAPI *)(syevjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnXsyevjSetSortEig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, sort_eig); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXsyevjGetResidual( - cusolverDnHandle_t handle, syevjInfo_t info, double *residual) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, - syevjInfo_t, double *); - static auto func_ptr = LoadSymbol("cusolverDnXsyevjGetResidual"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, residual); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXsyevjGetSweeps( - cusolverDnHandle_t handle, syevjInfo_t info, int *executed_sweeps) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, syevjInfo_t, int *); - static auto func_ptr = LoadSymbol("cusolverDnXsyevjGetSweeps"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, executed_sweeps); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsyevjBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const float *A, int lda, const float *W, int *lwork, - syevjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const float *, int, const float *, int *, syevjInfo_t, int); - static auto func_ptr = - LoadSymbol("cusolverDnSsyevjBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork, params, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsyevjBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const double *A, int lda, const double *W, int *lwork, - syevjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const double *, 
int, const double *, int *, syevjInfo_t, int); - static auto func_ptr = - LoadSymbol("cusolverDnDsyevjBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork, params, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCheevjBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const cuComplex *A, int lda, const float *W, int *lwork, - syevjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const cuComplex *, int, const float *, int *, syevjInfo_t, int); - static auto func_ptr = - LoadSymbol("cusolverDnCheevjBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork, params, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZheevjBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const cuDoubleComplex *A, int lda, const double *W, int *lwork, - syevjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const cuDoubleComplex *, int, const double *, int *, syevjInfo_t, int); - static auto func_ptr = - LoadSymbol("cusolverDnZheevjBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork, params, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsyevjBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, float *A, int lda, float *W, float *work, int lwork, int *info, - syevjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, float *, - int, float *, float *, int, int *, syevjInfo_t, int); - static auto func_ptr = 
LoadSymbol("cusolverDnSsyevjBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params, - batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsyevjBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, double *A, int lda, double *W, double *work, int lwork, int *info, - syevjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, double *, - int, double *, double *, int, int *, syevjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnDsyevjBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params, - batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCheevjBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, cuComplex *A, int lda, float *W, cuComplex *work, int lwork, - int *info, syevjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, cuComplex *, - int, float *, cuComplex *, int, int *, syevjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnCheevjBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params, - batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZheevjBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, cuDoubleComplex *A, int lda, double *W, cuDoubleComplex *work, - int lwork, int *info, syevjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - cuDoubleComplex *, int, double *, cuDoubleComplex *, int, int *, - syevjInfo_t, int); - static auto func_ptr = 
LoadSymbol("cusolverDnZheevjBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params, - batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsyevj_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const float *A, int lda, const float *W, int *lwork, - syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const float *, int, const float *, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnSsyevj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsyevj_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const double *A, int lda, const double *W, int *lwork, - syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const double *, int, const double *, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnDsyevj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCheevj_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const cuComplex *A, int lda, const float *W, int *lwork, - syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const cuComplex *, int, const float *, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnCheevj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork, params); -} - -cusolverStatus_t CUSOLVERAPI 
cusolverDnZheevj_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const cuDoubleComplex *A, int lda, const double *W, int *lwork, - syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const cuDoubleComplex *, int, const double *, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnZheevj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsyevj(cusolverDnHandle_t handle, - cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, - float *A, int lda, float *W, - float *work, int lwork, int *info, - syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, float *, - int, float *, float *, int, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnSsyevj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsyevj(cusolverDnHandle_t handle, - cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, - double *A, int lda, double *W, - double *work, int lwork, - int *info, syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, double *, - int, double *, double *, int, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnDsyevj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCheevj(cusolverDnHandle_t handle, - cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, - cuComplex *A, int lda, float *W, - cuComplex *work, int lwork, - int *info, syevjInfo_t params) { - 
using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, cuComplex *, - int, float *, cuComplex *, int, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnCheevj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZheevj( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, cuDoubleComplex *A, int lda, double *W, cuDoubleComplex *work, - int lwork, int *info, syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - cuDoubleComplex *, int, double *, cuDoubleComplex *, int, int *, - syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnZheevj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsygvj_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, const float *A, int lda, const float *B, - int ldb, const float *W, int *lwork, syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, const float *, int, const float *, int, - const float *, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnSsygvj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork, - params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsygvj_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, const double *A, int lda, const double *B, - int ldb, const double *W, int *lwork, syevjInfo_t params) { - using FuncPtr = 
cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, const double *, int, const double *, int, - const double *, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnDsygvj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork, - params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnChegvj_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, const cuComplex *A, int lda, - const cuComplex *B, int ldb, const float *W, int *lwork, - syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, const cuComplex *, int, const cuComplex *, int, - const float *, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnChegvj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork, - params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZhegvj_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, const cuDoubleComplex *A, int lda, - const cuDoubleComplex *B, int ldb, const double *W, int *lwork, - syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const double *, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnZhegvj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork, - params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsygvj( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t 
uplo, int n, float *A, int lda, float *B, int ldb, - float *W, float *work, int lwork, int *info, syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, float *, int, float *, int, float *, float *, int, - int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnSsygvj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, - info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsygvj( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, double *A, int lda, double *B, int ldb, - double *W, double *work, int lwork, int *info, syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, double *, int, double *, int, double *, double *, - int, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnDsygvj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, - info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnChegvj( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, cuComplex *A, int lda, cuComplex *B, int ldb, - float *W, cuComplex *work, int lwork, int *info, syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, cuComplex *, int, cuComplex *, int, float *, - cuComplex *, int, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnChegvj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, - info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZhegvj( - 
cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, cuDoubleComplex *A, int lda, - cuDoubleComplex *B, int ldb, double *W, cuDoubleComplex *work, int lwork, - int *info, syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int, - double *, cuDoubleComplex *, int, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnZhegvj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, - info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCreateGesvdjInfo(gesvdjInfo_t *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(gesvdjInfo_t *); - static auto func_ptr = LoadSymbol("cusolverDnCreateGesvdjInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDestroyGesvdjInfo(gesvdjInfo_t info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnDestroyGesvdjInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXgesvdjSetTolerance(gesvdjInfo_t info, - double tolerance) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(gesvdjInfo_t, double); - static auto func_ptr = LoadSymbol("cusolverDnXgesvdjSetTolerance"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, tolerance); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXgesvdjSetMaxSweeps(gesvdjInfo_t info, - int max_sweeps) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(gesvdjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnXgesvdjSetMaxSweeps"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, max_sweeps); -} - -cusolverStatus_t CUSOLVERAPI 
cusolverDnXgesvdjSetSortEig(gesvdjInfo_t info, - int sort_svd) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(gesvdjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnXgesvdjSetSortEig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, sort_svd); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXgesvdjGetResidual( - cusolverDnHandle_t handle, gesvdjInfo_t info, double *residual) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, - gesvdjInfo_t, double *); - static auto func_ptr = LoadSymbol("cusolverDnXgesvdjGetResidual"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, residual); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXgesvdjGetSweeps( - cusolverDnHandle_t handle, gesvdjInfo_t info, int *executed_sweeps) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, gesvdjInfo_t, int *); - static auto func_ptr = LoadSymbol("cusolverDnXgesvdjGetSweeps"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, executed_sweeps); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgesvdjBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int m, int n, - const float *A, int lda, const float *S, const float *U, int ldu, - const float *V, int ldv, int *lwork, gesvdjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, const float *, int, - const float *, const float *, int, const float *, int, int *, - gesvdjInfo_t, int); - static auto func_ptr = - LoadSymbol("cusolverDnSgesvdjBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, lwork, params, - batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgesvdjBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int m, int n, - const double *A, int lda, const double *S, const double *U, int ldu, 
- const double *V, int ldv, int *lwork, gesvdjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, const double *, int, - const double *, const double *, int, const double *, int, int *, - gesvdjInfo_t, int); - static auto func_ptr = - LoadSymbol("cusolverDnDgesvdjBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, lwork, params, - batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgesvdjBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int m, int n, - const cuComplex *A, int lda, const float *S, const cuComplex *U, int ldu, - const cuComplex *V, int ldv, int *lwork, gesvdjInfo_t params, - int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, const cuComplex *, int, - const float *, const cuComplex *, int, const cuComplex *, int, int *, - gesvdjInfo_t, int); - static auto func_ptr = - LoadSymbol("cusolverDnCgesvdjBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, lwork, params, - batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgesvdjBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int m, int n, - const cuDoubleComplex *A, int lda, const double *S, - const cuDoubleComplex *U, int ldu, const cuDoubleComplex *V, int ldv, - int *lwork, gesvdjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, const cuDoubleComplex *, - int, const double *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, int *, gesvdjInfo_t, int); - static auto func_ptr = - LoadSymbol("cusolverDnZgesvdjBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, m, n, A, lda, S, U, ldu, V, 
ldv, lwork, params, - batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgesvdjBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int m, int n, float *A, - int lda, float *S, float *U, int ldu, float *V, int ldv, float *work, - int lwork, int *info, gesvdjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, float *, int, float *, - float *, int, float *, int, float *, int, int *, gesvdjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnSgesvdjBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, - info, params, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgesvdjBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int m, int n, double *A, - int lda, double *S, double *U, int ldu, double *V, int ldv, double *work, - int lwork, int *info, gesvdjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, double *, int, double *, - double *, int, double *, int, double *, int, int *, gesvdjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnDgesvdjBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, - info, params, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgesvdjBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int m, int n, - cuComplex *A, int lda, float *S, cuComplex *U, int ldu, cuComplex *V, - int ldv, cuComplex *work, int lwork, int *info, gesvdjInfo_t params, - int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, cuComplex *, int, - float *, cuComplex *, int, cuComplex *, int, cuComplex *, int, int *, - gesvdjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnCgesvdjBatched"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, - info, params, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgesvdjBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int m, int n, - cuDoubleComplex *A, int lda, double *S, cuDoubleComplex *U, int ldu, - cuDoubleComplex *V, int ldv, cuDoubleComplex *work, int lwork, int *info, - gesvdjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, cuDoubleComplex *, int, - double *, cuDoubleComplex *, int, cuDoubleComplex *, int, - cuDoubleComplex *, int, int *, gesvdjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnZgesvdjBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, - info, params, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgesvdj_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, - const float *A, int lda, const float *S, const float *U, int ldu, - const float *V, int ldv, int *lwork, gesvdjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, const float *, int, - const float *, const float *, int, const float *, int, int *, - gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnSgesvdj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, lwork, - params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgesvdj_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, - const double *A, int lda, const double *S, const double *U, int ldu, - const double *V, int ldv, int *lwork, gesvdjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, 
const double *, int, - const double *, const double *, int, const double *, int, int *, - gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnDgesvdj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, lwork, - params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgesvdj_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, - const cuComplex *A, int lda, const float *S, const cuComplex *U, int ldu, - const cuComplex *V, int ldv, int *lwork, gesvdjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, const cuComplex *, - int, const float *, const cuComplex *, int, const cuComplex *, int, int *, - gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnCgesvdj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, lwork, - params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgesvdj_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, - const cuDoubleComplex *A, int lda, const double *S, - const cuDoubleComplex *U, int ldu, const cuDoubleComplex *V, int ldv, - int *lwork, gesvdjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, - const cuDoubleComplex *, int, const double *, const cuDoubleComplex *, - int, const cuDoubleComplex *, int, int *, gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnZgesvdj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, lwork, - params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgesvdj( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, - float *A, int lda, float *S, float *U, int ldu, float *V, int ldv, - float *work, int 
lwork, int *info, gesvdjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, float *, int, - float *, float *, int, float *, int, float *, int, int *, gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnSgesvdj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, work, - lwork, info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgesvdj( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, - double *A, int lda, double *S, double *U, int ldu, double *V, int ldv, - double *work, int lwork, int *info, gesvdjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, double *, int, - double *, double *, int, double *, int, double *, int, int *, - gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnDgesvdj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, work, - lwork, info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgesvdj( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, - cuComplex *A, int lda, float *S, cuComplex *U, int ldu, cuComplex *V, - int ldv, cuComplex *work, int lwork, int *info, gesvdjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, cuComplex *, int, - float *, cuComplex *, int, cuComplex *, int, cuComplex *, int, int *, - gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnCgesvdj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, work, - lwork, info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgesvdj( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, - cuDoubleComplex *A, int lda, double *S, 
cuDoubleComplex *U, int ldu, - cuDoubleComplex *V, int ldv, cuDoubleComplex *work, int lwork, int *info, - gesvdjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, cuDoubleComplex *, - int, double *, cuDoubleComplex *, int, cuDoubleComplex *, int, - cuDoubleComplex *, int, int *, gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnZgesvdj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, work, - lwork, info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgesvdaStridedBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int rank, int m, int n, - const float *d_A, int lda, long long int strideA, const float *d_S, - long long int strideS, const float *d_U, int ldu, long long int strideU, - const float *d_V, int ldv, long long int strideV, int *lwork, - int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, const float *, int, - long long, const float *, long long, const float *, int, long long, - const float *, int, long long, int *, int); - static auto func_ptr = - LoadSymbol("cusolverDnSgesvdaStridedBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, rank, m, n, d_A, lda, strideA, d_S, strideS, - d_U, ldu, strideU, d_V, ldv, strideV, lwork, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgesvdaStridedBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int rank, int m, int n, - const double *d_A, int lda, long long int strideA, const double *d_S, - long long int strideS, const double *d_U, int ldu, long long int strideU, - const double *d_V, int ldv, long long int strideV, int *lwork, - int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, const double *, int, - long 
long, const double *, long long, const double *, int, long long, - const double *, int, long long, int *, int); - static auto func_ptr = - LoadSymbol("cusolverDnDgesvdaStridedBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, rank, m, n, d_A, lda, strideA, d_S, strideS, - d_U, ldu, strideU, d_V, ldv, strideV, lwork, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgesvdaStridedBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int rank, int m, int n, - const cuComplex *d_A, int lda, long long int strideA, const float *d_S, - long long int strideS, const cuComplex *d_U, int ldu, long long int strideU, - const cuComplex *d_V, int ldv, long long int strideV, int *lwork, - int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, const cuComplex *, - int, long long, const float *, long long, const cuComplex *, int, - long long, const cuComplex *, int, long long, int *, int); - static auto func_ptr = - LoadSymbol("cusolverDnCgesvdaStridedBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, rank, m, n, d_A, lda, strideA, d_S, strideS, - d_U, ldu, strideU, d_V, ldv, strideV, lwork, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgesvdaStridedBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int rank, int m, int n, - const cuDoubleComplex *d_A, int lda, long long int strideA, - const double *d_S, long long int strideS, const cuDoubleComplex *d_U, - int ldu, long long int strideU, const cuDoubleComplex *d_V, int ldv, - long long int strideV, int *lwork, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, - const cuDoubleComplex *, int, long long, const double *, long long, - const cuDoubleComplex *, int, long long, const cuDoubleComplex *, int, - long long, int *, int); - 
static auto func_ptr = - LoadSymbol("cusolverDnZgesvdaStridedBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, rank, m, n, d_A, lda, strideA, d_S, strideS, - d_U, ldu, strideU, d_V, ldv, strideV, lwork, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgesvdaStridedBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int rank, int m, int n, - const float *d_A, int lda, long long int strideA, float *d_S, - long long int strideS, float *d_U, int ldu, long long int strideU, - float *d_V, int ldv, long long int strideV, float *d_work, int lwork, - int *d_info, double *h_R_nrmF, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, const float *, int, - long long, float *, long long, float *, int, long long, float *, int, - long long, float *, int, int *, double *, int); - static auto func_ptr = LoadSymbol("cusolverDnSgesvdaStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, rank, m, n, d_A, lda, strideA, d_S, strideS, - d_U, ldu, strideU, d_V, ldv, strideV, d_work, lwork, d_info, - h_R_nrmF, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgesvdaStridedBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int rank, int m, int n, - const double *d_A, int lda, long long int strideA, double *d_S, - long long int strideS, double *d_U, int ldu, long long int strideU, - double *d_V, int ldv, long long int strideV, double *d_work, int lwork, - int *d_info, double *h_R_nrmF, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, const double *, int, - long long, double *, long long, double *, int, long long, double *, int, - long long, double *, int, int *, double *, int); - static auto func_ptr = LoadSymbol("cusolverDnDgesvdaStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, jobz, rank, m, n, d_A, lda, strideA, d_S, strideS, - d_U, ldu, strideU, d_V, ldv, strideV, d_work, lwork, d_info, - h_R_nrmF, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgesvdaStridedBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int rank, int m, int n, - const cuComplex *d_A, int lda, long long int strideA, float *d_S, - long long int strideS, cuComplex *d_U, int ldu, long long int strideU, - cuComplex *d_V, int ldv, long long int strideV, cuComplex *d_work, - int lwork, int *d_info, double *h_R_nrmF, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, const cuComplex *, - int, long long, float *, long long, cuComplex *, int, long long, - cuComplex *, int, long long, cuComplex *, int, int *, double *, int); - static auto func_ptr = LoadSymbol("cusolverDnCgesvdaStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, rank, m, n, d_A, lda, strideA, d_S, strideS, - d_U, ldu, strideU, d_V, ldv, strideV, d_work, lwork, d_info, - h_R_nrmF, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgesvdaStridedBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int rank, int m, int n, - const cuDoubleComplex *d_A, int lda, long long int strideA, double *d_S, - long long int strideS, cuDoubleComplex *d_U, int ldu, long long int strideU, - cuDoubleComplex *d_V, int ldv, long long int strideV, - cuDoubleComplex *d_work, int lwork, int *d_info, double *h_R_nrmF, - int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, - const cuDoubleComplex *, int, long long, double *, long long, - cuDoubleComplex *, int, long long, cuDoubleComplex *, int, long long, - cuDoubleComplex *, int, int *, double *, int); - static auto func_ptr = LoadSymbol("cusolverDnZgesvdaStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, jobz, rank, m, n, d_A, lda, strideA, d_S, strideS, - d_U, ldu, strideU, d_V, ldv, strideV, d_work, lwork, d_info, - h_R_nrmF, batchSize); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cusolver_dense_10_2.inc b/third_party/xla/third_party/tsl/tsl/cuda/cusolver_dense_10_2.inc deleted file mode 100644 index 5160c6337b2030..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cusolver_dense_10_2.inc +++ /dev/null @@ -1,3667 +0,0 @@ -// Auto-generated, do not edit. - -extern "C" { - -cusolverStatus_t CUSOLVERAPI cusolverGetProperty(libraryPropertyType type, - int *value) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(libraryPropertyType, int *); - static auto func_ptr = LoadSymbol("cusolverGetProperty"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type, value); -} - -cusolverStatus_t CUSOLVERAPI cusolverGetVersion(int *version) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(int *); - static auto func_ptr = LoadSymbol("cusolverGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(version); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCreate(cusolverDnHandle_t *handle) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t *); - static auto func_ptr = LoadSymbol("cusolverDnCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDestroy(cusolverDnHandle_t handle) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t); - static auto func_ptr = LoadSymbol("cusolverDnDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSetStream(cusolverDnHandle_t handle, - cudaStream_t streamId) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cusolverDnSetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); 
- return func_ptr(handle, streamId); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnGetStream(cusolverDnHandle_t handle, - cudaStream_t *streamId) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cudaStream_t *); - static auto func_ptr = LoadSymbol("cusolverDnGetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnIRSParamsCreate(cusolverDnIRSParams_t *params_ptr) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSParams_t *); - static auto func_ptr = LoadSymbol("cusolverDnIRSParamsCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params_ptr); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnIRSParamsDestroy(cusolverDnIRSParams_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSParams_t); - static auto func_ptr = LoadSymbol("cusolverDnIRSParamsDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnIRSParamsSetTol( - cusolverDnIRSParams_t params, cudaDataType data_type, double val) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSParams_t, - cudaDataType, double); - static auto func_ptr = LoadSymbol("cusolverDnIRSParamsSetTol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params, data_type, val); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnIRSParamsSetTolInner( - cusolverDnIRSParams_t params, cudaDataType data_type, double val) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSParams_t, - cudaDataType, double); - static auto func_ptr = LoadSymbol("cusolverDnIRSParamsSetTolInner"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params, data_type, val); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnIRSParamsSetSolverPrecisions( - cusolverDnIRSParams_t params, cudaDataType solver_main_precision, - cudaDataType solver_lowest_precision) { - using 
FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSParams_t, - cudaDataType, cudaDataType); - static auto func_ptr = - LoadSymbol("cusolverDnIRSParamsSetSolverPrecisions"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params, solver_main_precision, solver_lowest_precision); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnIRSParamsSetRefinementSolver( - cusolverDnIRSParams_t params, cusolverIRSRefinement_t refinement_solver) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSParams_t, - cusolverIRSRefinement_t); - static auto func_ptr = - LoadSymbol("cusolverDnIRSParamsSetRefinementSolver"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params, refinement_solver); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnIRSParamsSetMaxIters( - cusolverDnIRSParams_t params, cusolver_int_t maxiters) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSParams_t, cusolver_int_t); - static auto func_ptr = LoadSymbol("cusolverDnIRSParamsSetMaxIters"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params, maxiters); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnIRSParamsSetMaxItersInner( - cusolverDnIRSParams_t params, cusolver_int_t maxiters_inner) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSParams_t, cusolver_int_t); - static auto func_ptr = - LoadSymbol("cusolverDnIRSParamsSetMaxItersInner"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params, maxiters_inner); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnIRSParamsGetNiters( - cusolverDnIRSParams_t params, cusolver_int_t *niters) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSParams_t, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnIRSParamsGetNiters"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params, niters); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnIRSParamsGetOuterNiters( - cusolverDnIRSParams_t params, cusolver_int_t 
*outer_niters) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSParams_t, cusolver_int_t *); - static auto func_ptr = - LoadSymbol("cusolverDnIRSParamsGetOuterNiters"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params, outer_niters); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnIRSParamsGetMaxIters( - cusolverDnIRSParams_t params, cusolver_int_t *maxiters) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSParams_t, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnIRSParamsGetMaxIters"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params, maxiters); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnIRSParamsSetSolverMainPrecision( - cusolverDnIRSParams_t params, cudaDataType solver_main_precision) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSParams_t, cudaDataType); - static auto func_ptr = - LoadSymbol("cusolverDnIRSParamsSetSolverMainPrecision"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params, solver_main_precision); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnIRSParamsSetSolverLowestPrecision( - cusolverDnIRSParams_t params, cudaDataType solver_lowest_precision) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSParams_t, cudaDataType); - static auto func_ptr = - LoadSymbol("cusolverDnIRSParamsSetSolverLowestPrecision"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params, solver_lowest_precision); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnIRSInfosDestroy( - cusolverDnIRSParams_t params, cusolverDnIRSInfos_t infos) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSParams_t, - cusolverDnIRSInfos_t); - static auto func_ptr = LoadSymbol("cusolverDnIRSInfosDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params, infos); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnIRSInfosCreate( - cusolverDnIRSParams_t params, cusolverDnIRSInfos_t 
*infos_ptr) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSParams_t, - cusolverDnIRSInfos_t *); - static auto func_ptr = LoadSymbol("cusolverDnIRSInfosCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params, infos_ptr); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnIRSInfosGetNiters( - cusolverDnIRSParams_t params, cusolverDnIRSInfos_t infos, - cusolver_int_t *niters) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnIRSParams_t, cusolverDnIRSInfos_t, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnIRSInfosGetNiters"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params, infos, niters); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnIRSInfosGetOuterNiters( - cusolverDnIRSParams_t params, cusolverDnIRSInfos_t infos, - cusolver_int_t *outer_niters) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnIRSParams_t, cusolverDnIRSInfos_t, cusolver_int_t *); - static auto func_ptr = - LoadSymbol("cusolverDnIRSInfosGetOuterNiters"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params, infos, outer_niters); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnIRSInfosGetMaxIters( - cusolverDnIRSParams_t params, cusolverDnIRSInfos_t infos, - cusolver_int_t *maxiters) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnIRSParams_t, cusolverDnIRSInfos_t, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnIRSInfosGetMaxIters"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params, infos, maxiters); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnIRSInfosRequestResidual( - cusolverDnIRSParams_t params, cusolverDnIRSInfos_t infos) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSParams_t, - cusolverDnIRSInfos_t); - static auto func_ptr = - LoadSymbol("cusolverDnIRSInfosRequestResidual"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params, infos); -} - -cusolverStatus_t 
CUSOLVERAPI cusolverDnIRSInfosGetResidualHistory( - cusolverDnIRSParams_t params, cusolverDnIRSInfos_t infos, - void **residual_history) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnIRSParams_t, cusolverDnIRSInfos_t, void **); - static auto func_ptr = - LoadSymbol("cusolverDnIRSInfosGetResidualHistory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params, infos, residual_history); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZZgesv( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - cuDoubleComplex *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, - cuDoubleComplex *dB, cusolver_int_t lddb, cuDoubleComplex *dX, - cusolver_int_t lddx, void *dWorkspace, size_t lwork_bytes, - cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cuDoubleComplex *, - cusolver_int_t, cusolver_int_t *, cuDoubleComplex *, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, void *, size_t, cusolver_int_t *, - cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnZZgesv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZCgesv( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - cuDoubleComplex *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, - cuDoubleComplex *dB, cusolver_int_t lddb, cuDoubleComplex *dX, - cusolver_int_t lddx, void *dWorkspace, size_t lwork_bytes, - cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cuDoubleComplex *, - cusolver_int_t, cusolver_int_t *, cuDoubleComplex *, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, void *, size_t, cusolver_int_t *, - cusolver_int_t *); - static auto func_ptr = 
LoadSymbol("cusolverDnZCgesv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZKgesv( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - cuDoubleComplex *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, - cuDoubleComplex *dB, cusolver_int_t lddb, cuDoubleComplex *dX, - cusolver_int_t lddx, void *dWorkspace, size_t lwork_bytes, - cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cuDoubleComplex *, - cusolver_int_t, cusolver_int_t *, cuDoubleComplex *, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, void *, size_t, cusolver_int_t *, - cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnZKgesv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCCgesv( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - cuComplex *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, cuComplex *dB, - cusolver_int_t lddb, cuComplex *dX, cusolver_int_t lddx, void *dWorkspace, - size_t lwork_bytes, cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cuComplex *, - cusolver_int_t, cusolver_int_t *, cuComplex *, cusolver_int_t, - cuComplex *, cusolver_int_t, void *, size_t, cusolver_int_t *, - cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnCCgesv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCKgesv( - cusolverDnHandle_t handle, cusolver_int_t 
n, cusolver_int_t nrhs, - cuComplex *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, cuComplex *dB, - cusolver_int_t lddb, cuComplex *dX, cusolver_int_t lddx, void *dWorkspace, - size_t lwork_bytes, cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cuComplex *, - cusolver_int_t, cusolver_int_t *, cuComplex *, cusolver_int_t, - cuComplex *, cusolver_int_t, void *, size_t, cusolver_int_t *, - cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnCKgesv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDDgesv( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - double *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, double *dB, - cusolver_int_t lddb, double *dX, cusolver_int_t lddx, void *dWorkspace, - size_t lwork_bytes, cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, double *, - cusolver_int_t, cusolver_int_t *, double *, cusolver_int_t, double *, - cusolver_int_t, void *, size_t, cusolver_int_t *, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnDDgesv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDSgesv( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - double *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, double *dB, - cusolver_int_t lddb, double *dX, cusolver_int_t lddx, void *dWorkspace, - size_t lwork_bytes, cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, double 
*, - cusolver_int_t, cusolver_int_t *, double *, cusolver_int_t, double *, - cusolver_int_t, void *, size_t, cusolver_int_t *, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnDSgesv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDHgesv( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - double *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, double *dB, - cusolver_int_t lddb, double *dX, cusolver_int_t lddx, void *dWorkspace, - size_t lwork_bytes, cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, double *, - cusolver_int_t, cusolver_int_t *, double *, cusolver_int_t, double *, - cusolver_int_t, void *, size_t, cusolver_int_t *, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnDHgesv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSSgesv( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, float *dA, - cusolver_int_t ldda, cusolver_int_t *dipiv, float *dB, cusolver_int_t lddb, - float *dX, cusolver_int_t lddx, void *dWorkspace, size_t lwork_bytes, - cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, float *, - cusolver_int_t, cusolver_int_t *, float *, cusolver_int_t, float *, - cusolver_int_t, void *, size_t, cusolver_int_t *, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnSSgesv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes, iter, d_info); -} - 
-cusolverStatus_t CUSOLVERAPI cusolverDnSHgesv( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, float *dA, - cusolver_int_t ldda, cusolver_int_t *dipiv, float *dB, cusolver_int_t lddb, - float *dX, cusolver_int_t lddx, void *dWorkspace, size_t lwork_bytes, - cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, float *, - cusolver_int_t, cusolver_int_t *, float *, cusolver_int_t, float *, - cusolver_int_t, void *, size_t, cusolver_int_t *, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnSHgesv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZZgesv_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - cuDoubleComplex *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, - cuDoubleComplex *dB, cusolver_int_t lddb, cuDoubleComplex *dX, - cusolver_int_t lddx, void *dWorkspace, size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cuDoubleComplex *, - cusolver_int_t, cusolver_int_t *, cuDoubleComplex *, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnZZgesv_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZCgesv_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - cuDoubleComplex *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, - cuDoubleComplex *dB, cusolver_int_t lddb, cuDoubleComplex *dX, - cusolver_int_t lddx, void *dWorkspace, size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - 
cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cuDoubleComplex *, - cusolver_int_t, cusolver_int_t *, cuDoubleComplex *, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnZCgesv_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZKgesv_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - cuDoubleComplex *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, - cuDoubleComplex *dB, cusolver_int_t lddb, cuDoubleComplex *dX, - cusolver_int_t lddx, void *dWorkspace, size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cuDoubleComplex *, - cusolver_int_t, cusolver_int_t *, cuDoubleComplex *, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnZKgesv_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCCgesv_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - cuComplex *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, cuComplex *dB, - cusolver_int_t lddb, cuComplex *dX, cusolver_int_t lddx, void *dWorkspace, - size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cuComplex *, - cusolver_int_t, cusolver_int_t *, cuComplex *, cusolver_int_t, - cuComplex *, cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnCCgesv_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes); -} - -cusolverStatus_t 
CUSOLVERAPI cusolverDnCKgesv_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - cuComplex *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, cuComplex *dB, - cusolver_int_t lddb, cuComplex *dX, cusolver_int_t lddx, void *dWorkspace, - size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cuComplex *, - cusolver_int_t, cusolver_int_t *, cuComplex *, cusolver_int_t, - cuComplex *, cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnCKgesv_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDDgesv_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - double *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, double *dB, - cusolver_int_t lddb, double *dX, cusolver_int_t lddx, void *dWorkspace, - size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, double *, - cusolver_int_t, cusolver_int_t *, double *, cusolver_int_t, double *, - cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnDDgesv_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDSgesv_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - double *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, double *dB, - cusolver_int_t lddb, double *dX, cusolver_int_t lddx, void *dWorkspace, - size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, double *, - cusolver_int_t, cusolver_int_t *, double *, cusolver_int_t, double *, - cusolver_int_t, void *, 
size_t *); - static auto func_ptr = LoadSymbol("cusolverDnDSgesv_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDHgesv_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - double *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, double *dB, - cusolver_int_t lddb, double *dX, cusolver_int_t lddx, void *dWorkspace, - size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, double *, - cusolver_int_t, cusolver_int_t *, double *, cusolver_int_t, double *, - cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnDHgesv_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSSgesv_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, float *dA, - cusolver_int_t ldda, cusolver_int_t *dipiv, float *dB, cusolver_int_t lddb, - float *dX, cusolver_int_t lddx, void *dWorkspace, size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, float *, - cusolver_int_t, cusolver_int_t *, float *, cusolver_int_t, float *, - cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnSSgesv_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSHgesv_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, float *dA, - cusolver_int_t ldda, cusolver_int_t *dipiv, float *dB, cusolver_int_t lddb, - float *dX, cusolver_int_t lddx, void *dWorkspace, size_t 
*lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, float *, - cusolver_int_t, cusolver_int_t *, float *, cusolver_int_t, float *, - cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnSHgesv_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnIRSXgesv( - cusolverDnHandle_t handle, cusolverDnIRSParams_t gesv_irs_params, - cusolverDnIRSInfos_t gesv_irs_infos, cudaDataType inout_data_type, - cusolver_int_t n, cusolver_int_t nrhs, void *dA, cusolver_int_t ldda, - cusolver_int_t *dipiv, void *dB, cusolver_int_t lddb, void *dX, - cusolver_int_t lddx, void *dWorkspace, size_t lwork_bytes, - cusolver_int_t *niters, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnIRSParams_t, cusolverDnIRSInfos_t, - cudaDataType, cusolver_int_t, cusolver_int_t, void *, cusolver_int_t, - cusolver_int_t *, void *, cusolver_int_t, void *, cusolver_int_t, void *, - size_t, cusolver_int_t *, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnIRSXgesv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, gesv_irs_params, gesv_irs_infos, inout_data_type, n, - nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes, niters, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnIRSXgesv_bufferSize( - cusolverDnHandle_t handle, cusolverDnIRSParams_t params, cusolver_int_t n, - cusolver_int_t nrhs, size_t *lwork_bytes) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cusolverDnIRSParams_t, - cusolver_int_t, cusolver_int_t, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnIRSXgesv_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, n, nrhs, lwork_bytes); -} - 
-cusolverStatus_t CUSOLVERAPI -cusolverDnSpotrf_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, float *A, int lda, int *Lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSpotrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, Lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnDpotrf_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, double *A, int lda, int *Lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDpotrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, Lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCpotrf_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, cuComplex *A, int lda, int *Lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCpotrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, Lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZpotrf_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, cuDoubleComplex *A, int lda, int *Lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZpotrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, Lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSpotrf(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - float *A, int lda, - float *Workspace, int Lwork, - int *devInfo) { - using FuncPtr = - 
cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - float *, int, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSpotrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, Workspace, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDpotrf(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - double *A, int lda, - double *Workspace, int Lwork, - int *devInfo) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - double *, int, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDpotrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, Workspace, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCpotrf(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuComplex *A, int lda, - cuComplex *Workspace, int Lwork, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, cuComplex *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCpotrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, Workspace, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZpotrf(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuDoubleComplex *A, int lda, - cuDoubleComplex *Workspace, - int Lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, - cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZpotrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, Workspace, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSpotrs(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - int nrhs, const float *A, int lda, - float *B, int ldb, 
int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const float *, int, - float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSpotrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, B, ldb, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDpotrs(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - int nrhs, const double *A, - int lda, double *B, int ldb, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const double *, int, - double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDpotrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, B, ldb, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCpotrs(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - int nrhs, const cuComplex *A, - int lda, cuComplex *B, int ldb, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const cuComplex *, int, - cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCpotrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, B, ldb, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZpotrs(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - int nrhs, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *B, int ldb, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const cuDoubleComplex *, - int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZpotrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, B, ldb, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSpotrfBatched(cusolverDnHandle_t 
handle, - cublasFillMode_t uplo, - int n, float *Aarray[], - int lda, int *infoArray, - int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, float *[], int, int *, int); - static auto func_ptr = LoadSymbol("cusolverDnSpotrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, Aarray, lda, infoArray, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDpotrfBatched(cusolverDnHandle_t handle, - cublasFillMode_t uplo, - int n, double *Aarray[], - int lda, int *infoArray, - int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, double *[], int, int *, int); - static auto func_ptr = LoadSymbol("cusolverDnDpotrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, Aarray, lda, infoArray, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCpotrfBatched(cusolverDnHandle_t handle, - cublasFillMode_t uplo, - int n, cuComplex *Aarray[], - int lda, int *infoArray, - int batchSize) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - cuComplex *[], int, int *, int); - static auto func_ptr = LoadSymbol("cusolverDnCpotrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, Aarray, lda, infoArray, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZpotrfBatched( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, - cuDoubleComplex *Aarray[], int lda, int *infoArray, int batchSize) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - cuDoubleComplex *[], int, int *, int); - static auto func_ptr = LoadSymbol("cusolverDnZpotrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, Aarray, lda, infoArray, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSpotrsBatched( - 
cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, - int nrhs, /* only support rhs = 1*/ - float *A[], int lda, float *B[], int ldb, int *d_info, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, float *[], int, float *[], - int, int *, int); - static auto func_ptr = LoadSymbol("cusolverDnSpotrsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, B, ldb, d_info, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDpotrsBatched( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, - int nrhs, /* only support rhs = 1*/ - double *A[], int lda, double *B[], int ldb, int *d_info, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, double *[], int, - double *[], int, int *, int); - static auto func_ptr = LoadSymbol("cusolverDnDpotrsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, B, ldb, d_info, batchSize); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCpotrsBatched(cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, - int nrhs, /* only support rhs = 1*/ - cuComplex *A[], int lda, cuComplex *B[], int ldb, - int *d_info, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, cuComplex *[], int, - cuComplex *[], int, int *, int); - static auto func_ptr = LoadSymbol("cusolverDnCpotrsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, B, ldb, d_info, batchSize); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZpotrsBatched(cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, - int nrhs, /* only support rhs = 1*/ - cuDoubleComplex *A[], int lda, cuDoubleComplex *B[], - int ldb, int *d_info, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, 
cublasFillMode_t, int, int, cuDoubleComplex *[], int, - cuDoubleComplex *[], int, int *, int); - static auto func_ptr = LoadSymbol("cusolverDnZpotrsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, B, ldb, d_info, batchSize); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnSpotri_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, float *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSpotri_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnDpotri_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, double *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDpotri_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCpotri_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, cuComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCpotri_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZpotri_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, cuDoubleComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZpotri_bufferSize"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSpotri(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - float *A, int lda, float *work, - int lwork, int *devInfo) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - float *, int, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSpotri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDpotri(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - double *A, int lda, double *work, - int lwork, int *devInfo) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - double *, int, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDpotri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCpotri(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuComplex *A, int lda, - cuComplex *work, int lwork, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, cuComplex *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCpotri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZpotri(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuDoubleComplex *A, int lda, - cuDoubleComplex *work, int lwork, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, - cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZpotri"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnStrtri_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, cublasDiagType_t diag, - int n, float *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, cublasDiagType_t, int, float *, int, - int *); - static auto func_ptr = LoadSymbol("cusolverDnStrtri_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, diag, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDtrtri_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, cublasDiagType_t diag, - int n, double *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, cublasDiagType_t, int, double *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDtrtri_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, diag, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCtrtri_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, cublasDiagType_t diag, - int n, cuComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, cublasDiagType_t, int, cuComplex *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCtrtri_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, diag, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZtrtri_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, cublasDiagType_t diag, - int n, cuDoubleComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, cublasDiagType_t, int, - cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZtrtri_bufferSize"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, diag, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnStrtri(cusolverDnHandle_t handle, - cublasFillMode_t uplo, - cublasDiagType_t diag, int n, - float *A, int lda, float *work, - int lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, cublasDiagType_t, int, float *, int, - float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnStrtri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, diag, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDtrtri(cusolverDnHandle_t handle, - cublasFillMode_t uplo, - cublasDiagType_t diag, int n, - double *A, int lda, double *work, - int lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, cublasDiagType_t, int, double *, - int, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDtrtri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, diag, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCtrtri( - cusolverDnHandle_t handle, cublasFillMode_t uplo, cublasDiagType_t diag, - int n, cuComplex *A, int lda, cuComplex *work, int lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, cublasDiagType_t, int, cuComplex *, - int, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCtrtri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, diag, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZtrtri(cusolverDnHandle_t handle, - cublasFillMode_t uplo, - cublasDiagType_t diag, int n, - cuDoubleComplex *A, int lda, - cuDoubleComplex *work, int lwork, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - 
cusolverDnHandle_t, cublasFillMode_t, cublasDiagType_t, int, - cuDoubleComplex *, int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZtrtri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, diag, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnSlauum_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, float *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSlauum_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnDlauum_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, double *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDlauum_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnClauum_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, cuComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnClauum_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZlauum_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, cuDoubleComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZlauum_bufferSize"); 
- if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSlauum(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - float *A, int lda, float *work, - int lwork, int *devInfo) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - float *, int, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSlauum"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDlauum(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - double *A, int lda, double *work, - int lwork, int *devInfo) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - double *, int, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDlauum"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnClauum(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuComplex *A, int lda, - cuComplex *work, int lwork, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, cuComplex *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnClauum"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZlauum(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuDoubleComplex *A, int lda, - cuDoubleComplex *work, int lwork, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, - cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZlauum"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgetrf_bufferSize( - cusolverDnHandle_t handle, int m, int n, float *A, int lda, int *Lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgetrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, Lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgetrf_bufferSize( - cusolverDnHandle_t handle, int m, int n, double *A, int lda, int *Lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgetrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, Lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCgetrf_bufferSize(cusolverDnHandle_t handle, int m, int n, - cuComplex *A, int lda, int *Lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCgetrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, Lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZgetrf_bufferSize(cusolverDnHandle_t handle, int m, int n, - cuDoubleComplex *A, int lda, int *Lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgetrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, Lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgetrf(cusolverDnHandle_t handle, int m, - int n, float *A, int lda, - float *Workspace, int *devIpiv, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - 
cusolverDnHandle_t, int, int, float *, int, float *, int *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgetrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, Workspace, devIpiv, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgetrf(cusolverDnHandle_t handle, int m, - int n, double *A, int lda, - double *Workspace, int *devIpiv, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, double *, int, double *, int *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgetrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, Workspace, devIpiv, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgetrf(cusolverDnHandle_t handle, int m, - int n, cuComplex *A, int lda, - cuComplex *Workspace, - int *devIpiv, int *devInfo) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, cuComplex *, - int, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusolverDnCgetrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, Workspace, devIpiv, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgetrf(cusolverDnHandle_t handle, int m, - int n, cuDoubleComplex *A, - int lda, - cuDoubleComplex *Workspace, - int *devIpiv, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, cuDoubleComplex *, int, cuDoubleComplex *, - int *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgetrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, Workspace, devIpiv, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSlaswp(cusolverDnHandle_t handle, int n, - float *A, int lda, int k1, int k2, - const int *devIpiv, int incx) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, float *, int, int, int, const int *, int); - static auto 
func_ptr = LoadSymbol("cusolverDnSlaswp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, k1, k2, devIpiv, incx); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDlaswp(cusolverDnHandle_t handle, int n, - double *A, int lda, int k1, - int k2, const int *devIpiv, - int incx) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, double *, int, int, int, const int *, int); - static auto func_ptr = LoadSymbol("cusolverDnDlaswp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, k1, k2, devIpiv, incx); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnClaswp(cusolverDnHandle_t handle, int n, - cuComplex *A, int lda, int k1, - int k2, const int *devIpiv, - int incx) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, cuComplex *, int, int, int, const int *, int); - static auto func_ptr = LoadSymbol("cusolverDnClaswp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, k1, k2, devIpiv, incx); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZlaswp(cusolverDnHandle_t handle, int n, - cuDoubleComplex *A, int lda, - int k1, int k2, - const int *devIpiv, int incx) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, - cuDoubleComplex *, int, int, - int, const int *, int); - static auto func_ptr = LoadSymbol("cusolverDnZlaswp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, k1, k2, devIpiv, incx); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgetrs(cusolverDnHandle_t handle, - cublasOperation_t trans, int n, - int nrhs, const float *A, int lda, - const int *devIpiv, float *B, - int ldb, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasOperation_t, int, int, const float *, int, - const int *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgetrs"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, trans, n, nrhs, A, lda, devIpiv, B, ldb, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgetrs(cusolverDnHandle_t handle, - cublasOperation_t trans, int n, - int nrhs, const double *A, - int lda, const int *devIpiv, - double *B, int ldb, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasOperation_t, int, int, const double *, int, - const int *, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgetrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, n, nrhs, A, lda, devIpiv, B, ldb, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgetrs(cusolverDnHandle_t handle, - cublasOperation_t trans, int n, - int nrhs, const cuComplex *A, - int lda, const int *devIpiv, - cuComplex *B, int ldb, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasOperation_t, int, int, const cuComplex *, int, - const int *, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCgetrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, n, nrhs, A, lda, devIpiv, B, ldb, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgetrs( - cusolverDnHandle_t handle, cublasOperation_t trans, int n, int nrhs, - const cuDoubleComplex *A, int lda, const int *devIpiv, cuDoubleComplex *B, - int ldb, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasOperation_t, int, int, const cuDoubleComplex *, - int, const int *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgetrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, n, nrhs, A, lda, devIpiv, B, ldb, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgeqrf_bufferSize( - cusolverDnHandle_t handle, int m, int n, float *A, int lda, int *lwork) { - using FuncPtr = 
cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgeqrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgeqrf_bufferSize( - cusolverDnHandle_t handle, int m, int n, double *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgeqrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCgeqrf_bufferSize(cusolverDnHandle_t handle, int m, int n, - cuComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCgeqrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZgeqrf_bufferSize(cusolverDnHandle_t handle, int m, int n, - cuDoubleComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgeqrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgeqrf(cusolverDnHandle_t handle, int m, - int n, float *A, int lda, - float *TAU, float *Workspace, - int Lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, float *, int, float *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgeqrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, TAU, Workspace, Lwork, devInfo); -} 
- -cusolverStatus_t CUSOLVERAPI cusolverDnDgeqrf(cusolverDnHandle_t handle, int m, - int n, double *A, int lda, - double *TAU, double *Workspace, - int Lwork, int *devInfo) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, double *, - int, double *, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgeqrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, TAU, Workspace, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgeqrf(cusolverDnHandle_t handle, int m, - int n, cuComplex *A, int lda, - cuComplex *TAU, - cuComplex *Workspace, int Lwork, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - cuComplex *, int, cuComplex *, - cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCgeqrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, TAU, Workspace, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgeqrf(cusolverDnHandle_t handle, int m, - int n, cuDoubleComplex *A, - int lda, cuDoubleComplex *TAU, - cuDoubleComplex *Workspace, - int Lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, cuDoubleComplex *, int, cuDoubleComplex *, - cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgeqrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, TAU, Workspace, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSorgqr_bufferSize( - cusolverDnHandle_t handle, int m, int n, int k, const float *A, int lda, - const float *tau, int *lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int, - const float *, int, const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSorgqr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, 
k, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDorgqr_bufferSize( - cusolverDnHandle_t handle, int m, int n, int k, const double *A, int lda, - const double *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - int, const double *, int, - const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDorgqr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCungqr_bufferSize( - cusolverDnHandle_t handle, int m, int n, int k, const cuComplex *A, int lda, - const cuComplex *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - int, const cuComplex *, int, - const cuComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverDnCungqr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZungqr_bufferSize( - cusolverDnHandle_t handle, int m, int n, int k, const cuDoubleComplex *A, - int lda, const cuDoubleComplex *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZungqr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSorgqr(cusolverDnHandle_t handle, int m, - int n, int k, float *A, int lda, - const float *tau, float *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, int, float *, int, const float *, float *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSorgqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, A, lda, tau, 
work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDorgqr(cusolverDnHandle_t handle, int m, - int n, int k, double *A, int lda, - const double *tau, double *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, int, double *, int, const double *, - double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDorgqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCungqr(cusolverDnHandle_t handle, int m, - int n, int k, cuComplex *A, - int lda, const cuComplex *tau, - cuComplex *work, int lwork, - int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, int, cuComplex *, int, const cuComplex *, - cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCungqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZungqr( - cusolverDnHandle_t handle, int m, int n, int k, cuDoubleComplex *A, int lda, - const cuDoubleComplex *tau, cuDoubleComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, int, cuDoubleComplex *, int, - const cuDoubleComplex *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZungqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSormqr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, - int m, int n, int k, const float *A, int lda, const float *tau, - const float *C, int ldc, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasOperation_t, int, int, int, - const float *, int, const 
float *, const float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSormqr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDormqr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, - int m, int n, int k, const double *A, int lda, const double *tau, - const double *C, int ldc, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasOperation_t, int, int, int, - const double *, int, const double *, const double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDormqr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCunmqr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, - int m, int n, int k, const cuComplex *A, int lda, const cuComplex *tau, - const cuComplex *C, int ldc, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasOperation_t, int, int, int, - const cuComplex *, int, const cuComplex *, const cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCunmqr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZunmqr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, - int m, int n, int k, const cuDoubleComplex *A, int lda, - const cuDoubleComplex *tau, const cuDoubleComplex *C, int ldc, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasOperation_t, int, int, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, - const 
cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZunmqr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSormqr( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, - int m, int n, int k, const float *A, int lda, const float *tau, float *C, - int ldc, float *work, int lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasOperation_t, int, int, int, - const float *, int, const float *, float *, int, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSormqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, work, - lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDormqr( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, - int m, int n, int k, const double *A, int lda, const double *tau, double *C, - int ldc, double *work, int lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasOperation_t, int, int, int, - const double *, int, const double *, double *, int, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDormqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, work, - lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCunmqr( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, - int m, int n, int k, const cuComplex *A, int lda, const cuComplex *tau, - cuComplex *C, int ldc, cuComplex *work, int lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasOperation_t, int, int, int, - const cuComplex *, int, const cuComplex *, cuComplex 
*, int, cuComplex *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCunmqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, work, - lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZunmqr( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, - int m, int n, int k, const cuDoubleComplex *A, int lda, - const cuDoubleComplex *tau, cuDoubleComplex *C, int ldc, - cuDoubleComplex *work, int lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasOperation_t, int, int, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZunmqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, work, - lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsytrf_bufferSize( - cusolverDnHandle_t handle, int n, float *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, - float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsytrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsytrf_bufferSize( - cusolverDnHandle_t handle, int n, double *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, - double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsytrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCsytrf_bufferSize( - cusolverDnHandle_t handle, int n, cuComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, - cuComplex *, 
int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCsytrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZsytrf_bufferSize( - cusolverDnHandle_t handle, int n, cuDoubleComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZsytrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsytrf(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - float *A, int lda, int *ipiv, - float *work, int lwork, - int *info) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - float *, int, int *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsytrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsytrf(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - double *A, int lda, int *ipiv, - double *work, int lwork, - int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, double *, int, int *, double *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsytrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCsytrf(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuComplex *A, int lda, int *ipiv, - cuComplex *work, int lwork, - int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, int *, - cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCsytrf"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZsytrf(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuDoubleComplex *A, int lda, - int *ipiv, cuDoubleComplex *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, int *, - cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZsytrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsytrs_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, int nrhs, - const float *A, int lda, const int *ipiv, float *B, int ldb, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const float *, int, - const int *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsytrs_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, ipiv, B, ldb, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsytrs_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, int nrhs, - const double *A, int lda, const int *ipiv, double *B, int ldb, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const double *, int, - const int *, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsytrs_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, ipiv, B, ldb, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCsytrs_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, int nrhs, - const cuComplex *A, int lda, const int *ipiv, cuComplex *B, int ldb, - int *lwork) { - 
using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const cuComplex *, int, - const int *, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCsytrs_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, ipiv, B, ldb, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZsytrs_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, int nrhs, - const cuDoubleComplex *A, int lda, const int *ipiv, cuDoubleComplex *B, - int ldb, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const cuDoubleComplex *, - int, const int *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZsytrs_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, ipiv, B, ldb, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsytrs(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - int nrhs, const float *A, int lda, - const int *ipiv, float *B, - int ldb, float *work, int lwork, - int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const float *, int, - const int *, float *, int, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsytrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, ipiv, B, ldb, work, lwork, - info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsytrs(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - int nrhs, const double *A, - int lda, const int *ipiv, - double *B, int ldb, double *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const double *, int, - const int *, double *, int, double *, int, int *); - static auto func_ptr = 
LoadSymbol("cusolverDnDsytrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, ipiv, B, ldb, work, lwork, - info); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCsytrs(cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, - int nrhs, const cuComplex *A, int lda, const int *ipiv, - cuComplex *B, int ldb, cuComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const cuComplex *, int, - const int *, cuComplex *, int, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCsytrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, ipiv, B, ldb, work, lwork, - info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZsytrs( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, int nrhs, - const cuDoubleComplex *A, int lda, const int *ipiv, cuDoubleComplex *B, - int ldb, cuDoubleComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const cuDoubleComplex *, - int, const int *, cuDoubleComplex *, int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZsytrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, ipiv, B, ldb, work, lwork, - info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsytri_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, float *A, int lda, - const int *ipiv, int *lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - float *, int, const int *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsytri_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsytri_bufferSize( - cusolverDnHandle_t handle, 
cublasFillMode_t uplo, int n, double *A, int lda, - const int *ipiv, int *lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - double *, int, const int *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsytri_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCsytri_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, cuComplex *A, - int lda, const int *ipiv, int *lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - cuComplex *, int, const int *, int *); - static auto func_ptr = LoadSymbol("cusolverDnCsytri_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZsytri_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, cuDoubleComplex *A, - int lda, const int *ipiv, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, - const int *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZsytri_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsytri(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - float *A, int lda, - const int *ipiv, float *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, float *, int, const int *, - float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsytri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsytri(cusolverDnHandle_t handle, - cublasFillMode_t 
uplo, int n, - double *A, int lda, - const int *ipiv, double *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, double *, int, const int *, - double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsytri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCsytri(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuComplex *A, int lda, - const int *ipiv, cuComplex *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, const int *, - cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCsytri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZsytri( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, cuDoubleComplex *A, - int lda, const int *ipiv, cuDoubleComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, - const int *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZsytri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgebrd_bufferSize( - cusolverDnHandle_t handle, int m, int n, int *Lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgebrd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, Lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgebrd_bufferSize( - cusolverDnHandle_t handle, int m, int n, int *Lwork) { - using 
FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgebrd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, Lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgebrd_bufferSize( - cusolverDnHandle_t handle, int m, int n, int *Lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCgebrd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, Lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgebrd_bufferSize( - cusolverDnHandle_t handle, int m, int n, int *Lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgebrd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, Lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgebrd(cusolverDnHandle_t handle, int m, - int n, float *A, int lda, - float *D, float *E, float *TAUQ, - float *TAUP, float *Work, - int Lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, float *, int, float *, float *, float *, - float *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgebrd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, D, E, TAUQ, TAUP, Work, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgebrd(cusolverDnHandle_t handle, int m, - int n, double *A, int lda, - double *D, double *E, - double *TAUQ, double *TAUP, - double *Work, int Lwork, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, double *, int, double *, double *, double *, - double *, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgebrd"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, D, E, TAUQ, TAUP, Work, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgebrd(cusolverDnHandle_t handle, int m, - int n, cuComplex *A, int lda, - float *D, float *E, - cuComplex *TAUQ, cuComplex *TAUP, - cuComplex *Work, int Lwork, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, cuComplex *, int, float *, float *, - cuComplex *, cuComplex *, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCgebrd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, D, E, TAUQ, TAUP, Work, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgebrd( - cusolverDnHandle_t handle, int m, int n, cuDoubleComplex *A, int lda, - double *D, double *E, cuDoubleComplex *TAUQ, cuDoubleComplex *TAUP, - cuDoubleComplex *Work, int Lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, cuDoubleComplex *, int, double *, double *, - cuDoubleComplex *, cuDoubleComplex *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgebrd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, D, E, TAUQ, TAUP, Work, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSorgbr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, int m, int n, int k, - const float *A, int lda, const float *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, int, int, int, const float *, int, - const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSorgbr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, m, n, k, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDorgbr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, int m, int n, int k, - const 
double *A, int lda, const double *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, int, int, int, const double *, int, - const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDorgbr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, m, n, k, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCungbr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, int m, int n, int k, - const cuComplex *A, int lda, const cuComplex *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, int, int, int, const cuComplex *, - int, const cuComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverDnCungbr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, m, n, k, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZungbr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, int m, int n, int k, - const cuDoubleComplex *A, int lda, const cuDoubleComplex *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, int, int, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZungbr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, m, n, k, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSorgbr(cusolverDnHandle_t handle, - cublasSideMode_t side, int m, - int n, int k, float *A, int lda, - const float *tau, float *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, int, int, int, float *, int, - const float *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSorgbr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, 
m, n, k, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDorgbr(cusolverDnHandle_t handle, - cublasSideMode_t side, int m, - int n, int k, double *A, int lda, - const double *tau, double *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, int, int, int, double *, int, - const double *, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDorgbr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, m, n, k, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCungbr(cusolverDnHandle_t handle, - cublasSideMode_t side, int m, - int n, int k, cuComplex *A, - int lda, const cuComplex *tau, - cuComplex *work, int lwork, - int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, int, int, int, cuComplex *, int, - const cuComplex *, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCungbr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, m, n, k, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZungbr(cusolverDnHandle_t handle, cublasSideMode_t side, int m, int n, - int k, cuDoubleComplex *A, int lda, const cuDoubleComplex *tau, - cuDoubleComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, int, int, int, cuDoubleComplex *, - int, const cuDoubleComplex *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZungbr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, m, n, k, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsytrd_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, const float *A, - int lda, const float *d, const float *e, const float *tau, int *lwork) { - using FuncPtr = 
cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, const float *, int, - const float *, const float *, const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsytrd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, d, e, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsytrd_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, const double *A, - int lda, const double *d, const double *e, const double *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, const double *, int, - const double *, const double *, const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsytrd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, d, e, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnChetrd_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, const cuComplex *A, - int lda, const float *d, const float *e, const cuComplex *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, const cuComplex *, int, - const float *, const float *, const cuComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverDnChetrd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, d, e, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZhetrd_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, - const cuDoubleComplex *A, int lda, const double *d, const double *e, - const cuDoubleComplex *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, int, - const double *, const double *, const cuDoubleComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZhetrd_bufferSize"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, d, e, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsytrd(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - float *A, int lda, float *d, - float *e, float *tau, float *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, float *, int, float *, float *, - float *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsytrd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, d, e, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsytrd( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, double *A, int lda, - double *d, double *e, double *tau, double *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, double *, int, double *, - double *, double *, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsytrd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, d, e, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnChetrd(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuComplex *A, int lda, float *d, - float *e, cuComplex *tau, - cuComplex *work, int lwork, - int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, float *, - float *, cuComplex *, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnChetrd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, d, e, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZhetrd( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, cuDoubleComplex *A, - int lda, double *d, double *e, cuDoubleComplex *tau, cuDoubleComplex *work, - int 
lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, - double *, double *, cuDoubleComplex *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZhetrd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, d, e, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSorgtr_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, const float *A, - int lda, const float *tau, int *lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - const float *, int, const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSorgtr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDorgtr_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, const double *A, - int lda, const double *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, const double *, int, - const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDorgtr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCungtr_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, const cuComplex *A, - int lda, const cuComplex *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, const cuComplex *, int, - const cuComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverDnCungtr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZungtr_bufferSize( - cusolverDnHandle_t handle, 
cublasFillMode_t uplo, int n, - const cuDoubleComplex *A, int lda, const cuDoubleComplex *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZungtr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSorgtr(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - float *A, int lda, - const float *tau, float *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, float *, int, const float *, - float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSorgtr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDorgtr(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - double *A, int lda, - const double *tau, double *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, double *, int, const double *, - double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDorgtr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCungtr( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, cuComplex *A, - int lda, const cuComplex *tau, cuComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, - const cuComplex *, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCungtr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, tau, 
work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZungtr(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuDoubleComplex *A, int lda, - const cuDoubleComplex *tau, - cuDoubleComplex *work, int lwork, - int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, - const cuDoubleComplex *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZungtr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSormtr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, int m, int n, const float *A, int lda, - const float *tau, const float *C, int ldc, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - int, int, const float *, int, const float *, const float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSormtr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDormtr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, int m, int n, const double *A, int lda, - const double *tau, const double *C, int ldc, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - int, int, const double *, int, const double *, const double *, int, - int *); - static auto func_ptr = LoadSymbol("cusolverDnDormtr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCunmtr_bufferSize( 
- cusolverDnHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, int m, int n, const cuComplex *A, int lda, - const cuComplex *tau, const cuComplex *C, int ldc, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - int, int, const cuComplex *, int, const cuComplex *, const cuComplex *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCunmtr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZunmtr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, int m, int n, const cuDoubleComplex *A, int lda, - const cuDoubleComplex *tau, const cuDoubleComplex *C, int ldc, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - int, int, const cuDoubleComplex *, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZunmtr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSormtr( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, int m, int n, float *A, int lda, float *tau, - float *C, int ldc, float *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - int, int, float *, int, float *, float *, int, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSormtr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, work, - 
lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDormtr( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, int m, int n, double *A, int lda, double *tau, - double *C, int ldc, double *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - int, int, double *, int, double *, double *, int, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDormtr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, work, - lwork, info); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCunmtr(cusolverDnHandle_t handle, cublasSideMode_t side, - cublasFillMode_t uplo, cublasOperation_t trans, int m, int n, - cuComplex *A, int lda, cuComplex *tau, cuComplex *C, int ldc, - cuComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - int, int, cuComplex *, int, cuComplex *, cuComplex *, int, cuComplex *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCunmtr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, work, - lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZunmtr( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, int m, int n, cuDoubleComplex *A, int lda, - cuDoubleComplex *tau, cuDoubleComplex *C, int ldc, cuDoubleComplex *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - int, int, cuDoubleComplex *, int, cuDoubleComplex *, cuDoubleComplex *, - int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZunmtr"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, work, - lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgesvd_bufferSize( - cusolverDnHandle_t handle, int m, int n, int *lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgesvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgesvd_bufferSize( - cusolverDnHandle_t handle, int m, int n, int *lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgesvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgesvd_bufferSize( - cusolverDnHandle_t handle, int m, int n, int *lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCgesvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgesvd_bufferSize( - cusolverDnHandle_t handle, int m, int n, int *lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgesvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgesvd( - cusolverDnHandle_t handle, signed char jobu, signed char jobvt, int m, - int n, float *A, int lda, float *S, float *U, int ldu, float *VT, int ldvt, - float *work, int lwork, float *rwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, signed char, signed char, int, int, float *, int, - float *, float *, 
int, float *, int, float *, int, float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgesvd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobu, jobvt, m, n, A, lda, S, U, ldu, VT, ldvt, work, - lwork, rwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgesvd( - cusolverDnHandle_t handle, signed char jobu, signed char jobvt, int m, - int n, double *A, int lda, double *S, double *U, int ldu, double *VT, - int ldvt, double *work, int lwork, double *rwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, signed char, signed char, int, int, double *, int, - double *, double *, int, double *, int, double *, int, double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgesvd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobu, jobvt, m, n, A, lda, S, U, ldu, VT, ldvt, work, - lwork, rwork, info); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCgesvd(cusolverDnHandle_t handle, signed char jobu, signed char jobvt, - int m, int n, cuComplex *A, int lda, float *S, cuComplex *U, - int ldu, cuComplex *VT, int ldvt, cuComplex *work, int lwork, - float *rwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, signed char, signed char, int, int, cuComplex *, int, - float *, cuComplex *, int, cuComplex *, int, cuComplex *, int, float *, - int *); - static auto func_ptr = LoadSymbol("cusolverDnCgesvd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobu, jobvt, m, n, A, lda, S, U, ldu, VT, ldvt, work, - lwork, rwork, info); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZgesvd(cusolverDnHandle_t handle, signed char jobu, signed char jobvt, - int m, int n, cuDoubleComplex *A, int lda, double *S, - cuDoubleComplex *U, int ldu, cuDoubleComplex *VT, int ldvt, - cuDoubleComplex *work, int lwork, double *rwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, 
signed char, signed char, int, int, cuDoubleComplex *, - int, double *, cuDoubleComplex *, int, cuDoubleComplex *, int, - cuDoubleComplex *, int, double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgesvd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobu, jobvt, m, n, A, lda, S, U, ldu, VT, ldvt, work, - lwork, rwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsyevd_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const float *A, int lda, const float *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const float *, int, const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsyevd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsyevd_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const double *A, int lda, const double *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const double *, int, const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsyevd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCheevd_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const cuComplex *A, int lda, const float *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const cuComplex *, int, const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnCheevd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, 
lda, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZheevd_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const cuDoubleComplex *A, int lda, const double *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const cuDoubleComplex *, int, const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZheevd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsyevd( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, float *A, int lda, float *W, float *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, float *, - int, float *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsyevd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsyevd( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, double *A, int lda, double *W, double *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, double *, - int, double *, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsyevd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCheevd(cusolverDnHandle_t handle, - cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, - cuComplex *A, int lda, float *W, - cuComplex *work, int lwork, - int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, 
cublasFillMode_t, int, cuComplex *, - int, float *, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCheevd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZheevd(cusolverDnHandle_t handle, - cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, - cuDoubleComplex *A, int lda, - double *W, cuDoubleComplex *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - cuDoubleComplex *, int, double *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZheevd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsyevdx_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cusolverEigRange_t range, - cublasFillMode_t uplo, int n, const float *A, int lda, float vl, float vu, - int il, int iu, int *meig, const float *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cusolverEigRange_t, - cublasFillMode_t, int, const float *, int, float, float, int, int, int *, - const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsyevdx_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, range, uplo, n, A, lda, vl, vu, il, iu, meig, W, - lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsyevdx_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cusolverEigRange_t range, - cublasFillMode_t uplo, int n, const double *A, int lda, double vl, - double vu, int il, int iu, int *meig, const double *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cusolverEigRange_t, - cublasFillMode_t, int, const double *, int, 
double, double, int, int, - int *, const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsyevdx_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, range, uplo, n, A, lda, vl, vu, il, iu, meig, W, - lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCheevdx_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cusolverEigRange_t range, - cublasFillMode_t uplo, int n, const cuComplex *A, int lda, float vl, - float vu, int il, int iu, int *meig, const float *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cusolverEigRange_t, - cublasFillMode_t, int, const cuComplex *, int, float, float, int, int, - int *, const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnCheevdx_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, range, uplo, n, A, lda, vl, vu, il, iu, meig, W, - lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZheevdx_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cusolverEigRange_t range, - cublasFillMode_t uplo, int n, const cuDoubleComplex *A, int lda, double vl, - double vu, int il, int iu, int *meig, const double *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cusolverEigRange_t, - cublasFillMode_t, int, const cuDoubleComplex *, int, double, double, int, - int, int *, const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZheevdx_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, range, uplo, n, A, lda, vl, vu, il, iu, meig, W, - lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsyevdx( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cusolverEigRange_t range, - cublasFillMode_t uplo, int n, float *A, int lda, float vl, float vu, int il, - int iu, int *meig, float *W, float *work, int lwork, int *info) { - using 
FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cusolverEigRange_t, - cublasFillMode_t, int, float *, int, float, float, int, int, int *, - float *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsyevdx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, range, uplo, n, A, lda, vl, vu, il, iu, meig, W, - work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsyevdx( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cusolverEigRange_t range, - cublasFillMode_t uplo, int n, double *A, int lda, double vl, double vu, - int il, int iu, int *meig, double *W, double *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cusolverEigRange_t, - cublasFillMode_t, int, double *, int, double, double, int, int, int *, - double *, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsyevdx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, range, uplo, n, A, lda, vl, vu, il, iu, meig, W, - work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCheevdx(cusolverDnHandle_t handle, cusolverEigMode_t jobz, - cusolverEigRange_t range, cublasFillMode_t uplo, int n, - cuComplex *A, int lda, float vl, float vu, int il, int iu, - int *meig, float *W, cuComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cusolverEigRange_t, - cublasFillMode_t, int, cuComplex *, int, float, float, int, int, int *, - float *, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCheevdx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, range, uplo, n, A, lda, vl, vu, il, iu, meig, W, - work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZheevdx( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cusolverEigRange_t range, - 
cublasFillMode_t uplo, int n, cuDoubleComplex *A, int lda, double vl, - double vu, int il, int iu, int *meig, double *W, cuDoubleComplex *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cusolverEigRange_t, - cublasFillMode_t, int, cuDoubleComplex *, int, double, double, int, int, - int *, double *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZheevdx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, range, uplo, n, A, lda, vl, vu, il, iu, meig, W, - work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsygvdx_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cusolverEigRange_t range, cublasFillMode_t uplo, int n, const float *A, - int lda, const float *B, int ldb, float vl, float vu, int il, int iu, - int *meig, const float *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cusolverEigRange_t, cublasFillMode_t, int, const float *, int, - const float *, int, float, float, int, int, int *, const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsygvdx_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, - il, iu, meig, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsygvdx_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cusolverEigRange_t range, cublasFillMode_t uplo, int n, const double *A, - int lda, const double *B, int ldb, double vl, double vu, int il, int iu, - int *meig, const double *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cusolverEigRange_t, cublasFillMode_t, int, const double *, int, - const double *, int, double, double, int, int, int *, 
const double *, - int *); - static auto func_ptr = LoadSymbol("cusolverDnDsygvdx_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, - il, iu, meig, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnChegvdx_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cusolverEigRange_t range, cublasFillMode_t uplo, int n, const cuComplex *A, - int lda, const cuComplex *B, int ldb, float vl, float vu, int il, int iu, - int *meig, const float *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cusolverEigRange_t, cublasFillMode_t, int, const cuComplex *, int, - const cuComplex *, int, float, float, int, int, int *, const float *, - int *); - static auto func_ptr = LoadSymbol("cusolverDnChegvdx_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, - il, iu, meig, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZhegvdx_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cusolverEigRange_t range, cublasFillMode_t uplo, int n, - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - double vl, double vu, int il, int iu, int *meig, const double *W, - int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cusolverEigRange_t, cublasFillMode_t, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, double, double, int, int, int *, - const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZhegvdx_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, - il, iu, meig, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsygvdx( - 
cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cusolverEigRange_t range, cublasFillMode_t uplo, int n, float *A, int lda, - float *B, int ldb, float vl, float vu, int il, int iu, int *meig, float *W, - float *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cusolverEigRange_t, cublasFillMode_t, int, float *, int, float *, int, - float, float, int, int, int *, float *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsygvdx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, - il, iu, meig, W, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsygvdx( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cusolverEigRange_t range, cublasFillMode_t uplo, int n, double *A, int lda, - double *B, int ldb, double vl, double vu, int il, int iu, int *meig, - double *W, double *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cusolverEigRange_t, cublasFillMode_t, int, double *, int, double *, int, - double, double, int, int, int *, double *, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsygvdx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, - il, iu, meig, W, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnChegvdx( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cusolverEigRange_t range, cublasFillMode_t uplo, int n, cuComplex *A, - int lda, cuComplex *B, int ldb, float vl, float vu, int il, int iu, - int *meig, float *W, cuComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, 
cusolverEigMode_t, - cusolverEigRange_t, cublasFillMode_t, int, cuComplex *, int, cuComplex *, - int, float, float, int, int, int *, float *, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnChegvdx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, - il, iu, meig, W, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZhegvdx( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cusolverEigRange_t range, cublasFillMode_t uplo, int n, cuDoubleComplex *A, - int lda, cuDoubleComplex *B, int ldb, double vl, double vu, int il, int iu, - int *meig, double *W, cuDoubleComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cusolverEigRange_t, cublasFillMode_t, int, cuDoubleComplex *, int, - cuDoubleComplex *, int, double, double, int, int, int *, double *, - cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZhegvdx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, - il, iu, meig, W, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsygvd_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, const float *A, int lda, const float *B, - int ldb, const float *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, const float *, int, const float *, int, - const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsygvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsygvd_bufferSize( - cusolverDnHandle_t 
handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, const double *A, int lda, const double *B, - int ldb, const double *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, const double *, int, const double *, int, - const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsygvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnChegvd_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, const cuComplex *A, int lda, - const cuComplex *B, int ldb, const float *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, const cuComplex *, int, const cuComplex *, int, - const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnChegvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZhegvd_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, const cuDoubleComplex *A, int lda, - const cuDoubleComplex *B, int ldb, const double *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZhegvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsygvd( - 
cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, float *A, int lda, float *B, int ldb, - float *W, float *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, float *, int, float *, int, float *, float *, int, - int *); - static auto func_ptr = LoadSymbol("cusolverDnSsygvd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, - info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsygvd( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, double *A, int lda, double *B, int ldb, - double *W, double *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, double *, int, double *, int, double *, double *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsygvd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, - info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnChegvd( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, cuComplex *A, int lda, cuComplex *B, int ldb, - float *W, cuComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, cuComplex *, int, cuComplex *, int, float *, - cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnChegvd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, - info); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZhegvd(cusolverDnHandle_t handle, 
cusolverEigType_t itype, - cusolverEigMode_t jobz, cublasFillMode_t uplo, int n, - cuDoubleComplex *A, int lda, cuDoubleComplex *B, int ldb, - double *W, cuDoubleComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int, - double *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZhegvd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, - info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCreateSyevjInfo(syevjInfo_t *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(syevjInfo_t *); - static auto func_ptr = LoadSymbol("cusolverDnCreateSyevjInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDestroySyevjInfo(syevjInfo_t info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnDestroySyevjInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXsyevjSetTolerance(syevjInfo_t info, - double tolerance) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(syevjInfo_t, double); - static auto func_ptr = LoadSymbol("cusolverDnXsyevjSetTolerance"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, tolerance); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXsyevjSetMaxSweeps(syevjInfo_t info, - int max_sweeps) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(syevjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnXsyevjSetMaxSweeps"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, max_sweeps); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXsyevjSetSortEig(syevjInfo_t info, - int sort_eig) { - using FuncPtr = 
cusolverStatus_t(CUSOLVERAPI *)(syevjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnXsyevjSetSortEig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, sort_eig); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXsyevjGetResidual( - cusolverDnHandle_t handle, syevjInfo_t info, double *residual) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, - syevjInfo_t, double *); - static auto func_ptr = LoadSymbol("cusolverDnXsyevjGetResidual"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, residual); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXsyevjGetSweeps( - cusolverDnHandle_t handle, syevjInfo_t info, int *executed_sweeps) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, syevjInfo_t, int *); - static auto func_ptr = LoadSymbol("cusolverDnXsyevjGetSweeps"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, executed_sweeps); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsyevjBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const float *A, int lda, const float *W, int *lwork, - syevjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const float *, int, const float *, int *, syevjInfo_t, int); - static auto func_ptr = - LoadSymbol("cusolverDnSsyevjBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork, params, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsyevjBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const double *A, int lda, const double *W, int *lwork, - syevjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const double *, 
int, const double *, int *, syevjInfo_t, int); - static auto func_ptr = - LoadSymbol("cusolverDnDsyevjBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork, params, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCheevjBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const cuComplex *A, int lda, const float *W, int *lwork, - syevjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const cuComplex *, int, const float *, int *, syevjInfo_t, int); - static auto func_ptr = - LoadSymbol("cusolverDnCheevjBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork, params, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZheevjBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const cuDoubleComplex *A, int lda, const double *W, int *lwork, - syevjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const cuDoubleComplex *, int, const double *, int *, syevjInfo_t, int); - static auto func_ptr = - LoadSymbol("cusolverDnZheevjBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork, params, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsyevjBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, float *A, int lda, float *W, float *work, int lwork, int *info, - syevjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, float *, - int, float *, float *, int, int *, syevjInfo_t, int); - static auto func_ptr = 
LoadSymbol("cusolverDnSsyevjBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params, - batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsyevjBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, double *A, int lda, double *W, double *work, int lwork, int *info, - syevjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, double *, - int, double *, double *, int, int *, syevjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnDsyevjBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params, - batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCheevjBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, cuComplex *A, int lda, float *W, cuComplex *work, int lwork, - int *info, syevjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, cuComplex *, - int, float *, cuComplex *, int, int *, syevjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnCheevjBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params, - batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZheevjBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, cuDoubleComplex *A, int lda, double *W, cuDoubleComplex *work, - int lwork, int *info, syevjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - cuDoubleComplex *, int, double *, cuDoubleComplex *, int, int *, - syevjInfo_t, int); - static auto func_ptr = 
LoadSymbol("cusolverDnZheevjBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params, - batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsyevj_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const float *A, int lda, const float *W, int *lwork, - syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const float *, int, const float *, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnSsyevj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsyevj_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const double *A, int lda, const double *W, int *lwork, - syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const double *, int, const double *, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnDsyevj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCheevj_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const cuComplex *A, int lda, const float *W, int *lwork, - syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const cuComplex *, int, const float *, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnCheevj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork, params); -} - -cusolverStatus_t CUSOLVERAPI 
cusolverDnZheevj_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const cuDoubleComplex *A, int lda, const double *W, int *lwork, - syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const cuDoubleComplex *, int, const double *, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnZheevj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsyevj(cusolverDnHandle_t handle, - cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, - float *A, int lda, float *W, - float *work, int lwork, int *info, - syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, float *, - int, float *, float *, int, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnSsyevj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsyevj(cusolverDnHandle_t handle, - cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, - double *A, int lda, double *W, - double *work, int lwork, - int *info, syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, double *, - int, double *, double *, int, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnDsyevj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCheevj(cusolverDnHandle_t handle, - cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, - cuComplex *A, int lda, float *W, - cuComplex *work, int lwork, - int *info, syevjInfo_t params) { - 
using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, cuComplex *, - int, float *, cuComplex *, int, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnCheevj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZheevj( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, cuDoubleComplex *A, int lda, double *W, cuDoubleComplex *work, - int lwork, int *info, syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - cuDoubleComplex *, int, double *, cuDoubleComplex *, int, int *, - syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnZheevj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsygvj_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, const float *A, int lda, const float *B, - int ldb, const float *W, int *lwork, syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, const float *, int, const float *, int, - const float *, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnSsygvj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork, - params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsygvj_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, const double *A, int lda, const double *B, - int ldb, const double *W, int *lwork, syevjInfo_t params) { - using FuncPtr = 
cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, const double *, int, const double *, int, - const double *, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnDsygvj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork, - params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnChegvj_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, const cuComplex *A, int lda, - const cuComplex *B, int ldb, const float *W, int *lwork, - syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, const cuComplex *, int, const cuComplex *, int, - const float *, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnChegvj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork, - params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZhegvj_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, const cuDoubleComplex *A, int lda, - const cuDoubleComplex *B, int ldb, const double *W, int *lwork, - syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const double *, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnZhegvj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork, - params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsygvj( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t 
uplo, int n, float *A, int lda, float *B, int ldb, - float *W, float *work, int lwork, int *info, syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, float *, int, float *, int, float *, float *, int, - int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnSsygvj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, - info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsygvj( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, double *A, int lda, double *B, int ldb, - double *W, double *work, int lwork, int *info, syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, double *, int, double *, int, double *, double *, - int, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnDsygvj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, - info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnChegvj( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, cuComplex *A, int lda, cuComplex *B, int ldb, - float *W, cuComplex *work, int lwork, int *info, syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, cuComplex *, int, cuComplex *, int, float *, - cuComplex *, int, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnChegvj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, - info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZhegvj( - 
cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, cuDoubleComplex *A, int lda, - cuDoubleComplex *B, int ldb, double *W, cuDoubleComplex *work, int lwork, - int *info, syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int, - double *, cuDoubleComplex *, int, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnZhegvj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, - info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCreateGesvdjInfo(gesvdjInfo_t *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(gesvdjInfo_t *); - static auto func_ptr = LoadSymbol("cusolverDnCreateGesvdjInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDestroyGesvdjInfo(gesvdjInfo_t info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnDestroyGesvdjInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXgesvdjSetTolerance(gesvdjInfo_t info, - double tolerance) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(gesvdjInfo_t, double); - static auto func_ptr = LoadSymbol("cusolverDnXgesvdjSetTolerance"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, tolerance); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXgesvdjSetMaxSweeps(gesvdjInfo_t info, - int max_sweeps) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(gesvdjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnXgesvdjSetMaxSweeps"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, max_sweeps); -} - -cusolverStatus_t CUSOLVERAPI 
cusolverDnXgesvdjSetSortEig(gesvdjInfo_t info, - int sort_svd) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(gesvdjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnXgesvdjSetSortEig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, sort_svd); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXgesvdjGetResidual( - cusolverDnHandle_t handle, gesvdjInfo_t info, double *residual) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, - gesvdjInfo_t, double *); - static auto func_ptr = LoadSymbol("cusolverDnXgesvdjGetResidual"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, residual); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXgesvdjGetSweeps( - cusolverDnHandle_t handle, gesvdjInfo_t info, int *executed_sweeps) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, gesvdjInfo_t, int *); - static auto func_ptr = LoadSymbol("cusolverDnXgesvdjGetSweeps"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, executed_sweeps); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgesvdjBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int m, int n, - const float *A, int lda, const float *S, const float *U, int ldu, - const float *V, int ldv, int *lwork, gesvdjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, const float *, int, - const float *, const float *, int, const float *, int, int *, - gesvdjInfo_t, int); - static auto func_ptr = - LoadSymbol("cusolverDnSgesvdjBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, lwork, params, - batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgesvdjBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int m, int n, - const double *A, int lda, const double *S, const double *U, int ldu, 
- const double *V, int ldv, int *lwork, gesvdjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, const double *, int, - const double *, const double *, int, const double *, int, int *, - gesvdjInfo_t, int); - static auto func_ptr = - LoadSymbol("cusolverDnDgesvdjBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, lwork, params, - batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgesvdjBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int m, int n, - const cuComplex *A, int lda, const float *S, const cuComplex *U, int ldu, - const cuComplex *V, int ldv, int *lwork, gesvdjInfo_t params, - int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, const cuComplex *, int, - const float *, const cuComplex *, int, const cuComplex *, int, int *, - gesvdjInfo_t, int); - static auto func_ptr = - LoadSymbol("cusolverDnCgesvdjBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, lwork, params, - batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgesvdjBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int m, int n, - const cuDoubleComplex *A, int lda, const double *S, - const cuDoubleComplex *U, int ldu, const cuDoubleComplex *V, int ldv, - int *lwork, gesvdjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, const cuDoubleComplex *, - int, const double *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, int *, gesvdjInfo_t, int); - static auto func_ptr = - LoadSymbol("cusolverDnZgesvdjBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, m, n, A, lda, S, U, ldu, V, 
ldv, lwork, params, - batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgesvdjBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int m, int n, float *A, - int lda, float *S, float *U, int ldu, float *V, int ldv, float *work, - int lwork, int *info, gesvdjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, float *, int, float *, - float *, int, float *, int, float *, int, int *, gesvdjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnSgesvdjBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, - info, params, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgesvdjBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int m, int n, double *A, - int lda, double *S, double *U, int ldu, double *V, int ldv, double *work, - int lwork, int *info, gesvdjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, double *, int, double *, - double *, int, double *, int, double *, int, int *, gesvdjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnDgesvdjBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, - info, params, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgesvdjBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int m, int n, - cuComplex *A, int lda, float *S, cuComplex *U, int ldu, cuComplex *V, - int ldv, cuComplex *work, int lwork, int *info, gesvdjInfo_t params, - int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, cuComplex *, int, - float *, cuComplex *, int, cuComplex *, int, cuComplex *, int, int *, - gesvdjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnCgesvdjBatched"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, - info, params, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgesvdjBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int m, int n, - cuDoubleComplex *A, int lda, double *S, cuDoubleComplex *U, int ldu, - cuDoubleComplex *V, int ldv, cuDoubleComplex *work, int lwork, int *info, - gesvdjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, cuDoubleComplex *, int, - double *, cuDoubleComplex *, int, cuDoubleComplex *, int, - cuDoubleComplex *, int, int *, gesvdjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnZgesvdjBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, - info, params, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgesvdj_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, - const float *A, int lda, const float *S, const float *U, int ldu, - const float *V, int ldv, int *lwork, gesvdjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, const float *, int, - const float *, const float *, int, const float *, int, int *, - gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnSgesvdj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, lwork, - params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgesvdj_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, - const double *A, int lda, const double *S, const double *U, int ldu, - const double *V, int ldv, int *lwork, gesvdjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, 
const double *, int, - const double *, const double *, int, const double *, int, int *, - gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnDgesvdj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, lwork, - params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgesvdj_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, - const cuComplex *A, int lda, const float *S, const cuComplex *U, int ldu, - const cuComplex *V, int ldv, int *lwork, gesvdjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, const cuComplex *, - int, const float *, const cuComplex *, int, const cuComplex *, int, int *, - gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnCgesvdj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, lwork, - params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgesvdj_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, - const cuDoubleComplex *A, int lda, const double *S, - const cuDoubleComplex *U, int ldu, const cuDoubleComplex *V, int ldv, - int *lwork, gesvdjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, - const cuDoubleComplex *, int, const double *, const cuDoubleComplex *, - int, const cuDoubleComplex *, int, int *, gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnZgesvdj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, lwork, - params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgesvdj( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, - float *A, int lda, float *S, float *U, int ldu, float *V, int ldv, - float *work, int 
lwork, int *info, gesvdjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, float *, int, - float *, float *, int, float *, int, float *, int, int *, gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnSgesvdj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, work, - lwork, info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgesvdj( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, - double *A, int lda, double *S, double *U, int ldu, double *V, int ldv, - double *work, int lwork, int *info, gesvdjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, double *, int, - double *, double *, int, double *, int, double *, int, int *, - gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnDgesvdj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, work, - lwork, info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgesvdj( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, - cuComplex *A, int lda, float *S, cuComplex *U, int ldu, cuComplex *V, - int ldv, cuComplex *work, int lwork, int *info, gesvdjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, cuComplex *, int, - float *, cuComplex *, int, cuComplex *, int, cuComplex *, int, int *, - gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnCgesvdj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, work, - lwork, info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgesvdj( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, - cuDoubleComplex *A, int lda, double *S, 
cuDoubleComplex *U, int ldu, - cuDoubleComplex *V, int ldv, cuDoubleComplex *work, int lwork, int *info, - gesvdjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, cuDoubleComplex *, - int, double *, cuDoubleComplex *, int, cuDoubleComplex *, int, - cuDoubleComplex *, int, int *, gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnZgesvdj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, work, - lwork, info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgesvdaStridedBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int rank, int m, int n, - const float *d_A, int lda, long long int strideA, const float *d_S, - long long int strideS, const float *d_U, int ldu, long long int strideU, - const float *d_V, int ldv, long long int strideV, int *lwork, - int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, const float *, int, - long long, const float *, long long, const float *, int, long long, - const float *, int, long long, int *, int); - static auto func_ptr = - LoadSymbol("cusolverDnSgesvdaStridedBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, rank, m, n, d_A, lda, strideA, d_S, strideS, - d_U, ldu, strideU, d_V, ldv, strideV, lwork, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgesvdaStridedBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int rank, int m, int n, - const double *d_A, int lda, long long int strideA, const double *d_S, - long long int strideS, const double *d_U, int ldu, long long int strideU, - const double *d_V, int ldv, long long int strideV, int *lwork, - int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, const double *, int, - long 
long, const double *, long long, const double *, int, long long, - const double *, int, long long, int *, int); - static auto func_ptr = - LoadSymbol("cusolverDnDgesvdaStridedBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, rank, m, n, d_A, lda, strideA, d_S, strideS, - d_U, ldu, strideU, d_V, ldv, strideV, lwork, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgesvdaStridedBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int rank, int m, int n, - const cuComplex *d_A, int lda, long long int strideA, const float *d_S, - long long int strideS, const cuComplex *d_U, int ldu, long long int strideU, - const cuComplex *d_V, int ldv, long long int strideV, int *lwork, - int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, const cuComplex *, - int, long long, const float *, long long, const cuComplex *, int, - long long, const cuComplex *, int, long long, int *, int); - static auto func_ptr = - LoadSymbol("cusolverDnCgesvdaStridedBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, rank, m, n, d_A, lda, strideA, d_S, strideS, - d_U, ldu, strideU, d_V, ldv, strideV, lwork, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgesvdaStridedBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int rank, int m, int n, - const cuDoubleComplex *d_A, int lda, long long int strideA, - const double *d_S, long long int strideS, const cuDoubleComplex *d_U, - int ldu, long long int strideU, const cuDoubleComplex *d_V, int ldv, - long long int strideV, int *lwork, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, - const cuDoubleComplex *, int, long long, const double *, long long, - const cuDoubleComplex *, int, long long, const cuDoubleComplex *, int, - long long, int *, int); - 
static auto func_ptr = - LoadSymbol("cusolverDnZgesvdaStridedBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, rank, m, n, d_A, lda, strideA, d_S, strideS, - d_U, ldu, strideU, d_V, ldv, strideV, lwork, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgesvdaStridedBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int rank, int m, int n, - const float *d_A, int lda, long long int strideA, float *d_S, - long long int strideS, float *d_U, int ldu, long long int strideU, - float *d_V, int ldv, long long int strideV, float *d_work, int lwork, - int *d_info, double *h_R_nrmF, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, const float *, int, - long long, float *, long long, float *, int, long long, float *, int, - long long, float *, int, int *, double *, int); - static auto func_ptr = LoadSymbol("cusolverDnSgesvdaStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, rank, m, n, d_A, lda, strideA, d_S, strideS, - d_U, ldu, strideU, d_V, ldv, strideV, d_work, lwork, d_info, - h_R_nrmF, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgesvdaStridedBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int rank, int m, int n, - const double *d_A, int lda, long long int strideA, double *d_S, - long long int strideS, double *d_U, int ldu, long long int strideU, - double *d_V, int ldv, long long int strideV, double *d_work, int lwork, - int *d_info, double *h_R_nrmF, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, const double *, int, - long long, double *, long long, double *, int, long long, double *, int, - long long, double *, int, int *, double *, int); - static auto func_ptr = LoadSymbol("cusolverDnDgesvdaStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, jobz, rank, m, n, d_A, lda, strideA, d_S, strideS, - d_U, ldu, strideU, d_V, ldv, strideV, d_work, lwork, d_info, - h_R_nrmF, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgesvdaStridedBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int rank, int m, int n, - const cuComplex *d_A, int lda, long long int strideA, float *d_S, - long long int strideS, cuComplex *d_U, int ldu, long long int strideU, - cuComplex *d_V, int ldv, long long int strideV, cuComplex *d_work, - int lwork, int *d_info, double *h_R_nrmF, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, const cuComplex *, - int, long long, float *, long long, cuComplex *, int, long long, - cuComplex *, int, long long, cuComplex *, int, int *, double *, int); - static auto func_ptr = LoadSymbol("cusolverDnCgesvdaStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, rank, m, n, d_A, lda, strideA, d_S, strideS, - d_U, ldu, strideU, d_V, ldv, strideV, d_work, lwork, d_info, - h_R_nrmF, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgesvdaStridedBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int rank, int m, int n, - const cuDoubleComplex *d_A, int lda, long long int strideA, double *d_S, - long long int strideS, cuDoubleComplex *d_U, int ldu, long long int strideU, - cuDoubleComplex *d_V, int ldv, long long int strideV, - cuDoubleComplex *d_work, int lwork, int *d_info, double *h_R_nrmF, - int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, - const cuDoubleComplex *, int, long long, double *, long long, - cuDoubleComplex *, int, long long, cuDoubleComplex *, int, long long, - cuDoubleComplex *, int, int *, double *, int); - static auto func_ptr = LoadSymbol("cusolverDnZgesvdaStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, jobz, rank, m, n, d_A, lda, strideA, d_S, strideS, - d_U, ldu, strideU, d_V, ldv, strideV, d_work, lwork, d_info, - h_R_nrmF, batchSize); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cusolver_dense_11_0.inc b/third_party/xla/third_party/tsl/tsl/cuda/cusolver_dense_11_0.inc deleted file mode 100644 index 89db3b965cfc1a..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cusolver_dense_11_0.inc +++ /dev/null @@ -1,5149 +0,0 @@ -// Auto-generated, do not edit. - -extern "C" { - -cusolverStatus_t CUSOLVERAPI cusolverGetProperty(libraryPropertyType type, - int *value) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(libraryPropertyType, int *); - static auto func_ptr = LoadSymbol("cusolverGetProperty"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type, value); -} - -cusolverStatus_t CUSOLVERAPI cusolverGetVersion(int *version) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(int *); - static auto func_ptr = LoadSymbol("cusolverGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(version); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCreate(cusolverDnHandle_t *handle) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t *); - static auto func_ptr = LoadSymbol("cusolverDnCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cusolverStatus_t CUSOLVERAPI cusolverSpCreate(cusolverSpHandle_t *handle) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverSpHandle_t *); - static auto func_ptr = LoadSymbol("cusolverSpCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDestroy(cusolverDnHandle_t handle) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t); - static auto func_ptr = LoadSymbol("cusolverDnDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - 
-cusolverStatus_t CUSOLVERAPI cusolverSpDestroy(cusolverSpHandle_t handle) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverSpHandle_t); - static auto func_ptr = LoadSymbol("cusolverSpDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSetStream(cusolverDnHandle_t handle, - cudaStream_t streamId) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cusolverDnSetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cusolverStatus_t CUSOLVERAPI cusolverSpSetStream(cusolverSpHandle_t handle, - cudaStream_t streamId) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverSpHandle_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cusolverSpSetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnGetStream(cusolverDnHandle_t handle, - cudaStream_t *streamId) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cudaStream_t *); - static auto func_ptr = LoadSymbol("cusolverDnGetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnIRSParamsCreate(cusolverDnIRSParams_t *params_ptr) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSParams_t *); - static auto func_ptr = LoadSymbol("cusolverDnIRSParamsCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params_ptr); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnIRSParamsDestroy(cusolverDnIRSParams_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSParams_t); - static auto func_ptr = LoadSymbol("cusolverDnIRSParamsDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params); -} - -cusolverStatus_t CUSOLVERAPI 
cusolverDnIRSParamsSetRefinementSolver( - cusolverDnIRSParams_t params, cusolverIRSRefinement_t refinement_solver) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSParams_t, - cusolverIRSRefinement_t); - static auto func_ptr = - LoadSymbol("cusolverDnIRSParamsSetRefinementSolver"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params, refinement_solver); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnIRSParamsSetSolverMainPrecision( - cusolverDnIRSParams_t params, cusolverPrecType_t solver_main_precision) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSParams_t, - cusolverPrecType_t); - static auto func_ptr = - LoadSymbol("cusolverDnIRSParamsSetSolverMainPrecision"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params, solver_main_precision); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnIRSParamsSetSolverLowestPrecision( - cusolverDnIRSParams_t params, cusolverPrecType_t solver_lowest_precision) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSParams_t, - cusolverPrecType_t); - static auto func_ptr = - LoadSymbol("cusolverDnIRSParamsSetSolverLowestPrecision"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params, solver_lowest_precision); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnIRSParamsSetSolverPrecisions( - cusolverDnIRSParams_t params, cusolverPrecType_t solver_main_precision, - cusolverPrecType_t solver_lowest_precision) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnIRSParams_t, cusolverPrecType_t, cusolverPrecType_t); - static auto func_ptr = - LoadSymbol("cusolverDnIRSParamsSetSolverPrecisions"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params, solver_main_precision, solver_lowest_precision); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnIRSParamsSetTol(cusolverDnIRSParams_t params, double val) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSParams_t, double); - static auto 
func_ptr = LoadSymbol("cusolverDnIRSParamsSetTol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params, val); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnIRSParamsSetTolInner(cusolverDnIRSParams_t params, double val) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSParams_t, double); - static auto func_ptr = LoadSymbol("cusolverDnIRSParamsSetTolInner"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params, val); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnIRSParamsSetMaxIters( - cusolverDnIRSParams_t params, cusolver_int_t maxiters) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSParams_t, cusolver_int_t); - static auto func_ptr = LoadSymbol("cusolverDnIRSParamsSetMaxIters"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params, maxiters); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnIRSParamsSetMaxItersInner( - cusolverDnIRSParams_t params, cusolver_int_t maxiters_inner) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSParams_t, cusolver_int_t); - static auto func_ptr = - LoadSymbol("cusolverDnIRSParamsSetMaxItersInner"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params, maxiters_inner); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnIRSParamsGetMaxIters( - cusolverDnIRSParams_t params, cusolver_int_t *maxiters) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSParams_t, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnIRSParamsGetMaxIters"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params, maxiters); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnIRSParamsEnableFallback(cusolverDnIRSParams_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSParams_t); - static auto func_ptr = - LoadSymbol("cusolverDnIRSParamsEnableFallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params); -} - -cusolverStatus_t 
CUSOLVERAPI -cusolverDnIRSParamsDisableFallback(cusolverDnIRSParams_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSParams_t); - static auto func_ptr = - LoadSymbol("cusolverDnIRSParamsDisableFallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnIRSInfosDestroy(cusolverDnIRSInfos_t infos) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSInfos_t); - static auto func_ptr = LoadSymbol("cusolverDnIRSInfosDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(infos); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnIRSInfosCreate(cusolverDnIRSInfos_t *infos_ptr) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSInfos_t *); - static auto func_ptr = LoadSymbol("cusolverDnIRSInfosCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(infos_ptr); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnIRSInfosGetNiters( - cusolverDnIRSInfos_t infos, cusolver_int_t *niters) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSInfos_t, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnIRSInfosGetNiters"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(infos, niters); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnIRSInfosGetOuterNiters( - cusolverDnIRSInfos_t infos, cusolver_int_t *outer_niters) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSInfos_t, cusolver_int_t *); - static auto func_ptr = - LoadSymbol("cusolverDnIRSInfosGetOuterNiters"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(infos, outer_niters); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnIRSInfosRequestResidual(cusolverDnIRSInfos_t infos) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSInfos_t); - static auto func_ptr = - LoadSymbol("cusolverDnIRSInfosRequestResidual"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(infos); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnIRSInfosGetResidualHistory( - cusolverDnIRSInfos_t infos, void **residual_history) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSInfos_t, void **); - static auto func_ptr = - LoadSymbol("cusolverDnIRSInfosGetResidualHistory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(infos, residual_history); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnIRSInfosGetMaxIters( - cusolverDnIRSInfos_t infos, cusolver_int_t *maxiters) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnIRSInfos_t, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnIRSInfosGetMaxIters"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(infos, maxiters); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZZgesv( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - cuDoubleComplex *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, - cuDoubleComplex *dB, cusolver_int_t lddb, cuDoubleComplex *dX, - cusolver_int_t lddx, void *dWorkspace, size_t lwork_bytes, - cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cuDoubleComplex *, - cusolver_int_t, cusolver_int_t *, cuDoubleComplex *, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, void *, size_t, cusolver_int_t *, - cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnZZgesv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZCgesv( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - cuDoubleComplex *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, - cuDoubleComplex *dB, cusolver_int_t lddb, cuDoubleComplex *dX, - cusolver_int_t lddx, void *dWorkspace, size_t lwork_bytes, - cusolver_int_t *iter, 
cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cuDoubleComplex *, - cusolver_int_t, cusolver_int_t *, cuDoubleComplex *, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, void *, size_t, cusolver_int_t *, - cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnZCgesv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZKgesv( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - cuDoubleComplex *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, - cuDoubleComplex *dB, cusolver_int_t lddb, cuDoubleComplex *dX, - cusolver_int_t lddx, void *dWorkspace, size_t lwork_bytes, - cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cuDoubleComplex *, - cusolver_int_t, cusolver_int_t *, cuDoubleComplex *, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, void *, size_t, cusolver_int_t *, - cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnZKgesv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZEgesv( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - cuDoubleComplex *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, - cuDoubleComplex *dB, cusolver_int_t lddb, cuDoubleComplex *dX, - cusolver_int_t lddx, void *dWorkspace, size_t lwork_bytes, - cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cuDoubleComplex *, - cusolver_int_t, cusolver_int_t *, cuDoubleComplex *, cusolver_int_t, - cuDoubleComplex *, 
cusolver_int_t, void *, size_t, cusolver_int_t *, - cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnZEgesv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZYgesv( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - cuDoubleComplex *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, - cuDoubleComplex *dB, cusolver_int_t lddb, cuDoubleComplex *dX, - cusolver_int_t lddx, void *dWorkspace, size_t lwork_bytes, - cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cuDoubleComplex *, - cusolver_int_t, cusolver_int_t *, cuDoubleComplex *, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, void *, size_t, cusolver_int_t *, - cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnZYgesv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCCgesv( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - cuComplex *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, cuComplex *dB, - cusolver_int_t lddb, cuComplex *dX, cusolver_int_t lddx, void *dWorkspace, - size_t lwork_bytes, cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cuComplex *, - cusolver_int_t, cusolver_int_t *, cuComplex *, cusolver_int_t, - cuComplex *, cusolver_int_t, void *, size_t, cusolver_int_t *, - cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnCCgesv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes, iter, d_info); 
-} - -cusolverStatus_t CUSOLVERAPI cusolverDnCEgesv( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - cuComplex *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, cuComplex *dB, - cusolver_int_t lddb, cuComplex *dX, cusolver_int_t lddx, void *dWorkspace, - size_t lwork_bytes, cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cuComplex *, - cusolver_int_t, cusolver_int_t *, cuComplex *, cusolver_int_t, - cuComplex *, cusolver_int_t, void *, size_t, cusolver_int_t *, - cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnCEgesv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCKgesv( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - cuComplex *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, cuComplex *dB, - cusolver_int_t lddb, cuComplex *dX, cusolver_int_t lddx, void *dWorkspace, - size_t lwork_bytes, cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cuComplex *, - cusolver_int_t, cusolver_int_t *, cuComplex *, cusolver_int_t, - cuComplex *, cusolver_int_t, void *, size_t, cusolver_int_t *, - cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnCKgesv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCYgesv( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - cuComplex *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, cuComplex *dB, - cusolver_int_t lddb, cuComplex *dX, cusolver_int_t lddx, void *dWorkspace, - size_t lwork_bytes, cusolver_int_t *iter, 
cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cuComplex *, - cusolver_int_t, cusolver_int_t *, cuComplex *, cusolver_int_t, - cuComplex *, cusolver_int_t, void *, size_t, cusolver_int_t *, - cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnCYgesv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDDgesv( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - double *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, double *dB, - cusolver_int_t lddb, double *dX, cusolver_int_t lddx, void *dWorkspace, - size_t lwork_bytes, cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, double *, - cusolver_int_t, cusolver_int_t *, double *, cusolver_int_t, double *, - cusolver_int_t, void *, size_t, cusolver_int_t *, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnDDgesv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDSgesv( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - double *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, double *dB, - cusolver_int_t lddb, double *dX, cusolver_int_t lddx, void *dWorkspace, - size_t lwork_bytes, cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, double *, - cusolver_int_t, cusolver_int_t *, double *, cusolver_int_t, double *, - cusolver_int_t, void *, size_t, cusolver_int_t *, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnDSgesv"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDHgesv( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - double *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, double *dB, - cusolver_int_t lddb, double *dX, cusolver_int_t lddx, void *dWorkspace, - size_t lwork_bytes, cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, double *, - cusolver_int_t, cusolver_int_t *, double *, cusolver_int_t, double *, - cusolver_int_t, void *, size_t, cusolver_int_t *, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnDHgesv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDBgesv( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - double *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, double *dB, - cusolver_int_t lddb, double *dX, cusolver_int_t lddx, void *dWorkspace, - size_t lwork_bytes, cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, double *, - cusolver_int_t, cusolver_int_t *, double *, cusolver_int_t, double *, - cusolver_int_t, void *, size_t, cusolver_int_t *, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnDBgesv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDXgesv( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - double *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, double *dB, - cusolver_int_t lddb, double 
*dX, cusolver_int_t lddx, void *dWorkspace, - size_t lwork_bytes, cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, double *, - cusolver_int_t, cusolver_int_t *, double *, cusolver_int_t, double *, - cusolver_int_t, void *, size_t, cusolver_int_t *, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnDXgesv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSSgesv( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, float *dA, - cusolver_int_t ldda, cusolver_int_t *dipiv, float *dB, cusolver_int_t lddb, - float *dX, cusolver_int_t lddx, void *dWorkspace, size_t lwork_bytes, - cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, float *, - cusolver_int_t, cusolver_int_t *, float *, cusolver_int_t, float *, - cusolver_int_t, void *, size_t, cusolver_int_t *, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnSSgesv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSHgesv( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, float *dA, - cusolver_int_t ldda, cusolver_int_t *dipiv, float *dB, cusolver_int_t lddb, - float *dX, cusolver_int_t lddx, void *dWorkspace, size_t lwork_bytes, - cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, float *, - cusolver_int_t, cusolver_int_t *, float *, cusolver_int_t, float *, - cusolver_int_t, void *, size_t, cusolver_int_t *, cusolver_int_t *); - static 
auto func_ptr = LoadSymbol("cusolverDnSHgesv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSBgesv( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, float *dA, - cusolver_int_t ldda, cusolver_int_t *dipiv, float *dB, cusolver_int_t lddb, - float *dX, cusolver_int_t lddx, void *dWorkspace, size_t lwork_bytes, - cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, float *, - cusolver_int_t, cusolver_int_t *, float *, cusolver_int_t, float *, - cusolver_int_t, void *, size_t, cusolver_int_t *, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnSBgesv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSXgesv( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, float *dA, - cusolver_int_t ldda, cusolver_int_t *dipiv, float *dB, cusolver_int_t lddb, - float *dX, cusolver_int_t lddx, void *dWorkspace, size_t lwork_bytes, - cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, float *, - cusolver_int_t, cusolver_int_t *, float *, cusolver_int_t, float *, - cusolver_int_t, void *, size_t, cusolver_int_t *, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnSXgesv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZZgesv_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - cuDoubleComplex *dA, cusolver_int_t 
ldda, cusolver_int_t *dipiv, - cuDoubleComplex *dB, cusolver_int_t lddb, cuDoubleComplex *dX, - cusolver_int_t lddx, void *dWorkspace, size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cuDoubleComplex *, - cusolver_int_t, cusolver_int_t *, cuDoubleComplex *, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnZZgesv_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZCgesv_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - cuDoubleComplex *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, - cuDoubleComplex *dB, cusolver_int_t lddb, cuDoubleComplex *dX, - cusolver_int_t lddx, void *dWorkspace, size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cuDoubleComplex *, - cusolver_int_t, cusolver_int_t *, cuDoubleComplex *, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnZCgesv_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZKgesv_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - cuDoubleComplex *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, - cuDoubleComplex *dB, cusolver_int_t lddb, cuDoubleComplex *dX, - cusolver_int_t lddx, void *dWorkspace, size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cuDoubleComplex *, - cusolver_int_t, cusolver_int_t *, cuDoubleComplex *, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, void *, size_t 
*); - static auto func_ptr = LoadSymbol("cusolverDnZKgesv_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZEgesv_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - cuDoubleComplex *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, - cuDoubleComplex *dB, cusolver_int_t lddb, cuDoubleComplex *dX, - cusolver_int_t lddx, void *dWorkspace, size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cuDoubleComplex *, - cusolver_int_t, cusolver_int_t *, cuDoubleComplex *, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnZEgesv_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZYgesv_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - cuDoubleComplex *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, - cuDoubleComplex *dB, cusolver_int_t lddb, cuDoubleComplex *dX, - cusolver_int_t lddx, void *dWorkspace, size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cuDoubleComplex *, - cusolver_int_t, cusolver_int_t *, cuDoubleComplex *, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnZYgesv_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCCgesv_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - cuComplex *dA, cusolver_int_t ldda, 
cusolver_int_t *dipiv, cuComplex *dB, - cusolver_int_t lddb, cuComplex *dX, cusolver_int_t lddx, void *dWorkspace, - size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cuComplex *, - cusolver_int_t, cusolver_int_t *, cuComplex *, cusolver_int_t, - cuComplex *, cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnCCgesv_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCKgesv_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - cuComplex *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, cuComplex *dB, - cusolver_int_t lddb, cuComplex *dX, cusolver_int_t lddx, void *dWorkspace, - size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cuComplex *, - cusolver_int_t, cusolver_int_t *, cuComplex *, cusolver_int_t, - cuComplex *, cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnCKgesv_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCEgesv_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - cuComplex *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, cuComplex *dB, - cusolver_int_t lddb, cuComplex *dX, cusolver_int_t lddx, void *dWorkspace, - size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cuComplex *, - cusolver_int_t, cusolver_int_t *, cuComplex *, cusolver_int_t, - cuComplex *, cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnCEgesv_bufferSize"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCYgesv_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - cuComplex *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, cuComplex *dB, - cusolver_int_t lddb, cuComplex *dX, cusolver_int_t lddx, void *dWorkspace, - size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cuComplex *, - cusolver_int_t, cusolver_int_t *, cuComplex *, cusolver_int_t, - cuComplex *, cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnCYgesv_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDDgesv_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - double *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, double *dB, - cusolver_int_t lddb, double *dX, cusolver_int_t lddx, void *dWorkspace, - size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, double *, - cusolver_int_t, cusolver_int_t *, double *, cusolver_int_t, double *, - cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnDDgesv_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDSgesv_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - double *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, double *dB, - cusolver_int_t lddb, double *dX, cusolver_int_t lddx, void *dWorkspace, - size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - 
cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, double *, - cusolver_int_t, cusolver_int_t *, double *, cusolver_int_t, double *, - cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnDSgesv_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDHgesv_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - double *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, double *dB, - cusolver_int_t lddb, double *dX, cusolver_int_t lddx, void *dWorkspace, - size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, double *, - cusolver_int_t, cusolver_int_t *, double *, cusolver_int_t, double *, - cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnDHgesv_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDBgesv_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, - double *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, double *dB, - cusolver_int_t lddb, double *dX, cusolver_int_t lddx, void *dWorkspace, - size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, double *, - cusolver_int_t, cusolver_int_t *, double *, cusolver_int_t, double *, - cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnDBgesv_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDXgesv_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t n, 
cusolver_int_t nrhs, - double *dA, cusolver_int_t ldda, cusolver_int_t *dipiv, double *dB, - cusolver_int_t lddb, double *dX, cusolver_int_t lddx, void *dWorkspace, - size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, double *, - cusolver_int_t, cusolver_int_t *, double *, cusolver_int_t, double *, - cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnDXgesv_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSSgesv_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, float *dA, - cusolver_int_t ldda, cusolver_int_t *dipiv, float *dB, cusolver_int_t lddb, - float *dX, cusolver_int_t lddx, void *dWorkspace, size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, float *, - cusolver_int_t, cusolver_int_t *, float *, cusolver_int_t, float *, - cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnSSgesv_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSHgesv_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, float *dA, - cusolver_int_t ldda, cusolver_int_t *dipiv, float *dB, cusolver_int_t lddb, - float *dX, cusolver_int_t lddx, void *dWorkspace, size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, float *, - cusolver_int_t, cusolver_int_t *, float *, cusolver_int_t, float *, - cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnSHgesv_bufferSize"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSBgesv_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, float *dA, - cusolver_int_t ldda, cusolver_int_t *dipiv, float *dB, cusolver_int_t lddb, - float *dX, cusolver_int_t lddx, void *dWorkspace, size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, float *, - cusolver_int_t, cusolver_int_t *, float *, cusolver_int_t, float *, - cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnSBgesv_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSXgesv_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t n, cusolver_int_t nrhs, float *dA, - cusolver_int_t ldda, cusolver_int_t *dipiv, float *dB, cusolver_int_t lddb, - float *dX, cusolver_int_t lddx, void *dWorkspace, size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, float *, - cusolver_int_t, cusolver_int_t *, float *, cusolver_int_t, float *, - cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnSXgesv_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, nrhs, dA, ldda, dipiv, dB, lddb, dX, lddx, - dWorkspace, lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZZgels(cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, cuDoubleComplex *dA, cusolver_int_t ldda, - cuDoubleComplex *dB, cusolver_int_t lddb, cuDoubleComplex *dX, - cusolver_int_t lddx, void *dWorkspace, size_t lwork_bytes, - cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI 
*)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, cuDoubleComplex *, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, void *, size_t, cusolver_int_t *, - cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnZZgels"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZCgels(cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, cuDoubleComplex *dA, cusolver_int_t ldda, - cuDoubleComplex *dB, cusolver_int_t lddb, cuDoubleComplex *dX, - cusolver_int_t lddx, void *dWorkspace, size_t lwork_bytes, - cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, cuDoubleComplex *, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, void *, size_t, cusolver_int_t *, - cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnZCgels"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZKgels(cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, cuDoubleComplex *dA, cusolver_int_t ldda, - cuDoubleComplex *dB, cusolver_int_t lddb, cuDoubleComplex *dX, - cusolver_int_t lddx, void *dWorkspace, size_t lwork_bytes, - cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, cuDoubleComplex *, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, void *, size_t, cusolver_int_t *, - cusolver_int_t *); - static auto func_ptr = 
LoadSymbol("cusolverDnZKgels"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZEgels(cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, cuDoubleComplex *dA, cusolver_int_t ldda, - cuDoubleComplex *dB, cusolver_int_t lddb, cuDoubleComplex *dX, - cusolver_int_t lddx, void *dWorkspace, size_t lwork_bytes, - cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, cuDoubleComplex *, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, void *, size_t, cusolver_int_t *, - cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnZEgels"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZYgels(cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, cuDoubleComplex *dA, cusolver_int_t ldda, - cuDoubleComplex *dB, cusolver_int_t lddb, cuDoubleComplex *dX, - cusolver_int_t lddx, void *dWorkspace, size_t lwork_bytes, - cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, cuDoubleComplex *, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, void *, size_t, cusolver_int_t *, - cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnZYgels"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCCgels( - cusolverDnHandle_t handle, 
cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, cuComplex *dA, cusolver_int_t ldda, cuComplex *dB, - cusolver_int_t lddb, cuComplex *dX, cusolver_int_t lddx, void *dWorkspace, - size_t lwork_bytes, cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cusolver_int_t, - cuComplex *, cusolver_int_t, cuComplex *, cusolver_int_t, cuComplex *, - cusolver_int_t, void *, size_t, cusolver_int_t *, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnCCgels"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCKgels( - cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, cuComplex *dA, cusolver_int_t ldda, cuComplex *dB, - cusolver_int_t lddb, cuComplex *dX, cusolver_int_t lddx, void *dWorkspace, - size_t lwork_bytes, cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cusolver_int_t, - cuComplex *, cusolver_int_t, cuComplex *, cusolver_int_t, cuComplex *, - cusolver_int_t, void *, size_t, cusolver_int_t *, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnCKgels"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCEgels( - cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, cuComplex *dA, cusolver_int_t ldda, cuComplex *dB, - cusolver_int_t lddb, cuComplex *dX, cusolver_int_t lddx, void *dWorkspace, - size_t lwork_bytes, cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, 
cusolver_int_t, cusolver_int_t, - cuComplex *, cusolver_int_t, cuComplex *, cusolver_int_t, cuComplex *, - cusolver_int_t, void *, size_t, cusolver_int_t *, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnCEgels"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCYgels( - cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, cuComplex *dA, cusolver_int_t ldda, cuComplex *dB, - cusolver_int_t lddb, cuComplex *dX, cusolver_int_t lddx, void *dWorkspace, - size_t lwork_bytes, cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cusolver_int_t, - cuComplex *, cusolver_int_t, cuComplex *, cusolver_int_t, cuComplex *, - cusolver_int_t, void *, size_t, cusolver_int_t *, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnCYgels"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDDgels( - cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, double *dA, cusolver_int_t ldda, double *dB, - cusolver_int_t lddb, double *dX, cusolver_int_t lddx, void *dWorkspace, - size_t lwork_bytes, cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cusolver_int_t, - double *, cusolver_int_t, double *, cusolver_int_t, double *, - cusolver_int_t, void *, size_t, cusolver_int_t *, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnDDgels"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - 
lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDSgels( - cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, double *dA, cusolver_int_t ldda, double *dB, - cusolver_int_t lddb, double *dX, cusolver_int_t lddx, void *dWorkspace, - size_t lwork_bytes, cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cusolver_int_t, - double *, cusolver_int_t, double *, cusolver_int_t, double *, - cusolver_int_t, void *, size_t, cusolver_int_t *, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnDSgels"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDHgels( - cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, double *dA, cusolver_int_t ldda, double *dB, - cusolver_int_t lddb, double *dX, cusolver_int_t lddx, void *dWorkspace, - size_t lwork_bytes, cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cusolver_int_t, - double *, cusolver_int_t, double *, cusolver_int_t, double *, - cusolver_int_t, void *, size_t, cusolver_int_t *, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnDHgels"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDBgels( - cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, double *dA, cusolver_int_t ldda, double *dB, - cusolver_int_t lddb, double *dX, cusolver_int_t lddx, void *dWorkspace, - size_t lwork_bytes, cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = 
cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cusolver_int_t, - double *, cusolver_int_t, double *, cusolver_int_t, double *, - cusolver_int_t, void *, size_t, cusolver_int_t *, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnDBgels"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDXgels( - cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, double *dA, cusolver_int_t ldda, double *dB, - cusolver_int_t lddb, double *dX, cusolver_int_t lddx, void *dWorkspace, - size_t lwork_bytes, cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cusolver_int_t, - double *, cusolver_int_t, double *, cusolver_int_t, double *, - cusolver_int_t, void *, size_t, cusolver_int_t *, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnDXgels"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSSgels( - cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, float *dA, cusolver_int_t ldda, float *dB, - cusolver_int_t lddb, float *dX, cusolver_int_t lddx, void *dWorkspace, - size_t lwork_bytes, cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cusolver_int_t, - float *, cusolver_int_t, float *, cusolver_int_t, float *, cusolver_int_t, - void *, size_t, cusolver_int_t *, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnSSgels"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, 
lddb, dX, lddx, dWorkspace, - lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSHgels( - cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, float *dA, cusolver_int_t ldda, float *dB, - cusolver_int_t lddb, float *dX, cusolver_int_t lddx, void *dWorkspace, - size_t lwork_bytes, cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cusolver_int_t, - float *, cusolver_int_t, float *, cusolver_int_t, float *, cusolver_int_t, - void *, size_t, cusolver_int_t *, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnSHgels"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSBgels( - cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, float *dA, cusolver_int_t ldda, float *dB, - cusolver_int_t lddb, float *dX, cusolver_int_t lddx, void *dWorkspace, - size_t lwork_bytes, cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cusolver_int_t, - float *, cusolver_int_t, float *, cusolver_int_t, float *, cusolver_int_t, - void *, size_t, cusolver_int_t *, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnSBgels"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSXgels( - cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, float *dA, cusolver_int_t ldda, float *dB, - cusolver_int_t lddb, float *dX, cusolver_int_t lddx, void *dWorkspace, - size_t lwork_bytes, cusolver_int_t *iter, cusolver_int_t *d_info) { - using FuncPtr = 
cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cusolver_int_t, - float *, cusolver_int_t, float *, cusolver_int_t, float *, cusolver_int_t, - void *, size_t, cusolver_int_t *, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnSXgels"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes, iter, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZZgels_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, cuDoubleComplex *dA, cusolver_int_t ldda, - cuDoubleComplex *dB, cusolver_int_t lddb, cuDoubleComplex *dX, - cusolver_int_t lddx, void *dWorkspace, size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, cuDoubleComplex *, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnZZgels_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZCgels_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, cuDoubleComplex *dA, cusolver_int_t ldda, - cuDoubleComplex *dB, cusolver_int_t lddb, cuDoubleComplex *dX, - cusolver_int_t lddx, void *dWorkspace, size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, cuDoubleComplex *, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnZCgels_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, 
dWorkspace, - lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZKgels_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, cuDoubleComplex *dA, cusolver_int_t ldda, - cuDoubleComplex *dB, cusolver_int_t lddb, cuDoubleComplex *dX, - cusolver_int_t lddx, void *dWorkspace, size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, cuDoubleComplex *, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnZKgels_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZEgels_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, cuDoubleComplex *dA, cusolver_int_t ldda, - cuDoubleComplex *dB, cusolver_int_t lddb, cuDoubleComplex *dX, - cusolver_int_t lddx, void *dWorkspace, size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, cuDoubleComplex *, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnZEgels_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZYgels_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, cuDoubleComplex *dA, cusolver_int_t ldda, - cuDoubleComplex *dB, cusolver_int_t lddb, cuDoubleComplex *dX, - cusolver_int_t lddx, void *dWorkspace, size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, 
cusolver_int_t, cusolver_int_t, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, cuDoubleComplex *, cusolver_int_t, - cuDoubleComplex *, cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnZYgels_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCCgels_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, cuComplex *dA, cusolver_int_t ldda, cuComplex *dB, - cusolver_int_t lddb, cuComplex *dX, cusolver_int_t lddx, void *dWorkspace, - size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cusolver_int_t, - cuComplex *, cusolver_int_t, cuComplex *, cusolver_int_t, cuComplex *, - cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnCCgels_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCKgels_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, cuComplex *dA, cusolver_int_t ldda, cuComplex *dB, - cusolver_int_t lddb, cuComplex *dX, cusolver_int_t lddx, void *dWorkspace, - size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cusolver_int_t, - cuComplex *, cusolver_int_t, cuComplex *, cusolver_int_t, cuComplex *, - cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnCKgels_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCEgels_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t 
m, cusolver_int_t n, - cusolver_int_t nrhs, cuComplex *dA, cusolver_int_t ldda, cuComplex *dB, - cusolver_int_t lddb, cuComplex *dX, cusolver_int_t lddx, void *dWorkspace, - size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cusolver_int_t, - cuComplex *, cusolver_int_t, cuComplex *, cusolver_int_t, cuComplex *, - cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnCEgels_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCYgels_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, cuComplex *dA, cusolver_int_t ldda, cuComplex *dB, - cusolver_int_t lddb, cuComplex *dX, cusolver_int_t lddx, void *dWorkspace, - size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cusolver_int_t, - cuComplex *, cusolver_int_t, cuComplex *, cusolver_int_t, cuComplex *, - cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnCYgels_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDDgels_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, double *dA, cusolver_int_t ldda, double *dB, - cusolver_int_t lddb, double *dX, cusolver_int_t lddx, void *dWorkspace, - size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cusolver_int_t, - double *, cusolver_int_t, double *, cusolver_int_t, double *, - cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnDDgels_bufferSize"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDSgels_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, double *dA, cusolver_int_t ldda, double *dB, - cusolver_int_t lddb, double *dX, cusolver_int_t lddx, void *dWorkspace, - size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cusolver_int_t, - double *, cusolver_int_t, double *, cusolver_int_t, double *, - cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnDSgels_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDHgels_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, double *dA, cusolver_int_t ldda, double *dB, - cusolver_int_t lddb, double *dX, cusolver_int_t lddx, void *dWorkspace, - size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cusolver_int_t, - double *, cusolver_int_t, double *, cusolver_int_t, double *, - cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnDHgels_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDBgels_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, double *dA, cusolver_int_t ldda, double *dB, - cusolver_int_t lddb, double *dX, cusolver_int_t lddx, void *dWorkspace, - size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, 
cusolver_int_t, - double *, cusolver_int_t, double *, cusolver_int_t, double *, - cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnDBgels_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDXgels_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, double *dA, cusolver_int_t ldda, double *dB, - cusolver_int_t lddb, double *dX, cusolver_int_t lddx, void *dWorkspace, - size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cusolver_int_t, - double *, cusolver_int_t, double *, cusolver_int_t, double *, - cusolver_int_t, void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnDXgels_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSSgels_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, float *dA, cusolver_int_t ldda, float *dB, - cusolver_int_t lddb, float *dX, cusolver_int_t lddx, void *dWorkspace, - size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cusolver_int_t, - float *, cusolver_int_t, float *, cusolver_int_t, float *, cusolver_int_t, - void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnSSgels_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSHgels_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, float *dA, cusolver_int_t ldda, float *dB, - 
cusolver_int_t lddb, float *dX, cusolver_int_t lddx, void *dWorkspace, - size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cusolver_int_t, - float *, cusolver_int_t, float *, cusolver_int_t, float *, cusolver_int_t, - void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnSHgels_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSBgels_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, float *dA, cusolver_int_t ldda, float *dB, - cusolver_int_t lddb, float *dX, cusolver_int_t lddx, void *dWorkspace, - size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cusolver_int_t, - float *, cusolver_int_t, float *, cusolver_int_t, float *, cusolver_int_t, - void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnSBgels_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSXgels_bufferSize( - cusolverDnHandle_t handle, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, float *dA, cusolver_int_t ldda, float *dB, - cusolver_int_t lddb, float *dX, cusolver_int_t lddx, void *dWorkspace, - size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolver_int_t, cusolver_int_t, cusolver_int_t, - float *, cusolver_int_t, float *, cusolver_int_t, float *, cusolver_int_t, - void *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnSXgels_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dWorkspace, - lwork_bytes); -} - 
-cusolverStatus_t CUSOLVERAPI cusolverDnIRSXgesv( - cusolverDnHandle_t handle, cusolverDnIRSParams_t gesv_irs_params, - cusolverDnIRSInfos_t gesv_irs_infos, cusolver_int_t n, cusolver_int_t nrhs, - void *dA, cusolver_int_t ldda, void *dB, cusolver_int_t lddb, void *dX, - cusolver_int_t lddx, void *dWorkspace, size_t lwork_bytes, - cusolver_int_t *niters, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnIRSParams_t, cusolverDnIRSInfos_t, - cusolver_int_t, cusolver_int_t, void *, cusolver_int_t, void *, - cusolver_int_t, void *, cusolver_int_t, void *, size_t, cusolver_int_t *, - cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnIRSXgesv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, gesv_irs_params, gesv_irs_infos, n, nrhs, dA, ldda, - dB, lddb, dX, lddx, dWorkspace, lwork_bytes, niters, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnIRSXgesv_bufferSize( - cusolverDnHandle_t handle, cusolverDnIRSParams_t params, cusolver_int_t n, - cusolver_int_t nrhs, size_t *lwork_bytes) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cusolverDnIRSParams_t, - cusolver_int_t, cusolver_int_t, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnIRSXgesv_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, n, nrhs, lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnIRSXgels( - cusolverDnHandle_t handle, cusolverDnIRSParams_t gels_irs_params, - cusolverDnIRSInfos_t gels_irs_infos, cusolver_int_t m, cusolver_int_t n, - cusolver_int_t nrhs, void *dA, cusolver_int_t ldda, void *dB, - cusolver_int_t lddb, void *dX, cusolver_int_t lddx, void *dWorkspace, - size_t lwork_bytes, cusolver_int_t *niters, cusolver_int_t *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnIRSParams_t, cusolverDnIRSInfos_t, - cusolver_int_t, cusolver_int_t, cusolver_int_t, void 
*, cusolver_int_t, - void *, cusolver_int_t, void *, cusolver_int_t, void *, size_t, - cusolver_int_t *, cusolver_int_t *); - static auto func_ptr = LoadSymbol("cusolverDnIRSXgels"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, gels_irs_params, gels_irs_infos, m, n, nrhs, dA, ldda, - dB, lddb, dX, lddx, dWorkspace, lwork_bytes, niters, d_info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnIRSXgels_bufferSize( - cusolverDnHandle_t handle, cusolverDnIRSParams_t params, cusolver_int_t m, - cusolver_int_t n, cusolver_int_t nrhs, size_t *lwork_bytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnIRSParams_t, cusolver_int_t, cusolver_int_t, - cusolver_int_t, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnIRSXgels_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, m, n, nrhs, lwork_bytes); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnSpotrf_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, float *A, int lda, int *Lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSpotrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, Lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnDpotrf_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, double *A, int lda, int *Lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDpotrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, Lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCpotrf_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, cuComplex *A, int lda, int *Lwork) { - using FuncPtr = 
cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCpotrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, Lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZpotrf_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, cuDoubleComplex *A, int lda, int *Lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZpotrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, Lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSpotrf(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - float *A, int lda, - float *Workspace, int Lwork, - int *devInfo) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - float *, int, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSpotrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, Workspace, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDpotrf(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - double *A, int lda, - double *Workspace, int Lwork, - int *devInfo) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - double *, int, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDpotrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, Workspace, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCpotrf(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuComplex *A, int lda, - cuComplex *Workspace, int Lwork, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, 
cublasFillMode_t, int, cuComplex *, int, cuComplex *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCpotrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, Workspace, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZpotrf(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuDoubleComplex *A, int lda, - cuDoubleComplex *Workspace, - int Lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, - cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZpotrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, Workspace, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSpotrs(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - int nrhs, const float *A, int lda, - float *B, int ldb, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const float *, int, - float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSpotrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, B, ldb, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDpotrs(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - int nrhs, const double *A, - int lda, double *B, int ldb, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const double *, int, - double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDpotrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, B, ldb, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCpotrs(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - int nrhs, const cuComplex *A, - int lda, cuComplex *B, int ldb, - int *devInfo) { - using 
FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const cuComplex *, int, - cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCpotrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, B, ldb, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZpotrs(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - int nrhs, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *B, int ldb, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const cuDoubleComplex *, - int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZpotrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, B, ldb, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSpotrfBatched(cusolverDnHandle_t handle, - cublasFillMode_t uplo, - int n, float *Aarray[], - int lda, int *infoArray, - int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, float *[], int, int *, int); - static auto func_ptr = LoadSymbol("cusolverDnSpotrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, Aarray, lda, infoArray, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDpotrfBatched(cusolverDnHandle_t handle, - cublasFillMode_t uplo, - int n, double *Aarray[], - int lda, int *infoArray, - int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, double *[], int, int *, int); - static auto func_ptr = LoadSymbol("cusolverDnDpotrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, Aarray, lda, infoArray, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCpotrfBatched(cusolverDnHandle_t handle, - cublasFillMode_t uplo, - int n, cuComplex *Aarray[], - 
int lda, int *infoArray, - int batchSize) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - cuComplex *[], int, int *, int); - static auto func_ptr = LoadSymbol("cusolverDnCpotrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, Aarray, lda, infoArray, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZpotrfBatched( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, - cuDoubleComplex *Aarray[], int lda, int *infoArray, int batchSize) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - cuDoubleComplex *[], int, int *, int); - static auto func_ptr = LoadSymbol("cusolverDnZpotrfBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, Aarray, lda, infoArray, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSpotrsBatched( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, - int nrhs, /* only support rhs = 1*/ - float *A[], int lda, float *B[], int ldb, int *d_info, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, float *[], int, float *[], - int, int *, int); - static auto func_ptr = LoadSymbol("cusolverDnSpotrsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, B, ldb, d_info, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDpotrsBatched( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, - int nrhs, /* only support rhs = 1*/ - double *A[], int lda, double *B[], int ldb, int *d_info, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, double *[], int, - double *[], int, int *, int); - static auto func_ptr = LoadSymbol("cusolverDnDpotrsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, B, ldb, 
d_info, batchSize); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCpotrsBatched(cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, - int nrhs, /* only support rhs = 1*/ - cuComplex *A[], int lda, cuComplex *B[], int ldb, - int *d_info, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, cuComplex *[], int, - cuComplex *[], int, int *, int); - static auto func_ptr = LoadSymbol("cusolverDnCpotrsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, B, ldb, d_info, batchSize); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZpotrsBatched(cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, - int nrhs, /* only support rhs = 1*/ - cuDoubleComplex *A[], int lda, cuDoubleComplex *B[], - int ldb, int *d_info, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, cuDoubleComplex *[], int, - cuDoubleComplex *[], int, int *, int); - static auto func_ptr = LoadSymbol("cusolverDnZpotrsBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, B, ldb, d_info, batchSize); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnSpotri_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, float *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSpotri_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnDpotri_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, double *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, double *, int, int *); - static auto func_ptr = 
LoadSymbol("cusolverDnDpotri_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCpotri_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, cuComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCpotri_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZpotri_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, cuDoubleComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZpotri_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSpotri(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - float *A, int lda, float *work, - int lwork, int *devInfo) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - float *, int, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSpotri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDpotri(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - double *A, int lda, double *work, - int lwork, int *devInfo) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - double *, int, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDpotri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, 
work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCpotri(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuComplex *A, int lda, - cuComplex *work, int lwork, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, cuComplex *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCpotri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZpotri(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuDoubleComplex *A, int lda, - cuDoubleComplex *work, int lwork, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, - cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZpotri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnStrtri_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, cublasDiagType_t diag, - int n, float *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, cublasDiagType_t, int, float *, int, - int *); - static auto func_ptr = LoadSymbol("cusolverDnStrtri_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, diag, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDtrtri_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, cublasDiagType_t diag, - int n, double *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, cublasDiagType_t, int, double *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDtrtri_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, uplo, diag, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCtrtri_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, cublasDiagType_t diag, - int n, cuComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, cublasDiagType_t, int, cuComplex *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCtrtri_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, diag, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZtrtri_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, cublasDiagType_t diag, - int n, cuDoubleComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, cublasDiagType_t, int, - cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZtrtri_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, diag, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnStrtri(cusolverDnHandle_t handle, - cublasFillMode_t uplo, - cublasDiagType_t diag, int n, - float *A, int lda, float *work, - int lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, cublasDiagType_t, int, float *, int, - float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnStrtri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, diag, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDtrtri(cusolverDnHandle_t handle, - cublasFillMode_t uplo, - cublasDiagType_t diag, int n, - double *A, int lda, double *work, - int lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, cublasDiagType_t, int, double *, - int, double *, int, int *); - static auto func_ptr = 
LoadSymbol("cusolverDnDtrtri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, diag, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCtrtri( - cusolverDnHandle_t handle, cublasFillMode_t uplo, cublasDiagType_t diag, - int n, cuComplex *A, int lda, cuComplex *work, int lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, cublasDiagType_t, int, cuComplex *, - int, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCtrtri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, diag, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZtrtri(cusolverDnHandle_t handle, - cublasFillMode_t uplo, - cublasDiagType_t diag, int n, - cuDoubleComplex *A, int lda, - cuDoubleComplex *work, int lwork, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, cublasDiagType_t, int, - cuDoubleComplex *, int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZtrtri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, diag, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnSlauum_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, float *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSlauum_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnDlauum_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, double *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, double *, int, int *); - static 
auto func_ptr = LoadSymbol("cusolverDnDlauum_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnClauum_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, cuComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnClauum_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZlauum_bufferSize(cusolverDnHandle_t handle, cublasFillMode_t uplo, - int n, cuDoubleComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZlauum_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSlauum(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - float *A, int lda, float *work, - int lwork, int *devInfo) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - float *, int, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSlauum"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDlauum(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - double *A, int lda, double *work, - int lwork, int *devInfo) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - double *, int, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDlauum"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, 
uplo, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnClauum(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuComplex *A, int lda, - cuComplex *work, int lwork, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, cuComplex *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnClauum"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZlauum(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuDoubleComplex *A, int lda, - cuDoubleComplex *work, int lwork, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, - cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZlauum"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, work, lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgetrf_bufferSize( - cusolverDnHandle_t handle, int m, int n, float *A, int lda, int *Lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgetrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, Lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgetrf_bufferSize( - cusolverDnHandle_t handle, int m, int n, double *A, int lda, int *Lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgetrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, Lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCgetrf_bufferSize(cusolverDnHandle_t handle, int m, int n, - 
cuComplex *A, int lda, int *Lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCgetrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, Lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZgetrf_bufferSize(cusolverDnHandle_t handle, int m, int n, - cuDoubleComplex *A, int lda, int *Lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgetrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, Lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgetrf(cusolverDnHandle_t handle, int m, - int n, float *A, int lda, - float *Workspace, int *devIpiv, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, float *, int, float *, int *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgetrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, Workspace, devIpiv, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgetrf(cusolverDnHandle_t handle, int m, - int n, double *A, int lda, - double *Workspace, int *devIpiv, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, double *, int, double *, int *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgetrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, Workspace, devIpiv, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgetrf(cusolverDnHandle_t handle, int m, - int n, cuComplex *A, int lda, - cuComplex *Workspace, - int *devIpiv, int *devInfo) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, cuComplex *, - int, cuComplex *, int *, int *); - static auto 
func_ptr = LoadSymbol("cusolverDnCgetrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, Workspace, devIpiv, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgetrf(cusolverDnHandle_t handle, int m, - int n, cuDoubleComplex *A, - int lda, - cuDoubleComplex *Workspace, - int *devIpiv, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, cuDoubleComplex *, int, cuDoubleComplex *, - int *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgetrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, Workspace, devIpiv, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSlaswp(cusolverDnHandle_t handle, int n, - float *A, int lda, int k1, int k2, - const int *devIpiv, int incx) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, float *, int, int, int, const int *, int); - static auto func_ptr = LoadSymbol("cusolverDnSlaswp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, k1, k2, devIpiv, incx); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDlaswp(cusolverDnHandle_t handle, int n, - double *A, int lda, int k1, - int k2, const int *devIpiv, - int incx) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, double *, int, int, int, const int *, int); - static auto func_ptr = LoadSymbol("cusolverDnDlaswp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, k1, k2, devIpiv, incx); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnClaswp(cusolverDnHandle_t handle, int n, - cuComplex *A, int lda, int k1, - int k2, const int *devIpiv, - int incx) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, cuComplex *, int, int, int, const int *, int); - static auto func_ptr = LoadSymbol("cusolverDnClaswp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, 
k1, k2, devIpiv, incx); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZlaswp(cusolverDnHandle_t handle, int n, - cuDoubleComplex *A, int lda, - int k1, int k2, - const int *devIpiv, int incx) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, - cuDoubleComplex *, int, int, - int, const int *, int); - static auto func_ptr = LoadSymbol("cusolverDnZlaswp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, k1, k2, devIpiv, incx); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgetrs(cusolverDnHandle_t handle, - cublasOperation_t trans, int n, - int nrhs, const float *A, int lda, - const int *devIpiv, float *B, - int ldb, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasOperation_t, int, int, const float *, int, - const int *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgetrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, n, nrhs, A, lda, devIpiv, B, ldb, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgetrs(cusolverDnHandle_t handle, - cublasOperation_t trans, int n, - int nrhs, const double *A, - int lda, const int *devIpiv, - double *B, int ldb, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasOperation_t, int, int, const double *, int, - const int *, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgetrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, n, nrhs, A, lda, devIpiv, B, ldb, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgetrs(cusolverDnHandle_t handle, - cublasOperation_t trans, int n, - int nrhs, const cuComplex *A, - int lda, const int *devIpiv, - cuComplex *B, int ldb, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasOperation_t, int, int, const cuComplex *, int, - const int *, cuComplex *, int, int *); - static 
auto func_ptr = LoadSymbol("cusolverDnCgetrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, n, nrhs, A, lda, devIpiv, B, ldb, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgetrs( - cusolverDnHandle_t handle, cublasOperation_t trans, int n, int nrhs, - const cuDoubleComplex *A, int lda, const int *devIpiv, cuDoubleComplex *B, - int ldb, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasOperation_t, int, int, const cuDoubleComplex *, - int, const int *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgetrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, n, nrhs, A, lda, devIpiv, B, ldb, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgeqrf_bufferSize( - cusolverDnHandle_t handle, int m, int n, float *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgeqrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgeqrf_bufferSize( - cusolverDnHandle_t handle, int m, int n, double *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgeqrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCgeqrf_bufferSize(cusolverDnHandle_t handle, int m, int n, - cuComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCgeqrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, 
lwork); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZgeqrf_bufferSize(cusolverDnHandle_t handle, int m, int n, - cuDoubleComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgeqrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgeqrf(cusolverDnHandle_t handle, int m, - int n, float *A, int lda, - float *TAU, float *Workspace, - int Lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, float *, int, float *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgeqrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, TAU, Workspace, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgeqrf(cusolverDnHandle_t handle, int m, - int n, double *A, int lda, - double *TAU, double *Workspace, - int Lwork, int *devInfo) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, double *, - int, double *, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgeqrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, TAU, Workspace, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgeqrf(cusolverDnHandle_t handle, int m, - int n, cuComplex *A, int lda, - cuComplex *TAU, - cuComplex *Workspace, int Lwork, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - cuComplex *, int, cuComplex *, - cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCgeqrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, TAU, Workspace, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgeqrf(cusolverDnHandle_t 
handle, int m, - int n, cuDoubleComplex *A, - int lda, cuDoubleComplex *TAU, - cuDoubleComplex *Workspace, - int Lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, cuDoubleComplex *, int, cuDoubleComplex *, - cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgeqrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, TAU, Workspace, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverSpScsrlsvqr(cusolverSpHandle_t handle, - int m, int nnz, - const cusparseMatDescr_t descrA, - const float *csrValA, - const int *csrRowPtrA, - const int *csrColIndA, - const float *b, float tol, - int reorder, float *x, - int *singularity) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverSpHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, const float *, float, int, float *, int *); - static auto func_ptr = LoadSymbol("cusolverSpScsrlsvqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrValA, csrRowPtrA, csrColIndA, - b, tol, reorder, x, singularity); -} - -cusolverStatus_t CUSOLVERAPI cusolverSpDcsrlsvqr(cusolverSpHandle_t handle, - int m, int nnz, - const cusparseMatDescr_t descrA, - const double *csrValA, - const int *csrRowPtrA, - const int *csrColIndA, - const double *b, double tol, - int reorder, double *x, - int *singularity) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverSpHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, double, int, double *, int *); - static auto func_ptr = LoadSymbol("cusolverSpDcsrlsvqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrValA, csrRowPtrA, csrColIndA, - b, tol, reorder, x, singularity); -} - -cusolverStatus_t CUSOLVERAPI cusolverSpCcsrlsvqr(cusolverSpHandle_t handle, - int m, int nnz, - const 
cusparseMatDescr_t descrA, - const cuComplex *csrValA, - const int *csrRowPtrA, - const int *csrColIndA, - const cuComplex *b, float tol, - int reorder, cuComplex *x, - int *singularity) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverSpHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, const cuComplex *, float, int, - cuComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverSpCcsrlsvqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrValA, csrRowPtrA, csrColIndA, - b, tol, reorder, x, singularity); -} - -cusolverStatus_t CUSOLVERAPI cusolverSpZcsrlsvqr(cusolverSpHandle_t handle, - int m, int nnz, - const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrValA, - const int *csrRowPtrA, - const int *csrColIndA, - const cuDoubleComplex *b, - double tol, - int reorder, - cuDoubleComplex *x, - int *singularity) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverSpHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, const cuDoubleComplex *, - double, int, cuDoubleComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverSpZcsrlsvqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrValA, csrRowPtrA, csrColIndA, - b, tol, reorder, x, singularity); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSorgqr_bufferSize( - cusolverDnHandle_t handle, int m, int n, int k, const float *A, int lda, - const float *tau, int *lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int, - const float *, int, const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSorgqr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDorgqr_bufferSize( - cusolverDnHandle_t handle, int m, int n, int k, const double *A, 
int lda, - const double *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - int, const double *, int, - const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDorgqr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCungqr_bufferSize( - cusolverDnHandle_t handle, int m, int n, int k, const cuComplex *A, int lda, - const cuComplex *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, - int, const cuComplex *, int, - const cuComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverDnCungqr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZungqr_bufferSize( - cusolverDnHandle_t handle, int m, int n, int k, const cuDoubleComplex *A, - int lda, const cuDoubleComplex *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZungqr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSorgqr(cusolverDnHandle_t handle, int m, - int n, int k, float *A, int lda, - const float *tau, float *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, int, float *, int, const float *, float *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSorgqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDorgqr(cusolverDnHandle_t handle, int m, - int n, int k, double *A, int lda, - const double 
*tau, double *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, int, double *, int, const double *, - double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDorgqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCungqr(cusolverDnHandle_t handle, int m, - int n, int k, cuComplex *A, - int lda, const cuComplex *tau, - cuComplex *work, int lwork, - int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, int, cuComplex *, int, const cuComplex *, - cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCungqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZungqr( - cusolverDnHandle_t handle, int m, int n, int k, cuDoubleComplex *A, int lda, - const cuDoubleComplex *tau, cuDoubleComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, int, cuDoubleComplex *, int, - const cuDoubleComplex *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZungqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSormqr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, - int m, int n, int k, const float *A, int lda, const float *tau, - const float *C, int ldc, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasOperation_t, int, int, int, - const float *, int, const float *, const float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSormqr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDormqr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, - int m, int n, int k, const double *A, int lda, const double *tau, - const double *C, int ldc, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasOperation_t, int, int, int, - const double *, int, const double *, const double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDormqr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCunmqr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, - int m, int n, int k, const cuComplex *A, int lda, const cuComplex *tau, - const cuComplex *C, int ldc, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasOperation_t, int, int, int, - const cuComplex *, int, const cuComplex *, const cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCunmqr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZunmqr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, - int m, int n, int k, const cuDoubleComplex *A, int lda, - const cuDoubleComplex *tau, const cuDoubleComplex *C, int ldc, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasOperation_t, int, int, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZunmqr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSormqr( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, - int m, int n, int k, const float *A, int lda, const float *tau, float *C, - int ldc, float *work, int lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasOperation_t, int, int, int, - const float *, int, const float *, float *, int, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSormqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, work, - lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDormqr( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, - int m, int n, int k, const double *A, int lda, const double *tau, double *C, - int ldc, double *work, int lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasOperation_t, int, int, int, - const double *, int, const double *, double *, int, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDormqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, work, - lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCunmqr( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, - int m, int n, int k, const cuComplex *A, int lda, const cuComplex *tau, - cuComplex *C, int ldc, cuComplex *work, int lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasOperation_t, int, int, int, - const cuComplex *, int, const cuComplex *, cuComplex *, int, cuComplex *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCunmqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, work, - lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZunmqr( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, - int m, int n, int k, const cuDoubleComplex *A, int lda, - const cuDoubleComplex *tau, cuDoubleComplex *C, int ldc, - cuDoubleComplex *work, int lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasOperation_t, int, int, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZunmqr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, work, - lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsytrf_bufferSize( - cusolverDnHandle_t handle, int n, float *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, - float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsytrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsytrf_bufferSize( - cusolverDnHandle_t handle, int n, double *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, - double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsytrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCsytrf_bufferSize( - cusolverDnHandle_t handle, int n, cuComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, - cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCsytrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZsytrf_bufferSize( - cusolverDnHandle_t handle, int n, cuDoubleComplex *A, int lda, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZsytrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, A, lda, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsytrf(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - float *A, int lda, int *ipiv, - float *work, int lwork, - int *info) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - float *, int, int *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsytrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsytrf(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - double *A, int lda, int *ipiv, - double *work, int lwork, - int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, double *, int, int *, double *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsytrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCsytrf(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuComplex *A, int lda, int *ipiv, - cuComplex *work, int lwork, - int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, int *, - cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCsytrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, work, lwork, info); -} - -cusolverStatus_t 
CUSOLVERAPI cusolverDnZsytrf(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuDoubleComplex *A, int lda, - int *ipiv, cuDoubleComplex *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, int *, - cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZsytrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsytrs_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, int nrhs, - const float *A, int lda, const int *ipiv, float *B, int ldb, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const float *, int, - const int *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsytrs_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, ipiv, B, ldb, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsytrs_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, int nrhs, - const double *A, int lda, const int *ipiv, double *B, int ldb, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const double *, int, - const int *, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsytrs_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, ipiv, B, ldb, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCsytrs_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, int nrhs, - const cuComplex *A, int lda, const int *ipiv, cuComplex *B, int ldb, - int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const cuComplex *, int, - const int *, 
cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCsytrs_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, ipiv, B, ldb, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZsytrs_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, int nrhs, - const cuDoubleComplex *A, int lda, const int *ipiv, cuDoubleComplex *B, - int ldb, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const cuDoubleComplex *, - int, const int *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZsytrs_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, ipiv, B, ldb, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsytrs(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - int nrhs, const float *A, int lda, - const int *ipiv, float *B, - int ldb, float *work, int lwork, - int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const float *, int, - const int *, float *, int, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsytrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, ipiv, B, ldb, work, lwork, - info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsytrs(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - int nrhs, const double *A, - int lda, const int *ipiv, - double *B, int ldb, double *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const double *, int, - const int *, double *, int, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsytrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, ipiv, B, ldb, work, lwork, - 
info); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCsytrs(cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, - int nrhs, const cuComplex *A, int lda, const int *ipiv, - cuComplex *B, int ldb, cuComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const cuComplex *, int, - const int *, cuComplex *, int, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCsytrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, ipiv, B, ldb, work, lwork, - info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZsytrs( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, int nrhs, - const cuDoubleComplex *A, int lda, const int *ipiv, cuDoubleComplex *B, - int ldb, cuDoubleComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, int, const cuDoubleComplex *, - int, const int *, cuDoubleComplex *, int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZsytrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, nrhs, A, lda, ipiv, B, ldb, work, lwork, - info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsytri_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, float *A, int lda, - const int *ipiv, int *lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - float *, int, const int *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsytri_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsytri_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, double *A, int lda, - const int *ipiv, int *lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, 
cublasFillMode_t, int, - double *, int, const int *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsytri_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCsytri_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, cuComplex *A, - int lda, const int *ipiv, int *lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - cuComplex *, int, const int *, int *); - static auto func_ptr = LoadSymbol("cusolverDnCsytri_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZsytri_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, cuDoubleComplex *A, - int lda, const int *ipiv, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, - const int *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZsytri_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsytri(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - float *A, int lda, - const int *ipiv, float *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, float *, int, const int *, - float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsytri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsytri(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - double *A, int lda, - const int *ipiv, double *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - 
cusolverDnHandle_t, cublasFillMode_t, int, double *, int, const int *, - double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsytri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCsytri(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuComplex *A, int lda, - const int *ipiv, cuComplex *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, const int *, - cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCsytri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZsytri( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, cuDoubleComplex *A, - int lda, const int *ipiv, cuDoubleComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, - const int *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZsytri"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, ipiv, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgebrd_bufferSize( - cusolverDnHandle_t handle, int m, int n, int *Lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgebrd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, Lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgebrd_bufferSize( - cusolverDnHandle_t handle, int m, int n, int *Lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgebrd_bufferSize"); 
- if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, Lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgebrd_bufferSize( - cusolverDnHandle_t handle, int m, int n, int *Lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCgebrd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, Lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgebrd_bufferSize( - cusolverDnHandle_t handle, int m, int n, int *Lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgebrd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, Lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgebrd(cusolverDnHandle_t handle, int m, - int n, float *A, int lda, - float *D, float *E, float *TAUQ, - float *TAUP, float *Work, - int Lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, float *, int, float *, float *, float *, - float *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgebrd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, D, E, TAUQ, TAUP, Work, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgebrd(cusolverDnHandle_t handle, int m, - int n, double *A, int lda, - double *D, double *E, - double *TAUQ, double *TAUP, - double *Work, int Lwork, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, double *, int, double *, double *, double *, - double *, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgebrd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, D, E, TAUQ, TAUP, Work, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI 
cusolverDnCgebrd(cusolverDnHandle_t handle, int m, - int n, cuComplex *A, int lda, - float *D, float *E, - cuComplex *TAUQ, cuComplex *TAUP, - cuComplex *Work, int Lwork, - int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, cuComplex *, int, float *, float *, - cuComplex *, cuComplex *, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCgebrd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, D, E, TAUQ, TAUP, Work, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgebrd( - cusolverDnHandle_t handle, int m, int n, cuDoubleComplex *A, int lda, - double *D, double *E, cuDoubleComplex *TAUQ, cuDoubleComplex *TAUP, - cuDoubleComplex *Work, int Lwork, int *devInfo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, int, int, cuDoubleComplex *, int, double *, double *, - cuDoubleComplex *, cuDoubleComplex *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgebrd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, D, E, TAUQ, TAUP, Work, Lwork, devInfo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSorgbr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, int m, int n, int k, - const float *A, int lda, const float *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, int, int, int, const float *, int, - const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSorgbr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, m, n, k, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDorgbr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, int m, int n, int k, - const double *A, int lda, const double *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, 
int, int, int, const double *, int, - const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDorgbr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, m, n, k, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCungbr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, int m, int n, int k, - const cuComplex *A, int lda, const cuComplex *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, int, int, int, const cuComplex *, - int, const cuComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverDnCungbr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, m, n, k, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZungbr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, int m, int n, int k, - const cuDoubleComplex *A, int lda, const cuDoubleComplex *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, int, int, int, - const cuDoubleComplex *, int, const cuDoubleComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZungbr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, m, n, k, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSorgbr(cusolverDnHandle_t handle, - cublasSideMode_t side, int m, - int n, int k, float *A, int lda, - const float *tau, float *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, int, int, int, float *, int, - const float *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSorgbr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, m, n, k, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDorgbr(cusolverDnHandle_t handle, - cublasSideMode_t 
side, int m, - int n, int k, double *A, int lda, - const double *tau, double *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, int, int, int, double *, int, - const double *, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDorgbr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, m, n, k, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCungbr(cusolverDnHandle_t handle, - cublasSideMode_t side, int m, - int n, int k, cuComplex *A, - int lda, const cuComplex *tau, - cuComplex *work, int lwork, - int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, int, int, int, cuComplex *, int, - const cuComplex *, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCungbr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, m, n, k, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZungbr(cusolverDnHandle_t handle, cublasSideMode_t side, int m, int n, - int k, cuDoubleComplex *A, int lda, const cuDoubleComplex *tau, - cuDoubleComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, int, int, int, cuDoubleComplex *, - int, const cuDoubleComplex *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZungbr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, m, n, k, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsytrd_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, const float *A, - int lda, const float *d, const float *e, const float *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, const float *, int, - const float *, const float *, const float 
*, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsytrd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, d, e, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsytrd_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, const double *A, - int lda, const double *d, const double *e, const double *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, const double *, int, - const double *, const double *, const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsytrd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, d, e, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnChetrd_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, const cuComplex *A, - int lda, const float *d, const float *e, const cuComplex *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, const cuComplex *, int, - const float *, const float *, const cuComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverDnChetrd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, d, e, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZhetrd_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, - const cuDoubleComplex *A, int lda, const double *d, const double *e, - const cuDoubleComplex *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, int, - const double *, const double *, const cuDoubleComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZhetrd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, d, e, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI 
cusolverDnSsytrd(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - float *A, int lda, float *d, - float *e, float *tau, float *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, float *, int, float *, float *, - float *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsytrd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, d, e, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsytrd( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, double *A, int lda, - double *d, double *e, double *tau, double *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, double *, int, double *, - double *, double *, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsytrd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, d, e, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnChetrd(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - cuComplex *A, int lda, float *d, - float *e, cuComplex *tau, - cuComplex *work, int lwork, - int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, float *, - float *, cuComplex *, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnChetrd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, d, e, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZhetrd( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, cuDoubleComplex *A, - int lda, double *d, double *e, cuDoubleComplex *tau, cuDoubleComplex *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, - 
double *, double *, cuDoubleComplex *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZhetrd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, d, e, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSorgtr_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, const float *A, - int lda, const float *tau, int *lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, cublasFillMode_t, int, - const float *, int, const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSorgtr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDorgtr_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, const double *A, - int lda, const double *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, const double *, int, - const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDorgtr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCungtr_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, const cuComplex *A, - int lda, const cuComplex *tau, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, const cuComplex *, int, - const cuComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverDnCungtr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZungtr_bufferSize( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, - const cuDoubleComplex *A, int lda, const cuDoubleComplex *tau, int *lwork) { - using FuncPtr = 
cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZungtr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, tau, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSorgtr(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - float *A, int lda, - const float *tau, float *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, float *, int, const float *, - float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSorgtr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDorgtr(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, - double *A, int lda, - const double *tau, double *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, double *, int, const double *, - double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDorgtr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCungtr( - cusolverDnHandle_t handle, cublasFillMode_t uplo, int n, cuComplex *A, - int lda, const cuComplex *tau, cuComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuComplex *, int, - const cuComplex *, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCungtr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZungtr(cusolverDnHandle_t handle, - cublasFillMode_t uplo, int n, 
- cuDoubleComplex *A, int lda, - const cuDoubleComplex *tau, - cuDoubleComplex *work, int lwork, - int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasFillMode_t, int, cuDoubleComplex *, int, - const cuDoubleComplex *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZungtr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, uplo, n, A, lda, tau, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSormtr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, int m, int n, const float *A, int lda, - const float *tau, const float *C, int ldc, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - int, int, const float *, int, const float *, const float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSormtr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDormtr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, int m, int n, const double *A, int lda, - const double *tau, const double *C, int ldc, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - int, int, const double *, int, const double *, const double *, int, - int *); - static auto func_ptr = LoadSymbol("cusolverDnDormtr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCunmtr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, int m, int n, const cuComplex 
*A, int lda, - const cuComplex *tau, const cuComplex *C, int ldc, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - int, int, const cuComplex *, int, const cuComplex *, const cuComplex *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCunmtr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZunmtr_bufferSize( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, int m, int n, const cuDoubleComplex *A, int lda, - const cuDoubleComplex *tau, const cuDoubleComplex *C, int ldc, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - int, int, const cuDoubleComplex *, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZunmtr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSormtr( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, int m, int n, float *A, int lda, float *tau, - float *C, int ldc, float *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - int, int, float *, int, float *, float *, int, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSormtr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, work, - lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDormtr( - cusolverDnHandle_t handle, cublasSideMode_t side, 
cublasFillMode_t uplo, - cublasOperation_t trans, int m, int n, double *A, int lda, double *tau, - double *C, int ldc, double *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - int, int, double *, int, double *, double *, int, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDormtr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, work, - lwork, info); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCunmtr(cusolverDnHandle_t handle, cublasSideMode_t side, - cublasFillMode_t uplo, cublasOperation_t trans, int m, int n, - cuComplex *A, int lda, cuComplex *tau, cuComplex *C, int ldc, - cuComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - int, int, cuComplex *, int, cuComplex *, cuComplex *, int, cuComplex *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCunmtr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, work, - lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZunmtr( - cusolverDnHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, - cublasOperation_t trans, int m, int n, cuDoubleComplex *A, int lda, - cuDoubleComplex *tau, cuDoubleComplex *C, int ldc, cuDoubleComplex *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, - int, int, cuDoubleComplex *, int, cuDoubleComplex *, cuDoubleComplex *, - int, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZunmtr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, side, uplo, trans, m, n, A, lda, tau, C, ldc, work, - lwork, info); -} - 
-cusolverStatus_t CUSOLVERAPI cusolverDnSgesvd_bufferSize( - cusolverDnHandle_t handle, int m, int n, int *lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgesvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgesvd_bufferSize( - cusolverDnHandle_t handle, int m, int n, int *lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgesvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgesvd_bufferSize( - cusolverDnHandle_t handle, int m, int n, int *lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCgesvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgesvd_bufferSize( - cusolverDnHandle_t handle, int m, int n, int *lwork) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, int, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgesvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgesvd( - cusolverDnHandle_t handle, signed char jobu, signed char jobvt, int m, - int n, float *A, int lda, float *S, float *U, int ldu, float *VT, int ldvt, - float *work, int lwork, float *rwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, signed char, signed char, int, int, float *, int, - float *, float *, int, float *, int, float *, int, float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSgesvd"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(handle, jobu, jobvt, m, n, A, lda, S, U, ldu, VT, ldvt, work, - lwork, rwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgesvd( - cusolverDnHandle_t handle, signed char jobu, signed char jobvt, int m, - int n, double *A, int lda, double *S, double *U, int ldu, double *VT, - int ldvt, double *work, int lwork, double *rwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, signed char, signed char, int, int, double *, int, - double *, double *, int, double *, int, double *, int, double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDgesvd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobu, jobvt, m, n, A, lda, S, U, ldu, VT, ldvt, work, - lwork, rwork, info); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCgesvd(cusolverDnHandle_t handle, signed char jobu, signed char jobvt, - int m, int n, cuComplex *A, int lda, float *S, cuComplex *U, - int ldu, cuComplex *VT, int ldvt, cuComplex *work, int lwork, - float *rwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, signed char, signed char, int, int, cuComplex *, int, - float *, cuComplex *, int, cuComplex *, int, cuComplex *, int, float *, - int *); - static auto func_ptr = LoadSymbol("cusolverDnCgesvd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobu, jobvt, m, n, A, lda, S, U, ldu, VT, ldvt, work, - lwork, rwork, info); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZgesvd(cusolverDnHandle_t handle, signed char jobu, signed char jobvt, - int m, int n, cuDoubleComplex *A, int lda, double *S, - cuDoubleComplex *U, int ldu, cuDoubleComplex *VT, int ldvt, - cuDoubleComplex *work, int lwork, double *rwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, signed char, signed char, int, int, cuDoubleComplex *, - int, double *, cuDoubleComplex *, int, cuDoubleComplex *, int, - 
cuDoubleComplex *, int, double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZgesvd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobu, jobvt, m, n, A, lda, S, U, ldu, VT, ldvt, work, - lwork, rwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsyevd_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const float *A, int lda, const float *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const float *, int, const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsyevd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsyevd_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const double *A, int lda, const double *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const double *, int, const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsyevd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCheevd_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const cuComplex *A, int lda, const float *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const cuComplex *, int, const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnCheevd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZheevd_bufferSize( - cusolverDnHandle_t handle, 
cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const cuDoubleComplex *A, int lda, const double *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const cuDoubleComplex *, int, const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZheevd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsyevd( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, float *A, int lda, float *W, float *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, float *, - int, float *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsyevd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsyevd( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, double *A, int lda, double *W, double *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, double *, - int, double *, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsyevd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCheevd(cusolverDnHandle_t handle, - cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, - cuComplex *A, int lda, float *W, - cuComplex *work, int lwork, - int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, cuComplex *, - int, float *, cuComplex *, int, int *); - static auto func_ptr = 
LoadSymbol("cusolverDnCheevd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZheevd(cusolverDnHandle_t handle, - cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, - cuDoubleComplex *A, int lda, - double *W, cuDoubleComplex *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - cuDoubleComplex *, int, double *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZheevd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsyevdx_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cusolverEigRange_t range, - cublasFillMode_t uplo, int n, const float *A, int lda, float vl, float vu, - int il, int iu, int *meig, const float *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cusolverEigRange_t, - cublasFillMode_t, int, const float *, int, float, float, int, int, int *, - const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsyevdx_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, range, uplo, n, A, lda, vl, vu, il, iu, meig, W, - lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsyevdx_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cusolverEigRange_t range, - cublasFillMode_t uplo, int n, const double *A, int lda, double vl, - double vu, int il, int iu, int *meig, const double *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cusolverEigRange_t, - cublasFillMode_t, int, const double *, int, double, double, int, int, - int *, const double *, int *); - static auto func_ptr = 
LoadSymbol("cusolverDnDsyevdx_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, range, uplo, n, A, lda, vl, vu, il, iu, meig, W, - lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCheevdx_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cusolverEigRange_t range, - cublasFillMode_t uplo, int n, const cuComplex *A, int lda, float vl, - float vu, int il, int iu, int *meig, const float *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cusolverEigRange_t, - cublasFillMode_t, int, const cuComplex *, int, float, float, int, int, - int *, const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnCheevdx_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, range, uplo, n, A, lda, vl, vu, il, iu, meig, W, - lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZheevdx_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cusolverEigRange_t range, - cublasFillMode_t uplo, int n, const cuDoubleComplex *A, int lda, double vl, - double vu, int il, int iu, int *meig, const double *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cusolverEigRange_t, - cublasFillMode_t, int, const cuDoubleComplex *, int, double, double, int, - int, int *, const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZheevdx_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, range, uplo, n, A, lda, vl, vu, il, iu, meig, W, - lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsyevdx( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cusolverEigRange_t range, - cublasFillMode_t uplo, int n, float *A, int lda, float vl, float vu, int il, - int iu, int *meig, float *W, float *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, 
cusolverEigRange_t, - cublasFillMode_t, int, float *, int, float, float, int, int, int *, - float *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsyevdx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, range, uplo, n, A, lda, vl, vu, il, iu, meig, W, - work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsyevdx( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cusolverEigRange_t range, - cublasFillMode_t uplo, int n, double *A, int lda, double vl, double vu, - int il, int iu, int *meig, double *W, double *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cusolverEigRange_t, - cublasFillMode_t, int, double *, int, double, double, int, int, int *, - double *, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsyevdx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, range, uplo, n, A, lda, vl, vu, il, iu, meig, W, - work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCheevdx(cusolverDnHandle_t handle, cusolverEigMode_t jobz, - cusolverEigRange_t range, cublasFillMode_t uplo, int n, - cuComplex *A, int lda, float vl, float vu, int il, int iu, - int *meig, float *W, cuComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cusolverEigRange_t, - cublasFillMode_t, int, cuComplex *, int, float, float, int, int, int *, - float *, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnCheevdx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, range, uplo, n, A, lda, vl, vu, il, iu, meig, W, - work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZheevdx( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cusolverEigRange_t range, - cublasFillMode_t uplo, int n, cuDoubleComplex *A, int lda, double vl, - double vu, int il, 
int iu, int *meig, double *W, cuDoubleComplex *work, - int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cusolverEigRange_t, - cublasFillMode_t, int, cuDoubleComplex *, int, double, double, int, int, - int *, double *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZheevdx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, range, uplo, n, A, lda, vl, vu, il, iu, meig, W, - work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsygvdx_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cusolverEigRange_t range, cublasFillMode_t uplo, int n, const float *A, - int lda, const float *B, int ldb, float vl, float vu, int il, int iu, - int *meig, const float *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cusolverEigRange_t, cublasFillMode_t, int, const float *, int, - const float *, int, float, float, int, int, int *, const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsygvdx_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, - il, iu, meig, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsygvdx_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cusolverEigRange_t range, cublasFillMode_t uplo, int n, const double *A, - int lda, const double *B, int ldb, double vl, double vu, int il, int iu, - int *meig, const double *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cusolverEigRange_t, cublasFillMode_t, int, const double *, int, - const double *, int, double, double, int, int, int *, const double *, - int *); - static auto func_ptr = 
LoadSymbol("cusolverDnDsygvdx_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, - il, iu, meig, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnChegvdx_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cusolverEigRange_t range, cublasFillMode_t uplo, int n, const cuComplex *A, - int lda, const cuComplex *B, int ldb, float vl, float vu, int il, int iu, - int *meig, const float *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cusolverEigRange_t, cublasFillMode_t, int, const cuComplex *, int, - const cuComplex *, int, float, float, int, int, int *, const float *, - int *); - static auto func_ptr = LoadSymbol("cusolverDnChegvdx_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, - il, iu, meig, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZhegvdx_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cusolverEigRange_t range, cublasFillMode_t uplo, int n, - const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, - double vl, double vu, int il, int iu, int *meig, const double *W, - int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cusolverEigRange_t, cublasFillMode_t, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, double, double, int, int, int *, - const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZhegvdx_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, - il, iu, meig, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsygvdx( - cusolverDnHandle_t handle, cusolverEigType_t itype, 
cusolverEigMode_t jobz, - cusolverEigRange_t range, cublasFillMode_t uplo, int n, float *A, int lda, - float *B, int ldb, float vl, float vu, int il, int iu, int *meig, float *W, - float *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cusolverEigRange_t, cublasFillMode_t, int, float *, int, float *, int, - float, float, int, int, int *, float *, float *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsygvdx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, - il, iu, meig, W, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsygvdx( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cusolverEigRange_t range, cublasFillMode_t uplo, int n, double *A, int lda, - double *B, int ldb, double vl, double vu, int il, int iu, int *meig, - double *W, double *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cusolverEigRange_t, cublasFillMode_t, int, double *, int, double *, int, - double, double, int, int, int *, double *, double *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsygvdx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, - il, iu, meig, W, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnChegvdx( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cusolverEigRange_t range, cublasFillMode_t uplo, int n, cuComplex *A, - int lda, cuComplex *B, int ldb, float vl, float vu, int il, int iu, - int *meig, float *W, cuComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cusolverEigRange_t, cublasFillMode_t, int, 
cuComplex *, int, cuComplex *, - int, float, float, int, int, int *, float *, cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnChegvdx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, - il, iu, meig, W, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZhegvdx( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cusolverEigRange_t range, cublasFillMode_t uplo, int n, cuDoubleComplex *A, - int lda, cuDoubleComplex *B, int ldb, double vl, double vu, int il, int iu, - int *meig, double *W, cuDoubleComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cusolverEigRange_t, cublasFillMode_t, int, cuDoubleComplex *, int, - cuDoubleComplex *, int, double, double, int, int, int *, double *, - cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZhegvdx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, - il, iu, meig, W, work, lwork, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsygvd_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, const float *A, int lda, const float *B, - int ldb, const float *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, const float *, int, const float *, int, - const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnSsygvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsygvd_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - 
cublasFillMode_t uplo, int n, const double *A, int lda, const double *B, - int ldb, const double *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, const double *, int, const double *, int, - const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsygvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnChegvd_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, const cuComplex *A, int lda, - const cuComplex *B, int ldb, const float *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, const cuComplex *, int, const cuComplex *, int, - const float *, int *); - static auto func_ptr = LoadSymbol("cusolverDnChegvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZhegvd_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, const cuDoubleComplex *A, int lda, - const cuDoubleComplex *B, int ldb, const double *W, int *lwork) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const double *, int *); - static auto func_ptr = LoadSymbol("cusolverDnZhegvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsygvd( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t 
jobz, - cublasFillMode_t uplo, int n, float *A, int lda, float *B, int ldb, - float *W, float *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, float *, int, float *, int, float *, float *, int, - int *); - static auto func_ptr = LoadSymbol("cusolverDnSsygvd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, - info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsygvd( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, double *A, int lda, double *B, int ldb, - double *W, double *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, double *, int, double *, int, double *, double *, - int, int *); - static auto func_ptr = LoadSymbol("cusolverDnDsygvd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, - info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnChegvd( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, cuComplex *A, int lda, cuComplex *B, int ldb, - float *W, cuComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, cuComplex *, int, cuComplex *, int, float *, - cuComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnChegvd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, - info); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnZhegvd(cusolverDnHandle_t handle, cusolverEigType_t itype, - cusolverEigMode_t jobz, cublasFillMode_t uplo, int n, - 
cuDoubleComplex *A, int lda, cuDoubleComplex *B, int ldb, - double *W, cuDoubleComplex *work, int lwork, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int, - double *, cuDoubleComplex *, int, int *); - static auto func_ptr = LoadSymbol("cusolverDnZhegvd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, - info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCreateSyevjInfo(syevjInfo_t *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(syevjInfo_t *); - static auto func_ptr = LoadSymbol("cusolverDnCreateSyevjInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDestroySyevjInfo(syevjInfo_t info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnDestroySyevjInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXsyevjSetTolerance(syevjInfo_t info, - double tolerance) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(syevjInfo_t, double); - static auto func_ptr = LoadSymbol("cusolverDnXsyevjSetTolerance"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, tolerance); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXsyevjSetMaxSweeps(syevjInfo_t info, - int max_sweeps) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(syevjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnXsyevjSetMaxSweeps"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, max_sweeps); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXsyevjSetSortEig(syevjInfo_t info, - int sort_eig) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(syevjInfo_t, int); - static auto func_ptr = 
LoadSymbol("cusolverDnXsyevjSetSortEig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, sort_eig); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXsyevjGetResidual( - cusolverDnHandle_t handle, syevjInfo_t info, double *residual) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, - syevjInfo_t, double *); - static auto func_ptr = LoadSymbol("cusolverDnXsyevjGetResidual"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, residual); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXsyevjGetSweeps( - cusolverDnHandle_t handle, syevjInfo_t info, int *executed_sweeps) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, syevjInfo_t, int *); - static auto func_ptr = LoadSymbol("cusolverDnXsyevjGetSweeps"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, executed_sweeps); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsyevjBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const float *A, int lda, const float *W, int *lwork, - syevjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const float *, int, const float *, int *, syevjInfo_t, int); - static auto func_ptr = - LoadSymbol("cusolverDnSsyevjBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork, params, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsyevjBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const double *A, int lda, const double *W, int *lwork, - syevjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const double *, int, const double *, int *, syevjInfo_t, int); - static auto func_ptr = - 
LoadSymbol("cusolverDnDsyevjBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork, params, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCheevjBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const cuComplex *A, int lda, const float *W, int *lwork, - syevjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const cuComplex *, int, const float *, int *, syevjInfo_t, int); - static auto func_ptr = - LoadSymbol("cusolverDnCheevjBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork, params, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZheevjBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const cuDoubleComplex *A, int lda, const double *W, int *lwork, - syevjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const cuDoubleComplex *, int, const double *, int *, syevjInfo_t, int); - static auto func_ptr = - LoadSymbol("cusolverDnZheevjBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork, params, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsyevjBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, float *A, int lda, float *W, float *work, int lwork, int *info, - syevjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, float *, - int, float *, float *, int, int *, syevjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnSsyevjBatched"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params, - batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsyevjBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, double *A, int lda, double *W, double *work, int lwork, int *info, - syevjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, double *, - int, double *, double *, int, int *, syevjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnDsyevjBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params, - batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCheevjBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, cuComplex *A, int lda, float *W, cuComplex *work, int lwork, - int *info, syevjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, cuComplex *, - int, float *, cuComplex *, int, int *, syevjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnCheevjBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params, - batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZheevjBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, cuDoubleComplex *A, int lda, double *W, cuDoubleComplex *work, - int lwork, int *info, syevjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - cuDoubleComplex *, int, double *, cuDoubleComplex *, int, int *, - syevjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnZheevjBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params, - batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsyevj_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const float *A, int lda, const float *W, int *lwork, - syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const float *, int, const float *, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnSsyevj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsyevj_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const double *A, int lda, const double *W, int *lwork, - syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const double *, int, const double *, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnDsyevj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCheevj_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, const cuComplex *A, int lda, const float *W, int *lwork, - syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const cuComplex *, int, const float *, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnCheevj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZheevj_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - 
int n, const cuDoubleComplex *A, int lda, const double *W, int *lwork, - syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - const cuDoubleComplex *, int, const double *, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnZheevj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, lwork, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsyevj(cusolverDnHandle_t handle, - cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, - float *A, int lda, float *W, - float *work, int lwork, int *info, - syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, float *, - int, float *, float *, int, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnSsyevj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsyevj(cusolverDnHandle_t handle, - cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, - double *A, int lda, double *W, - double *work, int lwork, - int *info, syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, double *, - int, double *, double *, int, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnDsyevj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCheevj(cusolverDnHandle_t handle, - cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, - cuComplex *A, int lda, float *W, - cuComplex *work, int lwork, - int *info, syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, 
int, cuComplex *, - int, float *, cuComplex *, int, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnCheevj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZheevj( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, - int n, cuDoubleComplex *A, int lda, double *W, cuDoubleComplex *work, - int lwork, int *info, syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, cublasFillMode_t, int, - cuDoubleComplex *, int, double *, cuDoubleComplex *, int, int *, - syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnZheevj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, uplo, n, A, lda, W, work, lwork, info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsygvj_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, const float *A, int lda, const float *B, - int ldb, const float *W, int *lwork, syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, const float *, int, const float *, int, - const float *, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnSsygvj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork, - params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsygvj_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, const double *A, int lda, const double *B, - int ldb, const double *W, int *lwork, syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, 
const double *, int, const double *, int, - const double *, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnDsygvj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork, - params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnChegvj_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, const cuComplex *A, int lda, - const cuComplex *B, int ldb, const float *W, int *lwork, - syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, const cuComplex *, int, const cuComplex *, int, - const float *, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnChegvj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork, - params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZhegvj_bufferSize( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, const cuDoubleComplex *A, int lda, - const cuDoubleComplex *B, int ldb, const double *W, int *lwork, - syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, const double *, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnZhegvj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, lwork, - params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSsygvj( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, float *A, int lda, float *B, int ldb, - float *W, float *work, int lwork, int *info, syevjInfo_t params) 
{ - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, float *, int, float *, int, float *, float *, int, - int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnSsygvj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, - info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDsygvj( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, double *A, int lda, double *B, int ldb, - double *W, double *work, int lwork, int *info, syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, double *, int, double *, int, double *, double *, - int, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnDsygvj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, - info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnChegvj( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, cuComplex *A, int lda, cuComplex *B, int ldb, - float *W, cuComplex *work, int lwork, int *info, syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, cuComplex *, int, cuComplex *, int, float *, - cuComplex *, int, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnChegvj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, - info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZhegvj( - cusolverDnHandle_t handle, cusolverEigType_t itype, cusolverEigMode_t jobz, - cublasFillMode_t uplo, int n, cuDoubleComplex *A, 
int lda, - cuDoubleComplex *B, int ldb, double *W, cuDoubleComplex *work, int lwork, - int *info, syevjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigType_t, cusolverEigMode_t, - cublasFillMode_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int, - double *, cuDoubleComplex *, int, int *, syevjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnZhegvj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, itype, jobz, uplo, n, A, lda, B, ldb, W, work, lwork, - info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCreateGesvdjInfo(gesvdjInfo_t *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(gesvdjInfo_t *); - static auto func_ptr = LoadSymbol("cusolverDnCreateGesvdjInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDestroyGesvdjInfo(gesvdjInfo_t info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnDestroyGesvdjInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXgesvdjSetTolerance(gesvdjInfo_t info, - double tolerance) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(gesvdjInfo_t, double); - static auto func_ptr = LoadSymbol("cusolverDnXgesvdjSetTolerance"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, tolerance); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXgesvdjSetMaxSweeps(gesvdjInfo_t info, - int max_sweeps) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(gesvdjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnXgesvdjSetMaxSweeps"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, max_sweeps); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXgesvdjSetSortEig(gesvdjInfo_t info, - int sort_svd) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(gesvdjInfo_t, int); - static auto 
func_ptr = LoadSymbol("cusolverDnXgesvdjSetSortEig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, sort_svd); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXgesvdjGetResidual( - cusolverDnHandle_t handle, gesvdjInfo_t info, double *residual) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, - gesvdjInfo_t, double *); - static auto func_ptr = LoadSymbol("cusolverDnXgesvdjGetResidual"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, residual); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXgesvdjGetSweeps( - cusolverDnHandle_t handle, gesvdjInfo_t info, int *executed_sweeps) { - using FuncPtr = - cusolverStatus_t(CUSOLVERAPI *)(cusolverDnHandle_t, gesvdjInfo_t, int *); - static auto func_ptr = LoadSymbol("cusolverDnXgesvdjGetSweeps"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, executed_sweeps); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgesvdjBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int m, int n, - const float *A, int lda, const float *S, const float *U, int ldu, - const float *V, int ldv, int *lwork, gesvdjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, const float *, int, - const float *, const float *, int, const float *, int, int *, - gesvdjInfo_t, int); - static auto func_ptr = - LoadSymbol("cusolverDnSgesvdjBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, lwork, params, - batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgesvdjBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int m, int n, - const double *A, int lda, const double *S, const double *U, int ldu, - const double *V, int ldv, int *lwork, gesvdjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - 
cusolverDnHandle_t, cusolverEigMode_t, int, int, const double *, int, - const double *, const double *, int, const double *, int, int *, - gesvdjInfo_t, int); - static auto func_ptr = - LoadSymbol("cusolverDnDgesvdjBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, lwork, params, - batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgesvdjBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int m, int n, - const cuComplex *A, int lda, const float *S, const cuComplex *U, int ldu, - const cuComplex *V, int ldv, int *lwork, gesvdjInfo_t params, - int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, const cuComplex *, int, - const float *, const cuComplex *, int, const cuComplex *, int, int *, - gesvdjInfo_t, int); - static auto func_ptr = - LoadSymbol("cusolverDnCgesvdjBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, lwork, params, - batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgesvdjBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int m, int n, - const cuDoubleComplex *A, int lda, const double *S, - const cuDoubleComplex *U, int ldu, const cuDoubleComplex *V, int ldv, - int *lwork, gesvdjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, const cuDoubleComplex *, - int, const double *, const cuDoubleComplex *, int, - const cuDoubleComplex *, int, int *, gesvdjInfo_t, int); - static auto func_ptr = - LoadSymbol("cusolverDnZgesvdjBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, lwork, params, - batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgesvdjBatched( - cusolverDnHandle_t handle, 
cusolverEigMode_t jobz, int m, int n, float *A, - int lda, float *S, float *U, int ldu, float *V, int ldv, float *work, - int lwork, int *info, gesvdjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, float *, int, float *, - float *, int, float *, int, float *, int, int *, gesvdjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnSgesvdjBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, - info, params, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgesvdjBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int m, int n, double *A, - int lda, double *S, double *U, int ldu, double *V, int ldv, double *work, - int lwork, int *info, gesvdjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, double *, int, double *, - double *, int, double *, int, double *, int, int *, gesvdjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnDgesvdjBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, - info, params, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgesvdjBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int m, int n, - cuComplex *A, int lda, float *S, cuComplex *U, int ldu, cuComplex *V, - int ldv, cuComplex *work, int lwork, int *info, gesvdjInfo_t params, - int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, cuComplex *, int, - float *, cuComplex *, int, cuComplex *, int, cuComplex *, int, int *, - gesvdjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnCgesvdjBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, - 
info, params, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgesvdjBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int m, int n, - cuDoubleComplex *A, int lda, double *S, cuDoubleComplex *U, int ldu, - cuDoubleComplex *V, int ldv, cuDoubleComplex *work, int lwork, int *info, - gesvdjInfo_t params, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, cuDoubleComplex *, int, - double *, cuDoubleComplex *, int, cuDoubleComplex *, int, - cuDoubleComplex *, int, int *, gesvdjInfo_t, int); - static auto func_ptr = LoadSymbol("cusolverDnZgesvdjBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, - info, params, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgesvdj_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, - const float *A, int lda, const float *S, const float *U, int ldu, - const float *V, int ldv, int *lwork, gesvdjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, const float *, int, - const float *, const float *, int, const float *, int, int *, - gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnSgesvdj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, lwork, - params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgesvdj_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, - const double *A, int lda, const double *S, const double *U, int ldu, - const double *V, int ldv, int *lwork, gesvdjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, const double *, int, - const double *, const double *, int, const double *, int, int *, - gesvdjInfo_t); - static auto func_ptr 
= LoadSymbol("cusolverDnDgesvdj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, lwork, - params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgesvdj_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, - const cuComplex *A, int lda, const float *S, const cuComplex *U, int ldu, - const cuComplex *V, int ldv, int *lwork, gesvdjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, const cuComplex *, - int, const float *, const cuComplex *, int, const cuComplex *, int, int *, - gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnCgesvdj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, lwork, - params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgesvdj_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, - const cuDoubleComplex *A, int lda, const double *S, - const cuDoubleComplex *U, int ldu, const cuDoubleComplex *V, int ldv, - int *lwork, gesvdjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, - const cuDoubleComplex *, int, const double *, const cuDoubleComplex *, - int, const cuDoubleComplex *, int, int *, gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnZgesvdj_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, lwork, - params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgesvdj( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, - float *A, int lda, float *S, float *U, int ldu, float *V, int ldv, - float *work, int lwork, int *info, gesvdjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, 
cusolverEigMode_t, int, int, int, float *, int, - float *, float *, int, float *, int, float *, int, int *, gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnSgesvdj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, work, - lwork, info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgesvdj( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, - double *A, int lda, double *S, double *U, int ldu, double *V, int ldv, - double *work, int lwork, int *info, gesvdjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, double *, int, - double *, double *, int, double *, int, double *, int, int *, - gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnDgesvdj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, work, - lwork, info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgesvdj( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, - cuComplex *A, int lda, float *S, cuComplex *U, int ldu, cuComplex *V, - int ldv, cuComplex *work, int lwork, int *info, gesvdjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, cuComplex *, int, - float *, cuComplex *, int, cuComplex *, int, cuComplex *, int, int *, - gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnCgesvdj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, work, - lwork, info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgesvdj( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int econ, int m, int n, - cuDoubleComplex *A, int lda, double *S, cuDoubleComplex *U, int ldu, - cuDoubleComplex *V, int ldv, cuDoubleComplex *work, int lwork, int *info, - 
gesvdjInfo_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, cuDoubleComplex *, - int, double *, cuDoubleComplex *, int, cuDoubleComplex *, int, - cuDoubleComplex *, int, int *, gesvdjInfo_t); - static auto func_ptr = LoadSymbol("cusolverDnZgesvdj"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, work, - lwork, info, params); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgesvdaStridedBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int rank, int m, int n, - const float *d_A, int lda, long long int strideA, const float *d_S, - long long int strideS, const float *d_U, int ldu, long long int strideU, - const float *d_V, int ldv, long long int strideV, int *lwork, - int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, const float *, int, - long long, const float *, long long, const float *, int, long long, - const float *, int, long long, int *, int); - static auto func_ptr = - LoadSymbol("cusolverDnSgesvdaStridedBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, rank, m, n, d_A, lda, strideA, d_S, strideS, - d_U, ldu, strideU, d_V, ldv, strideV, lwork, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgesvdaStridedBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int rank, int m, int n, - const double *d_A, int lda, long long int strideA, const double *d_S, - long long int strideS, const double *d_U, int ldu, long long int strideU, - const double *d_V, int ldv, long long int strideV, int *lwork, - int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, const double *, int, - long long, const double *, long long, const double *, int, long long, - const double *, int, long long, int *, 
int); - static auto func_ptr = - LoadSymbol("cusolverDnDgesvdaStridedBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, rank, m, n, d_A, lda, strideA, d_S, strideS, - d_U, ldu, strideU, d_V, ldv, strideV, lwork, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgesvdaStridedBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int rank, int m, int n, - const cuComplex *d_A, int lda, long long int strideA, const float *d_S, - long long int strideS, const cuComplex *d_U, int ldu, long long int strideU, - const cuComplex *d_V, int ldv, long long int strideV, int *lwork, - int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, const cuComplex *, - int, long long, const float *, long long, const cuComplex *, int, - long long, const cuComplex *, int, long long, int *, int); - static auto func_ptr = - LoadSymbol("cusolverDnCgesvdaStridedBatched_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, rank, m, n, d_A, lda, strideA, d_S, strideS, - d_U, ldu, strideU, d_V, ldv, strideV, lwork, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgesvdaStridedBatched_bufferSize( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int rank, int m, int n, - const cuDoubleComplex *d_A, int lda, long long int strideA, - const double *d_S, long long int strideS, const cuDoubleComplex *d_U, - int ldu, long long int strideU, const cuDoubleComplex *d_V, int ldv, - long long int strideV, int *lwork, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, - const cuDoubleComplex *, int, long long, const double *, long long, - const cuDoubleComplex *, int, long long, const cuDoubleComplex *, int, - long long, int *, int); - static auto func_ptr = - LoadSymbol("cusolverDnZgesvdaStridedBatched_bufferSize"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, jobz, rank, m, n, d_A, lda, strideA, d_S, strideS, - d_U, ldu, strideU, d_V, ldv, strideV, lwork, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSgesvdaStridedBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int rank, int m, int n, - const float *d_A, int lda, long long int strideA, float *d_S, - long long int strideS, float *d_U, int ldu, long long int strideU, - float *d_V, int ldv, long long int strideV, float *d_work, int lwork, - int *d_info, double *h_R_nrmF, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, const float *, int, - long long, float *, long long, float *, int, long long, float *, int, - long long, float *, int, int *, double *, int); - static auto func_ptr = LoadSymbol("cusolverDnSgesvdaStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, rank, m, n, d_A, lda, strideA, d_S, strideS, - d_U, ldu, strideU, d_V, ldv, strideV, d_work, lwork, d_info, - h_R_nrmF, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnDgesvdaStridedBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int rank, int m, int n, - const double *d_A, int lda, long long int strideA, double *d_S, - long long int strideS, double *d_U, int ldu, long long int strideU, - double *d_V, int ldv, long long int strideV, double *d_work, int lwork, - int *d_info, double *h_R_nrmF, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, const double *, int, - long long, double *, long long, double *, int, long long, double *, int, - long long, double *, int, int *, double *, int); - static auto func_ptr = LoadSymbol("cusolverDnDgesvdaStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, rank, m, n, d_A, lda, strideA, d_S, strideS, - d_U, ldu, strideU, d_V, ldv, strideV, 
d_work, lwork, d_info, - h_R_nrmF, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnCgesvdaStridedBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int rank, int m, int n, - const cuComplex *d_A, int lda, long long int strideA, float *d_S, - long long int strideS, cuComplex *d_U, int ldu, long long int strideU, - cuComplex *d_V, int ldv, long long int strideV, cuComplex *d_work, - int lwork, int *d_info, double *h_R_nrmF, int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, const cuComplex *, - int, long long, float *, long long, cuComplex *, int, long long, - cuComplex *, int, long long, cuComplex *, int, int *, double *, int); - static auto func_ptr = LoadSymbol("cusolverDnCgesvdaStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, rank, m, n, d_A, lda, strideA, d_S, strideS, - d_U, ldu, strideU, d_V, ldv, strideV, d_work, lwork, d_info, - h_R_nrmF, batchSize); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnZgesvdaStridedBatched( - cusolverDnHandle_t handle, cusolverEigMode_t jobz, int rank, int m, int n, - const cuDoubleComplex *d_A, int lda, long long int strideA, double *d_S, - long long int strideS, cuDoubleComplex *d_U, int ldu, long long int strideU, - cuDoubleComplex *d_V, int ldv, long long int strideV, - cuDoubleComplex *d_work, int lwork, int *d_info, double *h_R_nrmF, - int batchSize) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverEigMode_t, int, int, int, - const cuDoubleComplex *, int, long long, double *, long long, - cuDoubleComplex *, int, long long, cuDoubleComplex *, int, long long, - cuDoubleComplex *, int, int *, double *, int); - static auto func_ptr = LoadSymbol("cusolverDnZgesvdaStridedBatched"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, jobz, rank, m, n, d_A, lda, strideA, d_S, strideS, - d_U, ldu, strideU, d_V, ldv, strideV, d_work, 
lwork, d_info, - h_R_nrmF, batchSize); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnCreateParams(cusolverDnParams_t *params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnParams_t *); - static auto func_ptr = LoadSymbol("cusolverDnCreateParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnDestroyParams(cusolverDnParams_t params) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)(cusolverDnParams_t); - static auto func_ptr = LoadSymbol("cusolverDnDestroyParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnSetAdvOptions(cusolverDnParams_t params, - cusolverDnFunction_t function, cusolverAlgMode_t algo) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnParams_t, cusolverDnFunction_t, cusolverAlgMode_t); - static auto func_ptr = LoadSymbol("cusolverDnSetAdvOptions"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(params, function, algo); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnPotrf_bufferSize( - cusolverDnHandle_t handle, cusolverDnParams_t params, cublasFillMode_t uplo, - int64_t n, cudaDataType dataTypeA, const void *A, int64_t lda, - cudaDataType computeType, size_t *workspaceInBytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnParams_t, cublasFillMode_t, int64_t, - cudaDataType, const void *, int64_t, cudaDataType, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnPotrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, uplo, n, dataTypeA, A, lda, computeType, - workspaceInBytes); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnPotrf(cusolverDnHandle_t handle, cusolverDnParams_t params, - cublasFillMode_t uplo, int64_t n, cudaDataType dataTypeA, - void *A, int64_t lda, cudaDataType computeType, void *pBuffer, - size_t workspaceInBytes, int *info) { - using 
FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnParams_t, cublasFillMode_t, int64_t, - cudaDataType, void *, int64_t, cudaDataType, void *, size_t, int *); - static auto func_ptr = LoadSymbol("cusolverDnPotrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, uplo, n, dataTypeA, A, lda, computeType, - pBuffer, workspaceInBytes, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnPotrs( - cusolverDnHandle_t handle, cusolverDnParams_t params, cublasFillMode_t uplo, - int64_t n, int64_t nrhs, cudaDataType dataTypeA, const void *A, int64_t lda, - cudaDataType dataTypeB, void *B, int64_t ldb, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnParams_t, cublasFillMode_t, int64_t, - int64_t, cudaDataType, const void *, int64_t, cudaDataType, void *, - int64_t, int *); - static auto func_ptr = LoadSymbol("cusolverDnPotrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, uplo, n, nrhs, dataTypeA, A, lda, dataTypeB, - B, ldb, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnGeqrf_bufferSize( - cusolverDnHandle_t handle, cusolverDnParams_t params, int64_t m, int64_t n, - cudaDataType dataTypeA, const void *A, int64_t lda, - cudaDataType dataTypeTau, const void *tau, cudaDataType computeType, - size_t *workspaceInBytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnParams_t, int64_t, int64_t, cudaDataType, - const void *, int64_t, cudaDataType, const void *, cudaDataType, - size_t *); - static auto func_ptr = LoadSymbol("cusolverDnGeqrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, m, n, dataTypeA, A, lda, dataTypeTau, tau, - computeType, workspaceInBytes); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnGeqrf(cusolverDnHandle_t handle, cusolverDnParams_t params, int64_t m, - int64_t n, cudaDataType dataTypeA, void *A, int64_t lda, - 
cudaDataType dataTypeTau, void *tau, cudaDataType computeType, - void *pBuffer, size_t workspaceInBytes, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnParams_t, int64_t, int64_t, cudaDataType, - void *, int64_t, cudaDataType, void *, cudaDataType, void *, size_t, - int *); - static auto func_ptr = LoadSymbol("cusolverDnGeqrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, m, n, dataTypeA, A, lda, dataTypeTau, tau, - computeType, pBuffer, workspaceInBytes, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnGetrf_bufferSize( - cusolverDnHandle_t handle, cusolverDnParams_t params, int64_t m, int64_t n, - cudaDataType dataTypeA, const void *A, int64_t lda, - cudaDataType computeType, size_t *workspaceInBytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnParams_t, int64_t, int64_t, cudaDataType, - const void *, int64_t, cudaDataType, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnGetrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, m, n, dataTypeA, A, lda, computeType, - workspaceInBytes); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnGetrf(cusolverDnHandle_t handle, cusolverDnParams_t params, int64_t m, - int64_t n, cudaDataType dataTypeA, void *A, int64_t lda, - int64_t *ipiv, cudaDataType computeType, void *pBuffer, - size_t workspaceInBytes, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnParams_t, int64_t, int64_t, cudaDataType, - void *, int64_t, int64_t *, cudaDataType, void *, size_t, int *); - static auto func_ptr = LoadSymbol("cusolverDnGetrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, m, n, dataTypeA, A, lda, ipiv, computeType, - pBuffer, workspaceInBytes, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnGetrs( - cusolverDnHandle_t handle, cusolverDnParams_t params, - 
cublasOperation_t trans, int64_t n, int64_t nrhs, cudaDataType dataTypeA, - const void *A, int64_t lda, const int64_t *ipiv, cudaDataType dataTypeB, - void *B, int64_t ldb, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnParams_t, cublasOperation_t, int64_t, - int64_t, cudaDataType, const void *, int64_t, const int64_t *, - cudaDataType, void *, int64_t, int *); - static auto func_ptr = LoadSymbol("cusolverDnGetrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, trans, n, nrhs, dataTypeA, A, lda, ipiv, - dataTypeB, B, ldb, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSyevd_bufferSize( - cusolverDnHandle_t handle, cusolverDnParams_t params, - cusolverEigMode_t jobz, cublasFillMode_t uplo, int64_t n, - cudaDataType dataTypeA, const void *A, int64_t lda, cudaDataType dataTypeW, - const void *W, cudaDataType computeType, size_t *workspaceInBytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnParams_t, cusolverEigMode_t, - cublasFillMode_t, int64_t, cudaDataType, const void *, int64_t, - cudaDataType, const void *, cudaDataType, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnSyevd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, jobz, uplo, n, dataTypeA, A, lda, dataTypeW, - W, computeType, workspaceInBytes); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnSyevd(cusolverDnHandle_t handle, cusolverDnParams_t params, - cusolverEigMode_t jobz, cublasFillMode_t uplo, int64_t n, - cudaDataType dataTypeA, void *A, int64_t lda, - cudaDataType dataTypeW, void *W, cudaDataType computeType, - void *pBuffer, size_t workspaceInBytes, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnParams_t, cusolverEigMode_t, - cublasFillMode_t, int64_t, cudaDataType, void *, int64_t, cudaDataType, - void *, cudaDataType, void *, size_t, int *); - static auto 
func_ptr = LoadSymbol("cusolverDnSyevd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, jobz, uplo, n, dataTypeA, A, lda, dataTypeW, - W, computeType, pBuffer, workspaceInBytes, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSyevdx_bufferSize( - cusolverDnHandle_t handle, cusolverDnParams_t params, - cusolverEigMode_t jobz, cusolverEigRange_t range, cublasFillMode_t uplo, - int64_t n, cudaDataType dataTypeA, const void *A, int64_t lda, void *vl, - void *vu, int64_t il, int64_t iu, int64_t *h_meig, cudaDataType dataTypeW, - const void *W, cudaDataType computeType, size_t *workspaceInBytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnParams_t, cusolverEigMode_t, - cusolverEigRange_t, cublasFillMode_t, int64_t, cudaDataType, const void *, - int64_t, void *, void *, int64_t, int64_t, int64_t *, cudaDataType, - const void *, cudaDataType, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnSyevdx_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, jobz, range, uplo, n, dataTypeA, A, lda, vl, - vu, il, iu, h_meig, dataTypeW, W, computeType, - workspaceInBytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnSyevdx( - cusolverDnHandle_t handle, cusolverDnParams_t params, - cusolverEigMode_t jobz, cusolverEigRange_t range, cublasFillMode_t uplo, - int64_t n, cudaDataType dataTypeA, void *A, int64_t lda, void *vl, void *vu, - int64_t il, int64_t iu, int64_t *meig64, cudaDataType dataTypeW, void *W, - cudaDataType computeType, void *pBuffer, size_t workspaceInBytes, - int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnParams_t, cusolverEigMode_t, - cusolverEigRange_t, cublasFillMode_t, int64_t, cudaDataType, void *, - int64_t, void *, void *, int64_t, int64_t, int64_t *, cudaDataType, - void *, cudaDataType, void *, size_t, int *); - static auto func_ptr = LoadSymbol("cusolverDnSyevdx"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, jobz, range, uplo, n, dataTypeA, A, lda, vl, - vu, il, iu, meig64, dataTypeW, W, computeType, pBuffer, - workspaceInBytes, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnGesvd_bufferSize( - cusolverDnHandle_t handle, cusolverDnParams_t params, signed char jobu, - signed char jobvt, int64_t m, int64_t n, cudaDataType dataTypeA, - const void *A, int64_t lda, cudaDataType dataTypeS, const void *S, - cudaDataType dataTypeU, const void *U, int64_t ldu, cudaDataType dataTypeVT, - const void *VT, int64_t ldvt, cudaDataType computeType, - size_t *workspaceInBytes) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnParams_t, signed char, signed char, int64_t, - int64_t, cudaDataType, const void *, int64_t, cudaDataType, const void *, - cudaDataType, const void *, int64_t, cudaDataType, const void *, int64_t, - cudaDataType, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnGesvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, jobu, jobvt, m, n, dataTypeA, A, lda, - dataTypeS, S, dataTypeU, U, ldu, dataTypeVT, VT, ldvt, - computeType, workspaceInBytes); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnGesvd( - cusolverDnHandle_t handle, cusolverDnParams_t params, signed char jobu, - signed char jobvt, int64_t m, int64_t n, cudaDataType dataTypeA, void *A, - int64_t lda, cudaDataType dataTypeS, void *S, cudaDataType dataTypeU, - void *U, int64_t ldu, cudaDataType dataTypeVT, void *VT, int64_t ldvt, - cudaDataType computeType, void *pBuffer, size_t workspaceInBytes, - int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnParams_t, signed char, signed char, int64_t, - int64_t, cudaDataType, void *, int64_t, cudaDataType, void *, - cudaDataType, void *, int64_t, cudaDataType, void *, int64_t, - cudaDataType, void *, size_t, int *); - static auto func_ptr = 
LoadSymbol("cusolverDnGesvd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, jobu, jobvt, m, n, dataTypeA, A, lda, - dataTypeS, S, dataTypeU, U, ldu, dataTypeVT, VT, ldvt, - computeType, pBuffer, workspaceInBytes, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXpotrf_bufferSize( - cusolverDnHandle_t handle, cusolverDnParams_t params, cublasFillMode_t uplo, - int64_t n, cudaDataType dataTypeA, const void *A, int64_t lda, - cudaDataType computeType, size_t *workspaceInBytesOnDevice, - size_t *workspaceInBytesOnHost) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnParams_t, cublasFillMode_t, int64_t, - cudaDataType, const void *, int64_t, cudaDataType, size_t *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnXpotrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, uplo, n, dataTypeA, A, lda, computeType, - workspaceInBytesOnDevice, workspaceInBytesOnHost); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnXpotrf(cusolverDnHandle_t handle, cusolverDnParams_t params, - cublasFillMode_t uplo, int64_t n, cudaDataType dataTypeA, - void *A, int64_t lda, cudaDataType computeType, - void *bufferOnDevice, size_t workspaceInBytesOnDevice, - void *bufferOnHost, size_t workspaceInBytesOnHost, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnParams_t, cublasFillMode_t, int64_t, - cudaDataType, void *, int64_t, cudaDataType, void *, size_t, void *, - size_t, int *); - static auto func_ptr = LoadSymbol("cusolverDnXpotrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, uplo, n, dataTypeA, A, lda, computeType, - bufferOnDevice, workspaceInBytesOnDevice, bufferOnHost, - workspaceInBytesOnHost, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXpotrs( - cusolverDnHandle_t handle, cusolverDnParams_t params, cublasFillMode_t uplo, - int64_t n, int64_t nrhs, cudaDataType 
dataTypeA, const void *A, int64_t lda, - cudaDataType dataTypeB, void *B, int64_t ldb, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnParams_t, cublasFillMode_t, int64_t, - int64_t, cudaDataType, const void *, int64_t, cudaDataType, void *, - int64_t, int *); - static auto func_ptr = LoadSymbol("cusolverDnXpotrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, uplo, n, nrhs, dataTypeA, A, lda, dataTypeB, - B, ldb, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXgeqrf_bufferSize( - cusolverDnHandle_t handle, cusolverDnParams_t params, int64_t m, int64_t n, - cudaDataType dataTypeA, const void *A, int64_t lda, - cudaDataType dataTypeTau, const void *tau, cudaDataType computeType, - size_t *workspaceInBytesOnDevice, size_t *workspaceInBytesOnHost) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnParams_t, int64_t, int64_t, cudaDataType, - const void *, int64_t, cudaDataType, const void *, cudaDataType, size_t *, - size_t *); - static auto func_ptr = LoadSymbol("cusolverDnXgeqrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, m, n, dataTypeA, A, lda, dataTypeTau, tau, - computeType, workspaceInBytesOnDevice, - workspaceInBytesOnHost); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXgeqrf( - cusolverDnHandle_t handle, cusolverDnParams_t params, int64_t m, int64_t n, - cudaDataType dataTypeA, void *A, int64_t lda, cudaDataType dataTypeTau, - void *tau, cudaDataType computeType, void *bufferOnDevice, - size_t workspaceInBytesOnDevice, void *bufferOnHost, - size_t workspaceInBytesOnHost, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnParams_t, int64_t, int64_t, cudaDataType, - void *, int64_t, cudaDataType, void *, cudaDataType, void *, size_t, - void *, size_t, int *); - static auto func_ptr = LoadSymbol("cusolverDnXgeqrf"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, params, m, n, dataTypeA, A, lda, dataTypeTau, tau, - computeType, bufferOnDevice, workspaceInBytesOnDevice, - bufferOnHost, workspaceInBytesOnHost, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXgetrf_bufferSize( - cusolverDnHandle_t handle, cusolverDnParams_t params, int64_t m, int64_t n, - cudaDataType dataTypeA, const void *A, int64_t lda, - cudaDataType computeType, size_t *workspaceInBytesOnDevice, - size_t *workspaceInBytesOnHost) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnParams_t, int64_t, int64_t, cudaDataType, - const void *, int64_t, cudaDataType, size_t *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnXgetrf_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, m, n, dataTypeA, A, lda, computeType, - workspaceInBytesOnDevice, workspaceInBytesOnHost); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnXgetrf(cusolverDnHandle_t handle, cusolverDnParams_t params, - int64_t m, int64_t n, cudaDataType dataTypeA, void *A, - int64_t lda, int64_t *ipiv, cudaDataType computeType, - void *bufferOnDevice, size_t workspaceInBytesOnDevice, - void *bufferOnHost, size_t workspaceInBytesOnHost, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnParams_t, int64_t, int64_t, cudaDataType, - void *, int64_t, int64_t *, cudaDataType, void *, size_t, void *, size_t, - int *); - static auto func_ptr = LoadSymbol("cusolverDnXgetrf"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, m, n, dataTypeA, A, lda, ipiv, computeType, - bufferOnDevice, workspaceInBytesOnDevice, bufferOnHost, - workspaceInBytesOnHost, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXgetrs( - cusolverDnHandle_t handle, cusolverDnParams_t params, - cublasOperation_t trans, int64_t n, int64_t nrhs, cudaDataType dataTypeA, - const void *A, int64_t lda, const int64_t *ipiv, 
cudaDataType dataTypeB, - void *B, int64_t ldb, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnParams_t, cublasOperation_t, int64_t, - int64_t, cudaDataType, const void *, int64_t, const int64_t *, - cudaDataType, void *, int64_t, int *); - static auto func_ptr = LoadSymbol("cusolverDnXgetrs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, trans, n, nrhs, dataTypeA, A, lda, ipiv, - dataTypeB, B, ldb, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXsyevd_bufferSize( - cusolverDnHandle_t handle, cusolverDnParams_t params, - cusolverEigMode_t jobz, cublasFillMode_t uplo, int64_t n, - cudaDataType dataTypeA, const void *A, int64_t lda, cudaDataType dataTypeW, - const void *W, cudaDataType computeType, size_t *workspaceInBytesOnDevice, - size_t *workspaceInBytesOnHost) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnParams_t, cusolverEigMode_t, - cublasFillMode_t, int64_t, cudaDataType, const void *, int64_t, - cudaDataType, const void *, cudaDataType, size_t *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnXsyevd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, jobz, uplo, n, dataTypeA, A, lda, dataTypeW, - W, computeType, workspaceInBytesOnDevice, - workspaceInBytesOnHost); -} - -cusolverStatus_t CUSOLVERAPI -cusolverDnXsyevd(cusolverDnHandle_t handle, cusolverDnParams_t params, - cusolverEigMode_t jobz, cublasFillMode_t uplo, int64_t n, - cudaDataType dataTypeA, void *A, int64_t lda, - cudaDataType dataTypeW, void *W, cudaDataType computeType, - void *bufferOnDevice, size_t workspaceInBytesOnDevice, - void *bufferOnHost, size_t workspaceInBytesOnHost, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnParams_t, cusolverEigMode_t, - cublasFillMode_t, int64_t, cudaDataType, void *, int64_t, cudaDataType, - void *, cudaDataType, void *, 
size_t, void *, size_t, int *); - static auto func_ptr = LoadSymbol("cusolverDnXsyevd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, jobz, uplo, n, dataTypeA, A, lda, dataTypeW, - W, computeType, bufferOnDevice, workspaceInBytesOnDevice, - bufferOnHost, workspaceInBytesOnHost, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXsyevdx_bufferSize( - cusolverDnHandle_t handle, cusolverDnParams_t params, - cusolverEigMode_t jobz, cusolverEigRange_t range, cublasFillMode_t uplo, - int64_t n, cudaDataType dataTypeA, const void *A, int64_t lda, void *vl, - void *vu, int64_t il, int64_t iu, int64_t *h_meig, cudaDataType dataTypeW, - const void *W, cudaDataType computeType, size_t *workspaceInBytesOnDevice, - size_t *workspaceInBytesOnHost) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnParams_t, cusolverEigMode_t, - cusolverEigRange_t, cublasFillMode_t, int64_t, cudaDataType, const void *, - int64_t, void *, void *, int64_t, int64_t, int64_t *, cudaDataType, - const void *, cudaDataType, size_t *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnXsyevdx_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, jobz, range, uplo, n, dataTypeA, A, lda, vl, - vu, il, iu, h_meig, dataTypeW, W, computeType, - workspaceInBytesOnDevice, workspaceInBytesOnHost); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXsyevdx( - cusolverDnHandle_t handle, cusolverDnParams_t params, - cusolverEigMode_t jobz, cusolverEigRange_t range, cublasFillMode_t uplo, - int64_t n, cudaDataType dataTypeA, void *A, int64_t lda, void *vl, void *vu, - int64_t il, int64_t iu, int64_t *meig64, cudaDataType dataTypeW, void *W, - cudaDataType computeType, void *bufferOnDevice, - size_t workspaceInBytesOnDevice, void *bufferOnHost, - size_t workspaceInBytesOnHost, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnParams_t, cusolverEigMode_t, 
- cusolverEigRange_t, cublasFillMode_t, int64_t, cudaDataType, void *, - int64_t, void *, void *, int64_t, int64_t, int64_t *, cudaDataType, - void *, cudaDataType, void *, size_t, void *, size_t, int *); - static auto func_ptr = LoadSymbol("cusolverDnXsyevdx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, jobz, range, uplo, n, dataTypeA, A, lda, vl, - vu, il, iu, meig64, dataTypeW, W, computeType, bufferOnDevice, - workspaceInBytesOnDevice, bufferOnHost, - workspaceInBytesOnHost, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXgesvd_bufferSize( - cusolverDnHandle_t handle, cusolverDnParams_t params, signed char jobu, - signed char jobvt, int64_t m, int64_t n, cudaDataType dataTypeA, - const void *A, int64_t lda, cudaDataType dataTypeS, const void *S, - cudaDataType dataTypeU, const void *U, int64_t ldu, cudaDataType dataTypeVT, - const void *VT, int64_t ldvt, cudaDataType computeType, - size_t *workspaceInBytesOnDevice, size_t *workspaceInBytesOnHost) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnParams_t, signed char, signed char, int64_t, - int64_t, cudaDataType, const void *, int64_t, cudaDataType, const void *, - cudaDataType, const void *, int64_t, cudaDataType, const void *, int64_t, - cudaDataType, size_t *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnXgesvd_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, jobu, jobvt, m, n, dataTypeA, A, lda, - dataTypeS, S, dataTypeU, U, ldu, dataTypeVT, VT, ldvt, - computeType, workspaceInBytesOnDevice, - workspaceInBytesOnHost); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXgesvd( - cusolverDnHandle_t handle, cusolverDnParams_t params, signed char jobu, - signed char jobvt, int64_t m, int64_t n, cudaDataType dataTypeA, void *A, - int64_t lda, cudaDataType dataTypeS, void *S, cudaDataType dataTypeU, - void *U, int64_t ldu, cudaDataType dataTypeVT, void *VT, int64_t ldvt, - 
cudaDataType computeType, void *bufferOnDevice, - size_t workspaceInBytesOnDevice, void *bufferOnHost, - size_t workspaceInBytesOnHost, int *info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnParams_t, signed char, signed char, int64_t, - int64_t, cudaDataType, void *, int64_t, cudaDataType, void *, - cudaDataType, void *, int64_t, cudaDataType, void *, int64_t, - cudaDataType, void *, size_t, void *, size_t, int *); - static auto func_ptr = LoadSymbol("cusolverDnXgesvd"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, jobu, jobvt, m, n, dataTypeA, A, lda, - dataTypeS, S, dataTypeU, U, ldu, dataTypeVT, VT, ldvt, - computeType, bufferOnDevice, workspaceInBytesOnDevice, - bufferOnHost, workspaceInBytesOnHost, info); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXgesvdp_bufferSize( - cusolverDnHandle_t handle, cusolverDnParams_t params, - cusolverEigMode_t jobz, int econ, int64_t m, int64_t n, - cudaDataType dataTypeA, const void *A, int64_t lda, cudaDataType dataTypeS, - const void *S, cudaDataType dataTypeU, const void *U, int64_t ldu, - cudaDataType dataTypeV, const void *V, int64_t ldv, - cudaDataType computeType, size_t *workspaceInBytesOnDevice, - size_t *workspaceInBytesOnHost) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnParams_t, cusolverEigMode_t, int, int64_t, - int64_t, cudaDataType, const void *, int64_t, cudaDataType, const void *, - cudaDataType, const void *, int64_t, cudaDataType, const void *, int64_t, - cudaDataType, size_t *, size_t *); - static auto func_ptr = LoadSymbol("cusolverDnXgesvdp_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, jobz, econ, m, n, dataTypeA, A, lda, - dataTypeS, S, dataTypeU, U, ldu, dataTypeV, V, ldv, - computeType, workspaceInBytesOnDevice, - workspaceInBytesOnHost); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXgesvdp( - cusolverDnHandle_t handle, 
cusolverDnParams_t params, - cusolverEigMode_t jobz, int econ, int64_t m, int64_t n, - cudaDataType dataTypeA, void *A, int64_t lda, cudaDataType dataTypeS, - void *S, cudaDataType dataTypeU, void *U, int64_t ldu, - cudaDataType dataTypeV, void *V, int64_t ldv, cudaDataType computeType, - void *bufferOnDevice, size_t workspaceInBytesOnDevice, void *bufferOnHost, - size_t workspaceInBytesOnHost, int *d_info, double *h_err_sigma) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnParams_t, cusolverEigMode_t, int, int64_t, - int64_t, cudaDataType, void *, int64_t, cudaDataType, void *, - cudaDataType, void *, int64_t, cudaDataType, void *, int64_t, - cudaDataType, void *, size_t, void *, size_t, int *, double *); - static auto func_ptr = LoadSymbol("cusolverDnXgesvdp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, jobz, econ, m, n, dataTypeA, A, lda, - dataTypeS, S, dataTypeU, U, ldu, dataTypeV, V, ldv, - computeType, bufferOnDevice, workspaceInBytesOnDevice, - bufferOnHost, workspaceInBytesOnHost, d_info, h_err_sigma); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXgesvdr_bufferSize( - cusolverDnHandle_t handle, cusolverDnParams_t params, signed char jobu, - signed char jobv, int64_t m, int64_t n, int64_t k, int64_t p, - int64_t niters, cudaDataType dataTypeA, const void *A, int64_t lda, - cudaDataType dataTypeSrand, const void *Srand, cudaDataType dataTypeUrand, - const void *Urand, int64_t ldUrand, cudaDataType dataTypeVrand, - const void *Vrand, int64_t ldVrand, cudaDataType computeType, - size_t *workspaceInBytesOnDevice, size_t *workspaceInBytesOnHost) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnParams_t, signed char, signed char, int64_t, - int64_t, int64_t, int64_t, int64_t, cudaDataType, const void *, int64_t, - cudaDataType, const void *, cudaDataType, const void *, int64_t, - cudaDataType, const void *, int64_t, cudaDataType, size_t *, size_t *); 
- static auto func_ptr = LoadSymbol("cusolverDnXgesvdr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, jobu, jobv, m, n, k, p, niters, dataTypeA, A, - lda, dataTypeSrand, Srand, dataTypeUrand, Urand, ldUrand, - dataTypeVrand, Vrand, ldVrand, computeType, - workspaceInBytesOnDevice, workspaceInBytesOnHost); -} - -cusolverStatus_t CUSOLVERAPI cusolverDnXgesvdr( - cusolverDnHandle_t handle, cusolverDnParams_t params, signed char jobu, - signed char jobv, int64_t m, int64_t n, int64_t k, int64_t p, - int64_t niters, cudaDataType dataTypeA, void *A, int64_t lda, - cudaDataType dataTypeSrand, void *Srand, cudaDataType dataTypeUrand, - void *Urand, int64_t ldUrand, cudaDataType dataTypeVrand, void *Vrand, - int64_t ldVrand, cudaDataType computeType, void *bufferOnDevice, - size_t workspaceInBytesOnDevice, void *bufferOnHost, - size_t workspaceInBytesOnHost, int *d_info) { - using FuncPtr = cusolverStatus_t(CUSOLVERAPI *)( - cusolverDnHandle_t, cusolverDnParams_t, signed char, signed char, int64_t, - int64_t, int64_t, int64_t, int64_t, cudaDataType, void *, int64_t, - cudaDataType, void *, cudaDataType, void *, int64_t, cudaDataType, void *, - int64_t, cudaDataType, void *, size_t, void *, size_t, int *); - static auto func_ptr = LoadSymbol("cusolverDnXgesvdr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, params, jobu, jobv, m, n, k, p, niters, dataTypeA, A, - lda, dataTypeSrand, Srand, dataTypeUrand, Urand, ldUrand, - dataTypeVrand, Vrand, ldVrand, computeType, bufferOnDevice, - workspaceInBytesOnDevice, bufferOnHost, - workspaceInBytesOnHost, d_info); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cusolver_stub.cc b/third_party/xla/third_party/tsl/tsl/cuda/cusolver_stub.cc index f441d7ebe7da8a..d11601b3bd4217 100644 --- a/third_party/xla/third_party/tsl/tsl/cuda/cusolver_stub.cc +++ b/third_party/xla/third_party/tsl/tsl/cuda/cusolver_stub.cc @@ -35,28 +35,40 @@ 
void* GetDsoHandle() { #endif } -template -T LoadSymbol(const char* symbol_name) { +void* LoadSymbol(const char* symbol_name) { void* symbol = nullptr; if (auto handle = GetDsoHandle()) { tsl::Env::Default() ->GetSymbolFromLibrary(handle, symbol_name, &symbol) .IgnoreError(); } - return reinterpret_cast(symbol); + return symbol; } -cusolverStatus_t GetSymbolNotFoundError() { +const char* kSymbols[] = { +#include "tsl/cuda/cusolver.inc" +}; + +constexpr size_t kNumSymbols = sizeof(kSymbols) / sizeof(const char*); + +} // namespace + +extern "C" { + +static cusolverStatus_t GetSymbolNotFoundError() { return CUSOLVER_STATUS_INTERNAL_ERROR; } -} // namespace -#if CUDA_VERSION < 10010 -#include "tsl/cuda/cusolver_dense_10_0.inc" -#elif CUDA_VERSION < 10020 -#include "tsl/cuda/cusolver_dense_10_1.inc" -#elif CUDA_VERSION < 11000 -#include "tsl/cuda/cusolver_dense_10_2.inc" -#else -#include "tsl/cuda/cusolver_dense_11_0.inc" -#endif +extern void* _cusolver_tramp_table[]; + +void _cusolver_tramp_resolve(int i) { + CHECK_LE(0, i); + CHECK_LT(i, kNumSymbols); + void* p = LoadSymbol(kSymbols[i]); + if (!p) { + p = reinterpret_cast(&GetSymbolNotFoundError); + } + _cusolver_tramp_table[i] = p; +} + +} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cusparse.symbols b/third_party/xla/third_party/tsl/tsl/cuda/cusparse.symbols new file mode 100644 index 00000000000000..0b7ff187600793 --- /dev/null +++ b/third_party/xla/third_party/tsl/tsl/cuda/cusparse.symbols @@ -0,0 +1,421 @@ +cusparseAxpby +cusparseBlockedEllGet +cusparseBsrSetStridedBatch +cusparseCbsr2csr +cusparseCbsric02 +cusparseCbsric02_analysis +cusparseCbsric02_bufferSize +cusparseCbsrilu02 +cusparseCbsrilu02_analysis +cusparseCbsrilu02_bufferSize +cusparseCbsrilu02_numericBoost +cusparseCbsrmm +cusparseCbsrmv +cusparseCbsrsm2_analysis +cusparseCbsrsm2_bufferSize +cusparseCbsrsm2_solve +cusparseCbsrsv2_analysis +cusparseCbsrsv2_bufferSize +cusparseCbsrsv2_solve +cusparseCbsrxmv +cusparseCcsr2bsr 
+cusparseCcsr2csr_compress +cusparseCcsr2csru +cusparseCcsr2gebsr +cusparseCcsr2gebsr_bufferSize +cusparseCcsrcolor +cusparseCcsrgeam2 +cusparseCcsrgeam2_bufferSizeExt +cusparseCcsric02 +cusparseCcsric02_analysis +cusparseCcsric02_bufferSize +cusparseCcsrilu02 +cusparseCcsrilu02_analysis +cusparseCcsrilu02_bufferSize +cusparseCcsrilu02_numericBoost +cusparseCcsru2csr +cusparseCcsru2csr_bufferSizeExt +cusparseCgebsr2csr +cusparseCgebsr2gebsc +cusparseCgebsr2gebsc_bufferSize +cusparseCgebsr2gebsr +cusparseCgebsr2gebsr_bufferSize +cusparseCgemvi +cusparseCgemvi_bufferSize +cusparseCgpsvInterleavedBatch +cusparseCgpsvInterleavedBatch_bufferSizeExt +cusparseCgtsv2 +cusparseCgtsv2StridedBatch +cusparseCgtsv2StridedBatch_bufferSizeExt +cusparseCgtsv2_bufferSizeExt +cusparseCgtsv2_nopivot +cusparseCgtsv2_nopivot_bufferSizeExt +cusparseCgtsvInterleavedBatch +cusparseCgtsvInterleavedBatch_bufferSizeExt +cusparseCnnz +cusparseCnnz_compress +cusparseConstBlockedEllGet +cusparseConstCooGet +cusparseConstCscGet +cusparseConstCsrGet +cusparseConstDnMatGet +cusparseConstDnMatGetValues +cusparseConstDnVecGet +cusparseConstDnVecGetValues +cusparseConstSpMatGetValues +cusparseConstSpVecGet +cusparseConstSpVecGetValues +cusparseCooGet +cusparseCooSetPointers +cusparseCooSetStridedBatch +cusparseCreate +cusparseCreateBlockedEll +cusparseCreateBsr +cusparseCreateBsric02Info +cusparseCreateBsrilu02Info +cusparseCreateBsrsm2Info +cusparseCreateBsrsv2Info +cusparseCreateColorInfo +cusparseCreateConstBlockedEll +cusparseCreateConstBsr +cusparseCreateConstCoo +cusparseCreateConstCsc +cusparseCreateConstCsr +cusparseCreateConstDnMat +cusparseCreateConstDnVec +cusparseCreateConstSlicedEll +cusparseCreateConstSpVec +cusparseCreateCoo +cusparseCreateCsc +cusparseCreateCsr +cusparseCreateCsric02Info +cusparseCreateCsrilu02Info +cusparseCreateCsru2csrInfo +cusparseCreateDnMat +cusparseCreateDnVec +cusparseCreateIdentityPermutation +cusparseCreateMatDescr +cusparseCreatePruneInfo 
+cusparseCreateSlicedEll +cusparseCreateSpVec +cusparseCscGet +cusparseCscSetPointers +cusparseCsr2cscEx2 +cusparseCsr2cscEx2_bufferSize +cusparseCsrGet +cusparseCsrSetPointers +cusparseCsrSetStridedBatch +cusparseDbsr2csr +cusparseDbsric02 +cusparseDbsric02_analysis +cusparseDbsric02_bufferSize +cusparseDbsrilu02 +cusparseDbsrilu02_analysis +cusparseDbsrilu02_bufferSize +cusparseDbsrilu02_numericBoost +cusparseDbsrmm +cusparseDbsrmv +cusparseDbsrsm2_analysis +cusparseDbsrsm2_bufferSize +cusparseDbsrsm2_solve +cusparseDbsrsv2_analysis +cusparseDbsrsv2_bufferSize +cusparseDbsrsv2_solve +cusparseDbsrxmv +cusparseDcsr2bsr +cusparseDcsr2csr_compress +cusparseDcsr2csru +cusparseDcsr2gebsr +cusparseDcsr2gebsr_bufferSize +cusparseDcsrcolor +cusparseDcsrgeam2 +cusparseDcsrgeam2_bufferSizeExt +cusparseDcsric02 +cusparseDcsric02_analysis +cusparseDcsric02_bufferSize +cusparseDcsrilu02 +cusparseDcsrilu02_analysis +cusparseDcsrilu02_bufferSize +cusparseDcsrilu02_numericBoost +cusparseDcsru2csr +cusparseDcsru2csr_bufferSizeExt +cusparseDenseToSparse_analysis +cusparseDenseToSparse_bufferSize +cusparseDenseToSparse_convert +cusparseDestroy +cusparseDestroyBsric02Info +cusparseDestroyBsrilu02Info +cusparseDestroyBsrsm2Info +cusparseDestroyBsrsv2Info +cusparseDestroyColorInfo +cusparseDestroyCsric02Info +cusparseDestroyCsrilu02Info +cusparseDestroyCsru2csrInfo +cusparseDestroyDnMat +cusparseDestroyDnVec +cusparseDestroyMatDescr +cusparseDestroyPruneInfo +cusparseDestroySpMat +cusparseDestroySpVec +cusparseDgebsr2csr +cusparseDgebsr2gebsc +cusparseDgebsr2gebsc_bufferSize +cusparseDgebsr2gebsr +cusparseDgebsr2gebsr_bufferSize +cusparseDgemvi +cusparseDgemvi_bufferSize +cusparseDgpsvInterleavedBatch +cusparseDgpsvInterleavedBatch_bufferSizeExt +cusparseDgtsv2 +cusparseDgtsv2StridedBatch +cusparseDgtsv2StridedBatch_bufferSizeExt +cusparseDgtsv2_bufferSizeExt +cusparseDgtsv2_nopivot +cusparseDgtsv2_nopivot_bufferSizeExt +cusparseDgtsvInterleavedBatch 
+cusparseDgtsvInterleavedBatch_bufferSizeExt +cusparseDnMatGet +cusparseDnMatGetStridedBatch +cusparseDnMatGetValues +cusparseDnMatSetStridedBatch +cusparseDnMatSetValues +cusparseDnVecGet +cusparseDnVecGetValues +cusparseDnVecSetValues +cusparseDnnz +cusparseDnnz_compress +cusparseDpruneCsr2csr +cusparseDpruneCsr2csrByPercentage +cusparseDpruneCsr2csrByPercentage_bufferSizeExt +cusparseDpruneCsr2csrNnz +cusparseDpruneCsr2csrNnzByPercentage +cusparseDpruneCsr2csr_bufferSizeExt +cusparseDpruneDense2csr +cusparseDpruneDense2csrByPercentage +cusparseDpruneDense2csrByPercentage_bufferSizeExt +cusparseDpruneDense2csrNnz +cusparseDpruneDense2csrNnzByPercentage +cusparseDpruneDense2csr_bufferSizeExt +cusparseGather +cusparseGetErrorName +cusparseGetErrorString +cusparseGetMatDiagType +cusparseGetMatFillMode +cusparseGetMatIndexBase +cusparseGetMatType +cusparseGetPointerMode +cusparseGetProperty +cusparseGetStream +cusparseGetVersion +cusparseHpruneCsr2csr +cusparseHpruneCsr2csrByPercentage +cusparseHpruneCsr2csrByPercentage_bufferSizeExt +cusparseHpruneCsr2csrNnz +cusparseHpruneCsr2csrNnzByPercentage +cusparseHpruneCsr2csr_bufferSizeExt +cusparseHpruneDense2csr +cusparseHpruneDense2csrByPercentage +cusparseHpruneDense2csrByPercentage_bufferSizeExt +cusparseHpruneDense2csrNnz +cusparseHpruneDense2csrNnzByPercentage +cusparseHpruneDense2csr_bufferSizeExt +cusparseLoggerForceDisable +cusparseLoggerOpenFile +cusparseLoggerSetCallback +cusparseLoggerSetFile +cusparseLoggerSetLevel +cusparseLoggerSetMask +cusparseRot +cusparseSDDMM +cusparseSDDMM_bufferSize +cusparseSDDMM_preprocess +cusparseSbsr2csr +cusparseSbsric02 +cusparseSbsric02_analysis +cusparseSbsric02_bufferSize +cusparseSbsrilu02 +cusparseSbsrilu02_analysis +cusparseSbsrilu02_bufferSize +cusparseSbsrilu02_numericBoost +cusparseSbsrmm +cusparseSbsrmv +cusparseSbsrsm2_analysis +cusparseSbsrsm2_bufferSize +cusparseSbsrsm2_solve +cusparseSbsrsv2_analysis +cusparseSbsrsv2_bufferSize +cusparseSbsrsv2_solve 
+cusparseSbsrxmv +cusparseScatter +cusparseScsr2bsr +cusparseScsr2csr_compress +cusparseScsr2csru +cusparseScsr2gebsr +cusparseScsr2gebsr_bufferSize +cusparseScsrcolor +cusparseScsrgeam2 +cusparseScsrgeam2_bufferSizeExt +cusparseScsric02 +cusparseScsric02_analysis +cusparseScsric02_bufferSize +cusparseScsrilu02 +cusparseScsrilu02_analysis +cusparseScsrilu02_bufferSize +cusparseScsrilu02_numericBoost +cusparseScsru2csr +cusparseScsru2csr_bufferSizeExt +cusparseSetMatDiagType +cusparseSetMatFillMode +cusparseSetMatIndexBase +cusparseSetMatType +cusparseSetPointerMode +cusparseSetStream +cusparseSgebsr2csr +cusparseSgebsr2gebsc +cusparseSgebsr2gebsc_bufferSize +cusparseSgebsr2gebsr +cusparseSgebsr2gebsr_bufferSize +cusparseSgemvi +cusparseSgemvi_bufferSize +cusparseSgpsvInterleavedBatch +cusparseSgpsvInterleavedBatch_bufferSizeExt +cusparseSgtsv2 +cusparseSgtsv2StridedBatch +cusparseSgtsv2StridedBatch_bufferSizeExt +cusparseSgtsv2_bufferSizeExt +cusparseSgtsv2_nopivot +cusparseSgtsv2_nopivot_bufferSizeExt +cusparseSgtsvInterleavedBatch +cusparseSgtsvInterleavedBatch_bufferSizeExt +cusparseSnnz +cusparseSnnz_compress +cusparseSpGEMM_compute +cusparseSpGEMM_copy +cusparseSpGEMM_createDescr +cusparseSpGEMM_destroyDescr +cusparseSpGEMM_estimateMemory +cusparseSpGEMM_getNumProducts +cusparseSpGEMM_workEstimation +cusparseSpGEMMreuse_compute +cusparseSpGEMMreuse_copy +cusparseSpGEMMreuse_nnz +cusparseSpGEMMreuse_workEstimation +cusparseSpMM +cusparseSpMMOp +cusparseSpMMOp_createPlan +cusparseSpMMOp_destroyPlan +cusparseSpMM_bufferSize +cusparseSpMM_preprocess +cusparseSpMV +cusparseSpMV_bufferSize +cusparseSpMatGetAttribute +cusparseSpMatGetFormat +cusparseSpMatGetIndexBase +cusparseSpMatGetSize +cusparseSpMatGetStridedBatch +cusparseSpMatGetValues +cusparseSpMatSetAttribute +cusparseSpMatSetValues +cusparseSpSM_analysis +cusparseSpSM_bufferSize +cusparseSpSM_createDescr +cusparseSpSM_destroyDescr +cusparseSpSM_solve +cusparseSpSV_analysis +cusparseSpSV_bufferSize 
+cusparseSpSV_createDescr +cusparseSpSV_destroyDescr +cusparseSpSV_solve +cusparseSpSV_updateMatrix +cusparseSpVV +cusparseSpVV_bufferSize +cusparseSpVecGet +cusparseSpVecGetIndexBase +cusparseSpVecGetValues +cusparseSpVecSetValues +cusparseSparseToDense +cusparseSparseToDense_bufferSize +cusparseSpruneCsr2csr +cusparseSpruneCsr2csrByPercentage +cusparseSpruneCsr2csrByPercentage_bufferSizeExt +cusparseSpruneCsr2csrNnz +cusparseSpruneCsr2csrNnzByPercentage +cusparseSpruneCsr2csr_bufferSizeExt +cusparseSpruneDense2csr +cusparseSpruneDense2csrByPercentage +cusparseSpruneDense2csrByPercentage_bufferSizeExt +cusparseSpruneDense2csrNnz +cusparseSpruneDense2csrNnzByPercentage +cusparseSpruneDense2csr_bufferSizeExt +cusparseXbsric02_zeroPivot +cusparseXbsrilu02_zeroPivot +cusparseXbsrsm2_zeroPivot +cusparseXbsrsv2_zeroPivot +cusparseXcoo2csr +cusparseXcoosortByColumn +cusparseXcoosortByRow +cusparseXcoosort_bufferSizeExt +cusparseXcscsort +cusparseXcscsort_bufferSizeExt +cusparseXcsr2bsrNnz +cusparseXcsr2coo +cusparseXcsr2gebsrNnz +cusparseXcsrgeam2Nnz +cusparseXcsric02_zeroPivot +cusparseXcsrilu02_zeroPivot +cusparseXcsrsort +cusparseXcsrsort_bufferSizeExt +cusparseXgebsr2gebsrNnz +cusparseZbsr2csr +cusparseZbsric02 +cusparseZbsric02_analysis +cusparseZbsric02_bufferSize +cusparseZbsrilu02 +cusparseZbsrilu02_analysis +cusparseZbsrilu02_bufferSize +cusparseZbsrilu02_numericBoost +cusparseZbsrmm +cusparseZbsrmv +cusparseZbsrsm2_analysis +cusparseZbsrsm2_bufferSize +cusparseZbsrsm2_solve +cusparseZbsrsv2_analysis +cusparseZbsrsv2_bufferSize +cusparseZbsrsv2_solve +cusparseZbsrxmv +cusparseZcsr2bsr +cusparseZcsr2csr_compress +cusparseZcsr2csru +cusparseZcsr2gebsr +cusparseZcsr2gebsr_bufferSize +cusparseZcsrcolor +cusparseZcsrgeam2 +cusparseZcsrgeam2_bufferSizeExt +cusparseZcsric02 +cusparseZcsric02_analysis +cusparseZcsric02_bufferSize +cusparseZcsrilu02 +cusparseZcsrilu02_analysis +cusparseZcsrilu02_bufferSize +cusparseZcsrilu02_numericBoost +cusparseZcsru2csr 
+cusparseZcsru2csr_bufferSizeExt +cusparseZgebsr2csr +cusparseZgebsr2gebsc +cusparseZgebsr2gebsc_bufferSize +cusparseZgebsr2gebsr +cusparseZgebsr2gebsr_bufferSize +cusparseZgemvi +cusparseZgemvi_bufferSize +cusparseZgpsvInterleavedBatch +cusparseZgpsvInterleavedBatch_bufferSizeExt +cusparseZgtsv2 +cusparseZgtsv2StridedBatch +cusparseZgtsv2StridedBatch_bufferSizeExt +cusparseZgtsv2_bufferSizeExt +cusparseZgtsv2_nopivot +cusparseZgtsv2_nopivot_bufferSizeExt +cusparseZgtsvInterleavedBatch +cusparseZgtsvInterleavedBatch_bufferSizeExt +cusparseZnnz +cusparseZnnz_compress diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cusparse_10_0.inc b/third_party/xla/third_party/tsl/tsl/cuda/cusparse_10_0.inc deleted file mode 100644 index 71851e05d36cc0..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cusparse_10_0.inc +++ /dev/null @@ -1,7832 +0,0 @@ -// Auto-generated, do not edit. - -extern "C" { - -cusparseStatus_t CUSPARSEAPI cusparseCreate(cusparseHandle_t *handle) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t *); - static auto func_ptr = LoadSymbol("cusparseCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroy(cusparseHandle_t handle) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t); - static auto func_ptr = LoadSymbol("cusparseDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cusparseStatus_t CUSPARSEAPI cusparseGetVersion(cusparseHandle_t handle, - int *version) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int *); - static auto func_ptr = LoadSymbol("cusparseGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, version); -} - -cusparseStatus_t CUSPARSEAPI cusparseGetProperty(libraryPropertyType type, - int *value) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(libraryPropertyType, int *); - static auto func_ptr = 
LoadSymbol("cusparseGetProperty"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type, value); -} - -cusparseStatus_t CUSPARSEAPI cusparseSetStream(cusparseHandle_t handle, - cudaStream_t streamId) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cusparseSetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cusparseStatus_t CUSPARSEAPI cusparseGetStream(cusparseHandle_t handle, - cudaStream_t *streamId) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, cudaStream_t *); - static auto func_ptr = LoadSymbol("cusparseGetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cusparseStatus_t CUSPARSEAPI -cusparseGetPointerMode(cusparseHandle_t handle, cusparsePointerMode_t *mode) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, - cusparsePointerMode_t *); - static auto func_ptr = LoadSymbol("cusparseGetPointerMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSetPointerMode(cusparseHandle_t handle, cusparsePointerMode_t mode) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, cusparsePointerMode_t); - static auto func_ptr = LoadSymbol("cusparseSetPointerMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCreateMatDescr(cusparseMatDescr_t *descrA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t *); - static auto func_ptr = LoadSymbol("cusparseCreateMatDescr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDestroyMatDescr(cusparseMatDescr_t descrA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t); - static auto func_ptr = 
LoadSymbol("cusparseDestroyMatDescr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCopyMatDescr(cusparseMatDescr_t dest, const cusparseMatDescr_t src) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t, - const cusparseMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseCopyMatDescr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dest, src); -} - -cusparseStatus_t CUSPARSEAPI cusparseSetMatType(cusparseMatDescr_t descrA, - cusparseMatrixType_t type) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t, cusparseMatrixType_t); - static auto func_ptr = LoadSymbol("cusparseSetMatType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA, type); -} - -cusparseMatrixType_t CUSPARSEAPI -cusparseGetMatType(const cusparseMatDescr_t descrA) { - using FuncPtr = cusparseMatrixType_t(CUSPARSEAPI *)(const cusparseMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseGetMatType"); - if (!func_ptr) return cusparseMatrixType_t(-1); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSetMatFillMode(cusparseMatDescr_t descrA, cusparseFillMode_t fillMode) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t, cusparseFillMode_t); - static auto func_ptr = LoadSymbol("cusparseSetMatFillMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA, fillMode); -} - -cusparseFillMode_t CUSPARSEAPI -cusparseGetMatFillMode(const cusparseMatDescr_t descrA) { - using FuncPtr = cusparseFillMode_t(CUSPARSEAPI *)(const cusparseMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseGetMatFillMode"); - if (!func_ptr) return cusparseFillMode_t(-1); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSetMatDiagType(cusparseMatDescr_t descrA, cusparseDiagType_t diagType) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t, 
cusparseDiagType_t); - static auto func_ptr = LoadSymbol("cusparseSetMatDiagType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA, diagType); -} - -cusparseDiagType_t CUSPARSEAPI -cusparseGetMatDiagType(const cusparseMatDescr_t descrA) { - using FuncPtr = cusparseDiagType_t(CUSPARSEAPI *)(const cusparseMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseGetMatDiagType"); - if (!func_ptr) return cusparseDiagType_t(-1); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI cusparseSetMatIndexBase(cusparseMatDescr_t descrA, - cusparseIndexBase_t base) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSetMatIndexBase"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA, base); -} - -cusparseIndexBase_t CUSPARSEAPI -cusparseGetMatIndexBase(const cusparseMatDescr_t descrA) { - using FuncPtr = cusparseIndexBase_t(CUSPARSEAPI *)(const cusparseMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseGetMatIndexBase"); - if (!func_ptr) return cusparseIndexBase_t(-1); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCreateSolveAnalysisInfo(cusparseSolveAnalysisInfo_t *info) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseSolveAnalysisInfo_t *); - static auto func_ptr = LoadSymbol("cusparseCreateSolveAnalysisInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDestroySolveAnalysisInfo(cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSolveAnalysisInfo_t); - static auto func_ptr = - LoadSymbol("cusparseDestroySolveAnalysisInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseGetLevelInfo(cusparseHandle_t handle, cusparseSolveAnalysisInfo_t info, - int *nlevels, int **levelPtr, int **levelInd) { - 
using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseSolveAnalysisInfo_t, int *, int **, int **); - static auto func_ptr = LoadSymbol("cusparseGetLevelInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, nlevels, levelPtr, levelInd); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsrsv2Info(csrsv2Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrsv2Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateCsrsv2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyCsrsv2Info(csrsv2Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrsv2Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyCsrsv2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsric02Info(csric02Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csric02Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateCsric02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyCsric02Info(csric02Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csric02Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyCsric02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateBsric02Info(bsric02Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsric02Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateBsric02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyBsric02Info(bsric02Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsric02Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyBsric02Info"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsrilu02Info(csrilu02Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrilu02Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateCsrilu02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyCsrilu02Info(csrilu02Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrilu02Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyCsrilu02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateBsrilu02Info(bsrilu02Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrilu02Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateBsrilu02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyBsrilu02Info(bsrilu02Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrilu02Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyBsrilu02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateBsrsv2Info(bsrsv2Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrsv2Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateBsrsv2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyBsrsv2Info(bsrsv2Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrsv2Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyBsrsv2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateBsrsm2Info(bsrsm2Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrsm2Info_t *); - static auto func_ptr = 
LoadSymbol("cusparseCreateBsrsm2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyBsrsm2Info(bsrsm2Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrsm2Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyBsrsm2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateHybMat(cusparseHybMat_t *hybA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHybMat_t *); - static auto func_ptr = LoadSymbol("cusparseCreateHybMat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hybA); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyHybMat(cusparseHybMat_t hybA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHybMat_t); - static auto func_ptr = LoadSymbol("cusparseDestroyHybMat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hybA); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsru2csrInfo(csru2csrInfo_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csru2csrInfo_t *); - static auto func_ptr = LoadSymbol("cusparseCreateCsru2csrInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyCsru2csrInfo(csru2csrInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csru2csrInfo_t); - static auto func_ptr = LoadSymbol("cusparseDestroyCsru2csrInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCreateColorInfo(cusparseColorInfo_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseColorInfo_t *); - static auto func_ptr = LoadSymbol("cusparseCreateColorInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDestroyColorInfo(cusparseColorInfo_t info) { - using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)(cusparseColorInfo_t); - static auto func_ptr = LoadSymbol("cusparseDestroyColorInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseSetColorAlgs(cusparseColorInfo_t info, - cusparseColorAlg_t alg) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseColorInfo_t, cusparseColorAlg_t); - static auto func_ptr = LoadSymbol("cusparseSetColorAlgs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, alg); -} - -cusparseStatus_t CUSPARSEAPI cusparseGetColorAlgs(cusparseColorInfo_t info, - cusparseColorAlg_t *alg) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseColorInfo_t, - cusparseColorAlg_t *); - static auto func_ptr = LoadSymbol("cusparseGetColorAlgs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, alg); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreatePruneInfo(pruneInfo_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(pruneInfo_t *); - static auto func_ptr = LoadSymbol("cusparseCreatePruneInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyPruneInfo(pruneInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(pruneInfo_t); - static auto func_ptr = LoadSymbol("cusparseDestroyPruneInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseSaxpyi(cusparseHandle_t handle, int nnz, - const float *alpha, - const float *xVal, const int *xInd, - float *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const float *, const float *, const int *, float *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSaxpyi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, alpha, xVal, xInd, y, idxBase); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseDaxpyi(cusparseHandle_t handle, int nnz, - const double *alpha, - const double *xVal, const int *xInd, - double *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, const double *, const int *, - double *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseDaxpyi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, alpha, xVal, xInd, y, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseCaxpyi(cusparseHandle_t handle, int nnz, - const cuComplex *alpha, - const cuComplex *xVal, - const int *xInd, cuComplex *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const cuComplex *, const int *, - cuComplex *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseCaxpyi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, alpha, xVal, xInd, y, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseZaxpyi(cusparseHandle_t handle, int nnz, - const cuDoubleComplex *alpha, - const cuDoubleComplex *xVal, - const int *xInd, cuDoubleComplex *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, const cuDoubleComplex *, - const int *, cuDoubleComplex *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseZaxpyi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, alpha, xVal, xInd, y, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseSdoti(cusparseHandle_t handle, int nnz, - const float *xVal, const int *xInd, - const float *y, - float *resultDevHostPtr, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const float *, const int *, const float *, float *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSdoti"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, resultDevHostPtr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseDdoti(cusparseHandle_t handle, int nnz, - const double *xVal, const int *xInd, - const double *y, - double *resultDevHostPtr, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, const int *, const double *, - double *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseDdoti"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, resultDevHostPtr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseCdoti(cusparseHandle_t handle, int nnz, - const cuComplex *xVal, - const int *xInd, const cuComplex *y, - cuComplex *resultDevHostPtr, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const int *, const cuComplex *, - cuComplex *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseCdoti"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, resultDevHostPtr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseZdoti(cusparseHandle_t handle, int nnz, - const cuDoubleComplex *xVal, - const int *xInd, - const cuDoubleComplex *y, - cuDoubleComplex *resultDevHostPtr, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, const int *, - const cuDoubleComplex *, cuDoubleComplex *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseZdoti"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, resultDevHostPtr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseCdotci(cusparseHandle_t handle, int nnz, - const cuComplex *xVal, - const int *xInd, const cuComplex *y, - cuComplex *resultDevHostPtr, - cusparseIndexBase_t 
idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const int *, const cuComplex *, - cuComplex *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseCdotci"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, resultDevHostPtr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseZdotci(cusparseHandle_t handle, int nnz, - const cuDoubleComplex *xVal, - const int *xInd, - const cuDoubleComplex *y, - cuDoubleComplex *resultDevHostPtr, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, const int *, - const cuDoubleComplex *, cuDoubleComplex *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseZdotci"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, resultDevHostPtr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgthr(cusparseHandle_t handle, int nnz, - const float *y, float *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const float *, float *, const int *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSgthr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgthr(cusparseHandle_t handle, int nnz, - const double *y, double *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, double *, const int *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseDgthr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgthr(cusparseHandle_t handle, int nnz, - const cuComplex *y, cuComplex *xVal, 
- const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, cuComplex *, const int *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseCgthr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgthr(cusparseHandle_t handle, int nnz, - const cuDoubleComplex *y, - cuDoubleComplex *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, cuDoubleComplex *, - const int *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseZgthr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgthrz(cusparseHandle_t handle, int nnz, - float *y, float *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, float *, float *, - const int *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSgthrz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgthrz(cusparseHandle_t handle, int nnz, - double *y, double *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, double *, double *, - const int *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseDgthrz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgthrz(cusparseHandle_t handle, int nnz, - cuComplex *y, cuComplex *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - 
cusparseHandle_t, int, cuComplex *, cuComplex *, const int *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseCgthrz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgthrz(cusparseHandle_t handle, int nnz, - cuDoubleComplex *y, - cuDoubleComplex *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cuDoubleComplex *, cuDoubleComplex *, const int *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseZgthrz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseSsctr(cusparseHandle_t handle, int nnz, - const float *xVal, const int *xInd, - float *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, - const float *, const int *, - float *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSsctr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseDsctr(cusparseHandle_t handle, int nnz, - const double *xVal, const int *xInd, - double *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, const int *, double *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseDsctr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsctr(cusparseHandle_t handle, int nnz, - const cuComplex *xVal, - const int *xInd, cuComplex *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const int *, cuComplex *, - cusparseIndexBase_t); - static 
auto func_ptr = LoadSymbol("cusparseCsctr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseZsctr(cusparseHandle_t handle, int nnz, - const cuDoubleComplex *xVal, - const int *xInd, cuDoubleComplex *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, const int *, - cuDoubleComplex *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseZsctr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseSroti(cusparseHandle_t handle, int nnz, - float *xVal, const int *xInd, - float *y, const float *c, - const float *s, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, float *, const int *, float *, const float *, - const float *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSroti"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, c, s, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseDroti(cusparseHandle_t handle, int nnz, - double *xVal, const int *xInd, - double *y, const double *c, - const double *s, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, double *, const int *, double *, const double *, - const double *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseDroti"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, c, s, idxBase); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSgemvi(cusparseHandle_t handle, cusparseOperation_t transA, int m, - int n, const float *alpha, /* host or device pointer */ - const float *A, int lda, int nnz, const float *xVal, - const int *xInd, const float *beta, /* host or device pointer */ - 
float *y, cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const float *, - const float *, int, int, const float *, const int *, const float *, - float *, cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseSgemvi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, A, lda, nnz, xVal, xInd, beta, y, - idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, - int m, int n, int nnz, int *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseSgemvi_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDgemvi(cusparseHandle_t handle, cusparseOperation_t transA, int m, - int n, const double *alpha, /* host or device pointer */ - const double *A, int lda, int nnz, const double *xVal, - const int *xInd, const double *beta, /* host or device pointer */ - double *y, cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const double *, - const double *, int, int, const double *, const int *, const double *, - double *, cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseDgemvi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, A, lda, nnz, xVal, xInd, beta, y, - idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, - int m, int n, int nnz, int *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, 
int, int *); - static auto func_ptr = LoadSymbol("cusparseDgemvi_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgemvi( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, int nnz, const cuComplex *xVal, - const int *xInd, const cuComplex *beta, /* host or device pointer */ - cuComplex *y, cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cuComplex *, - const cuComplex *, int, int, const cuComplex *, const int *, - const cuComplex *, cuComplex *, cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseCgemvi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, A, lda, nnz, xVal, xInd, beta, y, - idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, - int m, int n, int nnz, int *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseCgemvi_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgemvi( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, int nnz, const cuDoubleComplex *xVal, - const int *xInd, const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *y, cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cuDoubleComplex *, - const 
cuDoubleComplex *, int, int, const cuDoubleComplex *, const int *, - const cuDoubleComplex *, cuDoubleComplex *, cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseZgemvi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, A, lda, nnz, xVal, xInd, beta, y, - idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseZgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, - int m, int n, int nnz, int *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseZgemvi_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrmv( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int nnz, - const float *alpha, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const float *x, const float *beta, float *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - const float *, const float *, float *); - static auto func_ptr = LoadSymbol("cusparseScsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDcsrmv(cusparseHandle_t handle, cusparseOperation_t transA, int m, - int n, int nnz, const double *alpha, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *x, const double *beta, double *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, 
cusparseOperation_t, int, int, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - const double *, const double *, double *); - static auto func_ptr = LoadSymbol("cusparseDcsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCcsrmv(cusparseHandle_t handle, cusparseOperation_t transA, int m, - int n, int nnz, const cuComplex *alpha, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cuComplex *x, const cuComplex *beta, cuComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, const cuComplex *, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - const cuComplex *, const cuComplex *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCcsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrmv( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int nnz, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *x, - const cuDoubleComplex *beta, cuDoubleComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, - const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZcsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, transA, m, n, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsrmvEx_bufferSize( - cusparseHandle_t handle, cusparseAlgMode_t alg, cusparseOperation_t transA, - int m, int n, int nnz, const void *alpha, cudaDataType alphatype, - const cusparseMatDescr_t descrA, const void *csrValA, - cudaDataType csrValAtype, const int *csrRowPtrA, const int *csrColIndA, - const void *x, cudaDataType xtype, const void *beta, cudaDataType betatype, - void *y, cudaDataType ytype, cudaDataType executiontype, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseAlgMode_t, cusparseOperation_t, int, int, int, - const void *, cudaDataType, const cusparseMatDescr_t, const void *, - cudaDataType, const int *, const int *, const void *, cudaDataType, - const void *, cudaDataType, void *, cudaDataType, cudaDataType, size_t *); - static auto func_ptr = LoadSymbol("cusparseCsrmvEx_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alg, transA, m, n, nnz, alpha, alphatype, descrA, - csrValA, csrValAtype, csrRowPtrA, csrColIndA, x, xtype, beta, - betatype, y, ytype, executiontype, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsrmvEx( - cusparseHandle_t handle, cusparseAlgMode_t alg, cusparseOperation_t transA, - int m, int n, int nnz, const void *alpha, cudaDataType alphatype, - const cusparseMatDescr_t descrA, const void *csrValA, - cudaDataType csrValAtype, const int *csrRowPtrA, const int *csrColIndA, - const void *x, cudaDataType xtype, const void *beta, cudaDataType betatype, - void *y, cudaDataType ytype, cudaDataType executiontype, void *buffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseAlgMode_t, cusparseOperation_t, int, int, int, - const void *, cudaDataType, const cusparseMatDescr_t, const void *, - cudaDataType, const int *, const int *, 
const void *, cudaDataType, - const void *, cudaDataType, void *, cudaDataType, cudaDataType, void *); - static auto func_ptr = LoadSymbol("cusparseCsrmvEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alg, transA, m, n, nnz, alpha, alphatype, descrA, - csrValA, csrValAtype, csrRowPtrA, csrColIndA, x, xtype, beta, - betatype, y, ytype, executiontype, buffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrmv_mp( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int nnz, - const float *alpha, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const float *x, const float *beta, float *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - const float *, const float *, float *); - static auto func_ptr = LoadSymbol("cusparseScsrmv_mp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDcsrmv_mp(cusparseHandle_t handle, cusparseOperation_t transA, int m, - int n, int nnz, const double *alpha, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *x, const double *beta, double *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - const double *, const double *, double *); - static auto func_ptr = LoadSymbol("cusparseDcsrmv_mp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, x, beta, y); -} - 
-cusparseStatus_t CUSPARSEAPI cusparseCcsrmv_mp( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int nnz, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuComplex *x, const cuComplex *beta, - cuComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, const cuComplex *, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - const cuComplex *, const cuComplex *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCcsrmv_mp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrmv_mp( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int nnz, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *x, - const cuDoubleComplex *beta, cuDoubleComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, - const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZcsrmv_mp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseShybmv( - cusparseHandle_t handle, cusparseOperation_t transA, const float *alpha, - const cusparseMatDescr_t descrA, const cusparseHybMat_t hybA, - const float *x, const float *beta, float *y) { - using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const float *, - const cusparseMatDescr_t, const cusparseHybMat_t, const float *, - const float *, float *); - static auto func_ptr = LoadSymbol("cusparseShybmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, alpha, descrA, hybA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseDhybmv( - cusparseHandle_t handle, cusparseOperation_t transA, const double *alpha, - const cusparseMatDescr_t descrA, const cusparseHybMat_t hybA, - const double *x, const double *beta, double *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const double *, - const cusparseMatDescr_t, const cusparseHybMat_t, const double *, - const double *, double *); - static auto func_ptr = LoadSymbol("cusparseDhybmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, alpha, descrA, hybA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseChybmv( - cusparseHandle_t handle, cusparseOperation_t transA, const cuComplex *alpha, - const cusparseMatDescr_t descrA, const cusparseHybMat_t hybA, - const cuComplex *x, const cuComplex *beta, cuComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cuComplex *, - const cusparseMatDescr_t, const cusparseHybMat_t, const cuComplex *, - const cuComplex *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseChybmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, alpha, descrA, hybA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI -cusparseZhybmv(cusparseHandle_t handle, cusparseOperation_t transA, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, const cuDoubleComplex *x, - const cuDoubleComplex *beta, cuDoubleComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const 
cuDoubleComplex *, - const cusparseMatDescr_t, const cusparseHybMat_t, const cuDoubleComplex *, - const cuDoubleComplex *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZhybmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, alpha, descrA, hybA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrmv( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nb, int nnzb, const float *alpha, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const float *x, const float *beta, float *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - const float *, const cusparseMatDescr_t, const float *, const int *, - const int *, int, const float *, const float *, float *); - static auto func_ptr = LoadSymbol("cusparseSbsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim, - x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrmv( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nb, int nnzb, const double *alpha, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const double *x, const double *beta, double *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - const double *, const cusparseMatDescr_t, const double *, const int *, - const int *, int, const double *, const double *, double *); - static auto func_ptr = LoadSymbol("cusparseDbsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nb, nnzb, alpha, 
descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim, - x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCbsrmv(cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nb, int nnzb, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *bsrSortedValA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, int blockDim, const cuComplex *x, - const cuComplex *beta, cuComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, int, const cuComplex *, const cuComplex *, - cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCbsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim, - x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrmv( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nb, int nnzb, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *bsrSortedValA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, int blockDim, const cuDoubleComplex *x, - const cuDoubleComplex *beta, cuDoubleComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZbsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim, - x, beta, y); -} - 
-cusparseStatus_t CUSPARSEAPI -cusparseSbsrxmv(cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int sizeOfMask, int mb, int nb, - int nnzb, const float *alpha, const cusparseMatDescr_t descrA, - const float *bsrSortedValA, const int *bsrSortedMaskPtrA, - const int *bsrSortedRowPtrA, const int *bsrSortedEndPtrA, - const int *bsrSortedColIndA, int blockDim, const float *x, - const float *beta, float *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - int, const float *, const cusparseMatDescr_t, const float *, const int *, - const int *, const int *, const int *, int, const float *, const float *, - float *); - static auto func_ptr = LoadSymbol("cusparseSbsrxmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, sizeOfMask, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedMaskPtrA, bsrSortedRowPtrA, - bsrSortedEndPtrA, bsrSortedColIndA, blockDim, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDbsrxmv(cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int sizeOfMask, int mb, int nb, - int nnzb, const double *alpha, const cusparseMatDescr_t descrA, - const double *bsrSortedValA, const int *bsrSortedMaskPtrA, - const int *bsrSortedRowPtrA, const int *bsrSortedEndPtrA, - const int *bsrSortedColIndA, int blockDim, const double *x, - const double *beta, double *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - int, const double *, const cusparseMatDescr_t, const double *, - const int *, const int *, const int *, const int *, int, const double *, - const double *, double *); - static auto func_ptr = LoadSymbol("cusparseDbsrxmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, sizeOfMask, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, 
bsrSortedMaskPtrA, bsrSortedRowPtrA, - bsrSortedEndPtrA, bsrSortedColIndA, blockDim, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrxmv( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int sizeOfMask, int mb, int nb, int nnzb, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *bsrSortedValA, const int *bsrSortedMaskPtrA, - const int *bsrSortedRowPtrA, const int *bsrSortedEndPtrA, - const int *bsrSortedColIndA, int blockDim, const cuComplex *x, - const cuComplex *beta, cuComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - int, const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, const int *, const int *, int, - const cuComplex *, const cuComplex *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCbsrxmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, sizeOfMask, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedMaskPtrA, bsrSortedRowPtrA, - bsrSortedEndPtrA, bsrSortedColIndA, blockDim, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrxmv( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int sizeOfMask, int mb, int nb, int nnzb, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *bsrSortedValA, const int *bsrSortedMaskPtrA, - const int *bsrSortedRowPtrA, const int *bsrSortedEndPtrA, - const int *bsrSortedColIndA, int blockDim, const cuDoubleComplex *x, - const cuDoubleComplex *beta, cuDoubleComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - int, const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, const int *, - const int *, int, const cuDoubleComplex *, const cuDoubleComplex *, - 
cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZbsrxmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, sizeOfMask, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedMaskPtrA, bsrSortedRowPtrA, - bsrSortedEndPtrA, bsrSortedColIndA, blockDim, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsrsv_analysisEx( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const void *csrSortedValA, - cudaDataType csrSortedValAtype, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - cudaDataType executiontype) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const void *, cudaDataType, const int *, const int *, - cusparseSolveAnalysisInfo_t, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCsrsv_analysisEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedValAtype, csrSortedRowPtrA, csrSortedColIndA, info, - executiontype); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsv_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseScsrsv_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsv_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, 
int m, int nnz, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseDcsrsv_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsv_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseCcsrsv_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsv_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseZcsrsv_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, 
descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsrsv_solveEx( - cusparseHandle_t handle, cusparseOperation_t transA, int m, - const void *alpha, cudaDataType alphatype, const cusparseMatDescr_t descrA, - const void *csrSortedValA, cudaDataType csrSortedValAtype, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info, const void *f, cudaDataType ftype, - void *x, cudaDataType xtype, cudaDataType executiontype) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const void *, cudaDataType, - const cusparseMatDescr_t, const void *, cudaDataType, const int *, - const int *, cusparseSolveAnalysisInfo_t, const void *, cudaDataType, - void *, cudaDataType, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCsrsv_solveEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, alpha, alphatype, descrA, csrSortedValA, - csrSortedValAtype, csrSortedRowPtrA, csrSortedColIndA, info, - f, ftype, x, xtype, executiontype); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsv_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, - const float *alpha, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - const float *f, float *x) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - cusparseSolveAnalysisInfo_t, const float *, float *); - static auto func_ptr = LoadSymbol("cusparseScsrsv_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsv_solve( - cusparseHandle_t handle, 
cusparseOperation_t transA, int m, - const double *alpha, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - const double *f, double *x) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - cusparseSolveAnalysisInfo_t, const double *, double *); - static auto func_ptr = LoadSymbol("cusparseDcsrsv_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsv_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - const cuComplex *f, cuComplex *x) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cuComplex *, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - cusparseSolveAnalysisInfo_t, const cuComplex *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCcsrsv_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsv_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - const cuDoubleComplex *f, cuDoubleComplex *x) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, 
cusparseOperation_t, int, const cuDoubleComplex *, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, cusparseSolveAnalysisInfo_t, const cuDoubleComplex *, - cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZcsrsv_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrsv2_zeroPivot(cusparseHandle_t handle, - csrsv2Info_t info, - int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, csrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXcsrsv2_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsv2_bufferSize( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, csrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseScsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsv2_bufferSize( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - double *, const int *, 
const int *, csrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDcsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsv2_bufferSize( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, csrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCcsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsv2_bufferSize( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, csrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZcsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, float *csrSortedValA, - const int 
*csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, csrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseScsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, csrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, csrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSize); -} 
- -cusparseStatus_t CUSPARSEAPI cusparseZcsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, csrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsv2_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, csrsv2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsv2_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const double 
*, const int *, const int *, csrsv2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsv2_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, csrsv2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsv2_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, csrsv2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsv2_solve( - cusparseHandle_t handle, 
cusparseOperation_t transA, int m, int nnz, - const float *alpha, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrsv2Info_t info, const float *f, float *x, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - csrsv2Info_t, const float *, float *, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsv2_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const double *alpha, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrsv2Info_t info, const double *f, double *x, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - csrsv2Info_t, const double *, double *, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsv2_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrsv2Info_t info, const cuComplex 
*f, - cuComplex *x, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cuComplex *, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - csrsv2Info_t, const cuComplex *, cuComplex *, cusparseSolvePolicy_t, - void *); - static auto func_ptr = LoadSymbol("cusparseCcsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsv2_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrsv2Info_t info, const cuDoubleComplex *f, - cuDoubleComplex *x, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, csrsv2Info_t, const cuDoubleComplex *, cuDoubleComplex *, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXbsrsv2_zeroPivot(cusparseHandle_t handle, - bsrsv2Info_t info, - int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, bsrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXbsrsv2_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsv2_bufferSize( 
- cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, float *, const int *, const int *, int, - bsrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseSbsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsv2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, double *, const int *, const int *, int, - bsrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDbsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsv2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - 
cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, cuComplex *, const int *, const int *, int, - bsrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCbsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsv2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, cuDoubleComplex *, const int *, const int *, - int, bsrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZbsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockSize, - bsrsv2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, float *, const int *, const int *, int, - bsrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSbsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, 
mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockSize, info, - pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockSize, - bsrsv2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, double *, const int *, const int *, int, - bsrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDbsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockSize, info, - pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockSize, - bsrsv2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, cuComplex *, const int *, const int *, int, - bsrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCbsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockSize, info, - pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedValA, - 
const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockSize, - bsrsv2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, cuDoubleComplex *, const int *, const int *, - int, bsrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZbsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockSize, info, - pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsv2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, const float *, const int *, const int *, int, - bsrsv2Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsv2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const 
cusparseMatDescr_t, const double *, const int *, const int *, int, - bsrsv2Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsv2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - int, bsrsv2Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsv2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, int, bsrsv2Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsrsv2_analysis"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsv2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, const float *alpha, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, const float *f, float *x, cusparseSolvePolicy_t policy, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const float *, const cusparseMatDescr_t, const float *, const int *, - const int *, int, bsrsv2Info_t, const float *, float *, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, alpha, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, f, x, - policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsv2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, const double *alpha, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, const double *f, double *x, cusparseSolvePolicy_t policy, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const double *, const cusparseMatDescr_t, const double *, const int *, - const int *, int, bsrsv2Info_t, const double *, double *, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, 
transA, mb, nnzb, alpha, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, f, x, - policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsv2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, const cuComplex *alpha, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, const cuComplex *f, cuComplex *x, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, int, bsrsv2Info_t, const cuComplex *, - cuComplex *, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, alpha, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, f, x, - policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsv2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, const cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, const cuDoubleComplex *f, cuDoubleComplex *x, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, bsrsv2Info_t, - const cuDoubleComplex *, cuDoubleComplex *, cusparseSolvePolicy_t, - void *); - static auto func_ptr = LoadSymbol("cusparseZbsrsv2_solve"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, alpha, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, f, x, - policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseShybsv_analysis(cusparseHandle_t handle, cusparseOperation_t transA, - const cusparseMatDescr_t descrA, cusparseHybMat_t hybA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cusparseMatDescr_t, - cusparseHybMat_t, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseShybsv_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, descrA, hybA, info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDhybsv_analysis(cusparseHandle_t handle, cusparseOperation_t transA, - const cusparseMatDescr_t descrA, cusparseHybMat_t hybA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cusparseMatDescr_t, - cusparseHybMat_t, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseDhybsv_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, descrA, hybA, info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseChybsv_analysis(cusparseHandle_t handle, cusparseOperation_t transA, - const cusparseMatDescr_t descrA, cusparseHybMat_t hybA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cusparseMatDescr_t, - cusparseHybMat_t, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseChybsv_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, descrA, hybA, info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseZhybsv_analysis(cusparseHandle_t handle, cusparseOperation_t transA, - const cusparseMatDescr_t descrA, cusparseHybMat_t hybA, - 
cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cusparseMatDescr_t, - cusparseHybMat_t, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseZhybsv_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, descrA, hybA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseShybsv_solve( - cusparseHandle_t handle, cusparseOperation_t trans, const float *alpha, - const cusparseMatDescr_t descrA, const cusparseHybMat_t hybA, - cusparseSolveAnalysisInfo_t info, const float *f, float *x) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const float *, - const cusparseMatDescr_t, const cusparseHybMat_t, - cusparseSolveAnalysisInfo_t, const float *, float *); - static auto func_ptr = LoadSymbol("cusparseShybsv_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, alpha, descrA, hybA, info, f, x); -} - -cusparseStatus_t CUSPARSEAPI cusparseChybsv_solve( - cusparseHandle_t handle, cusparseOperation_t trans, const cuComplex *alpha, - const cusparseMatDescr_t descrA, const cusparseHybMat_t hybA, - cusparseSolveAnalysisInfo_t info, const cuComplex *f, cuComplex *x) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cuComplex *, - const cusparseMatDescr_t, const cusparseHybMat_t, - cusparseSolveAnalysisInfo_t, const cuComplex *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseChybsv_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, alpha, descrA, hybA, info, f, x); -} - -cusparseStatus_t CUSPARSEAPI cusparseDhybsv_solve( - cusparseHandle_t handle, cusparseOperation_t trans, const double *alpha, - const cusparseMatDescr_t descrA, const cusparseHybMat_t hybA, - cusparseSolveAnalysisInfo_t info, const double *f, double *x) { - using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const double *, - const cusparseMatDescr_t, const cusparseHybMat_t, - cusparseSolveAnalysisInfo_t, const double *, double *); - static auto func_ptr = LoadSymbol("cusparseDhybsv_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, alpha, descrA, hybA, info, f, x); -} - -cusparseStatus_t CUSPARSEAPI cusparseZhybsv_solve( - cusparseHandle_t handle, cusparseOperation_t trans, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, cusparseSolveAnalysisInfo_t info, - const cuDoubleComplex *f, cuDoubleComplex *x) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cuDoubleComplex *, - const cusparseMatDescr_t, const cusparseHybMat_t, - cusparseSolveAnalysisInfo_t, const cuDoubleComplex *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZhybsv_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, alpha, descrA, hybA, info, f, x); -} - -cusparseStatus_t CUSPARSEAPI -cusparseScsrmm(cusparseHandle_t handle, cusparseOperation_t transA, int m, - int n, int k, int nnz, const float *alpha, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const float *B, int ldb, const float *beta, float *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - const float *, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cusparseScsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, k, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrmm( - cusparseHandle_t 
handle, cusparseOperation_t transA, int m, int n, int k, - int nnz, const double *alpha, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const double *B, int ldb, const double *beta, - double *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - const double *, int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cusparseDcsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, k, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrmm( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int k, - int nnz, const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuComplex *B, int ldb, - const cuComplex *beta, cuComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int, - const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, const cuComplex *, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCcsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, k, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrmm( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int k, - int nnz, const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *B, int ldb, - 
const cuDoubleComplex *beta, cuDoubleComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int, - const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cusparseZcsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, k, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI -cusparseScsrmm2(cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, int nnz, - const float *alpha, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const float *B, int ldb, - const float *beta, float *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - int, const float *, const cusparseMatDescr_t, const float *, const int *, - const int *, const float *, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cusparseScsrmm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDcsrmm2(cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, int nnz, - const double *alpha, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const double *B, int ldb, - const double *beta, double *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, 
cusparseOperation_t, int, int, int, - int, const double *, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, int, const double *, double *, - int); - static auto func_ptr = LoadSymbol("cusparseDcsrmm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCcsrmm2(cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, int nnz, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuComplex *B, int ldb, - const cuComplex *beta, cuComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - int, const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, const cuComplex *, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCcsrmm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrmm2( - cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, int nnz, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *B, int ldb, - const cuDoubleComplex *beta, cuDoubleComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - int, const cuDoubleComplex *, const cusparseMatDescr_t, - 
const cuDoubleComplex *, const int *, const int *, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cusparseZcsrmm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrmm( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int kb, int nnzb, const float *alpha, const cusparseMatDescr_t descrA, - const float *bsrSortedValA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, const int blockSize, const float *B, - const int ldb, const float *beta, float *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - const int, const float *, const int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cusparseSbsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, kb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockSize, - B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrmm( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int kb, int nnzb, const double *alpha, const cusparseMatDescr_t descrA, - const double *bsrSortedValA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, const int blockSize, const double *B, - const int ldb, const double *beta, double *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, 
int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - const int, const double *, const int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cusparseDbsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, kb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockSize, - B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrmm( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int kb, int nnzb, const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *bsrSortedValA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, const int blockSize, const cuComplex *B, - const int ldb, const cuComplex *beta, cuComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, int, const cuComplex *, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - const int, const cuComplex *, const int, const cuComplex *, cuComplex *, - int); - static auto func_ptr = LoadSymbol("cusparseCbsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, kb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockSize, - B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrmm( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int kb, int nnzb, const cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, - const int blockSize, const cuDoubleComplex *B, const int ldb, - const cuDoubleComplex *beta, cuDoubleComplex *C, int ldc) { - using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, const int, const cuDoubleComplex *, const int, - const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cusparseZbsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, kb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockSize, - B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgemmi( - cusparseHandle_t handle, int m, int n, int k, int nnz, - const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *cscValB, const int *cscColPtrB, - const int *cscRowIndB, const float *beta, /* host or device pointer */ - float *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, int, const float *, const float *, int, - const float *, const int *, const int *, const float *, float *, int); - static auto func_ptr = LoadSymbol("cusparseSgemmi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, - cscRowIndB, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgemmi( - cusparseHandle_t handle, int m, int n, int k, int nnz, - const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *cscValB, const int *cscColPtrB, - const int *cscRowIndB, const double *beta, /* host or device pointer */ - double *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, int, const double *, const double *, int, - const double *, const int *, const int *, const double *, double *, int); - static auto func_ptr = LoadSymbol("cusparseDgemmi"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, - cscRowIndB, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCgemmi(cusparseHandle_t handle, int m, int n, int k, int nnz, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *cscValB, - const int *cscColPtrB, const int *cscRowIndB, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, const int *, const int *, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCgemmi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, - cscRowIndB, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgemmi( - cusparseHandle_t handle, int m, int n, int k, int nnz, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *cscValB, - const int *cscColPtrB, const int *cscRowIndB, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, const int *, - const int *, const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cusparseZgemmi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, - cscRowIndB, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsm_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - 
cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseScsrsm_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsm_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseDcsrsm_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsm_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseCcsrsm_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsm_analysis( - cusparseHandle_t handle, 
cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseZcsrsm_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsm_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, - const float *alpha, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - const float *B, int ldb, float *X, int ldx) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - cusparseSolveAnalysisInfo_t, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cusparseScsrsm_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, B, ldb, X, ldx); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsm_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, - const double *alpha, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - const double *B, int ldb, double *X, int ldx) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const double *, - const 
cusparseMatDescr_t, const double *, const int *, const int *, - cusparseSolveAnalysisInfo_t, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cusparseDcsrsm_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, B, ldb, X, ldx); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsm_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - const cuComplex *B, int ldb, cuComplex *X, int ldx) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cuComplex *, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - cusparseSolveAnalysisInfo_t, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCcsrsm_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, B, ldb, X, ldx); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsm_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - const cuDoubleComplex *B, int ldb, cuDoubleComplex *X, int ldx) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, cusparseSolveAnalysisInfo_t, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cusparseZcsrsm_solve"); 
- if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, B, ldb, X, ldx); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsrsm2Info(csrsm2Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrsm2Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateCsrsm2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyCsrsm2Info(csrsm2Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrsm2Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyCsrsm2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrsm2_zeroPivot(cusparseHandle_t handle, - csrsm2Info_t info, - int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, csrsm2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXcsrsm2_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsm2_bufferSizeExt( - cusparseHandle_t handle, int algo, /* algo = 0, 1 */ - cusparseOperation_t transA, cusparseOperation_t transB, int m, int nrhs, - int nnz, const float *alpha, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const float *B, int ldb, csrsm2Info_t info, - cusparseSolvePolicy_t policy, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const float *, const cusparseMatDescr_t, const float *, const int *, - const int *, const float *, int, csrsm2Info_t, cusparseSolvePolicy_t, - size_t *); - static auto func_ptr = LoadSymbol("cusparseScsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsm2_bufferSizeExt( - cusparseHandle_t handle, int algo, /* algo = 0, 1 */ - cusparseOperation_t transA, cusparseOperation_t transB, int m, int nrhs, - int nnz, const double *alpha, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const double *B, int ldb, csrsm2Info_t info, - cusparseSolvePolicy_t policy, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const double *, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, int, csrsm2Info_t, - cusparseSolvePolicy_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsm2_bufferSizeExt( - cusparseHandle_t handle, int algo, /* algo = 0, 1 */ - cusparseOperation_t transA, cusparseOperation_t transB, int m, int nrhs, - int nnz, const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuComplex *B, int ldb, csrsm2Info_t info, - cusparseSolvePolicy_t policy, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, const cuComplex *, int, csrsm2Info_t, - cusparseSolvePolicy_t, size_t *); - static auto func_ptr = 
LoadSymbol("cusparseCcsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsm2_bufferSizeExt( - cusparseHandle_t handle, int algo, /* algo = 0, 1 */ - cusparseOperation_t transA, cusparseOperation_t transB, int m, int nrhs, - int nnz, const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *B, int ldb, - csrsm2Info_t info, cusparseSolvePolicy_t policy, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - const cuDoubleComplex *, int, csrsm2Info_t, cusparseSolvePolicy_t, - size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsm2_analysis( - cusparseHandle_t handle, int algo, /* algo = 0, 1 */ - cusparseOperation_t transA, cusparseOperation_t transB, int m, int nrhs, - int nnz, const float *alpha, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const float *B, int ldb, csrsm2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const float *, const cusparseMatDescr_t, const float *, const int *, - 
const int *, const float *, int, csrsm2Info_t, cusparseSolvePolicy_t, - void *); - static auto func_ptr = LoadSymbol("cusparseScsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsm2_analysis( - cusparseHandle_t handle, int algo, /* algo = 0, 1 */ - cusparseOperation_t transA, cusparseOperation_t transB, int m, int nrhs, - int nnz, const double *alpha, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const double *B, int ldb, csrsm2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const double *, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, int, csrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsm2_analysis( - cusparseHandle_t handle, int algo, /* algo = 0, 1 */ - cusparseOperation_t transA, cusparseOperation_t transB, int m, int nrhs, - int nnz, const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuComplex *B, int ldb, csrsm2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const cuComplex *, const cusparseMatDescr_t, 
const cuComplex *, - const int *, const int *, const cuComplex *, int, csrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsm2_analysis( - cusparseHandle_t handle, int algo, /* algo = 0, 1 */ - cusparseOperation_t transA, cusparseOperation_t transB, int m, int nrhs, - int nnz, const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *B, int ldb, - csrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - const cuDoubleComplex *, int, csrsm2Info_t, cusparseSolvePolicy_t, - void *); - static auto func_ptr = LoadSymbol("cusparseZcsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsm2_solve( - cusparseHandle_t handle, int algo, /* algo = 0, 1 */ - cusparseOperation_t transA, cusparseOperation_t transB, int m, int nrhs, - int nnz, const float *alpha, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, float *B, int ldb, csrsm2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, 
cusparseOperation_t, int, int, - int, const float *, const cusparseMatDescr_t, const float *, const int *, - const int *, float *, int, csrsm2Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsm2_solve( - cusparseHandle_t handle, int algo, /* algo = 0, 1 */ - cusparseOperation_t transA, cusparseOperation_t transB, int m, int nrhs, - int nnz, const double *alpha, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, double *B, int ldb, csrsm2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const double *, const cusparseMatDescr_t, const double *, - const int *, const int *, double *, int, csrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsm2_solve( - cusparseHandle_t handle, int algo, /* algo = 0, 1 */ - cusparseOperation_t transA, cusparseOperation_t transB, int m, int nrhs, - int nnz, const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cuComplex *B, int ldb, csrsm2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, 
cusparseOperation_t, int, int, - int, const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, cuComplex *, int, csrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsm2_solve( - cusparseHandle_t handle, int algo, /* algo = 0, 1 */ - cusparseOperation_t transA, cusparseOperation_t transB, int m, int nrhs, - int nnz, const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cuDoubleComplex *B, int ldb, csrsm2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, cuDoubleComplex *, int, - csrsm2Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXbsrsm2_zeroPivot(cusparseHandle_t handle, - bsrsm2Info_t info, - int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, bsrsm2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXbsrsm2_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsm2_bufferSize( - cusparseHandle_t handle, 
cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, int, bsrsm2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseSbsrsm2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsm2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, int, bsrsm2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDbsrsm2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsm2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const 
int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, int, bsrsm2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCbsrsm2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsm2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrsm2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZbsrsm2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsm2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, 
cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, int, bsrsm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSbsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsm2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, int, bsrsm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDbsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsm2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, int, bsrsm2Info_t, size_t *); - static auto func_ptr = 
LoadSymbol("cusparseCbsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsm2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrsm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZbsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsm2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, const float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, bsrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, 
n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsm2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, const double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, bsrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsm2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, const cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, bsrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, policy, 
pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsm2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, - const cuDoubleComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, bsrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsm2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const float *alpha, const cusparseMatDescr_t descrA, - const float *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info, - const float *B, int ldb, float *X, int ldx, cusparseSolvePolicy_t policy, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, int, - bsrsm2Info_t, const float *, int, float *, int, cusparseSolvePolicy_t, - void *); - static auto func_ptr = LoadSymbol("cusparseSbsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, alpha, descrA, - bsrSortedVal, bsrSortedRowPtr, 
bsrSortedColInd, blockSize, - info, B, ldb, X, ldx, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsm2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const double *alpha, const cusparseMatDescr_t descrA, - const double *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info, - const double *B, int ldb, double *X, int ldx, cusparseSolvePolicy_t policy, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, int, - bsrsm2Info_t, const double *, int, double *, int, cusparseSolvePolicy_t, - void *); - static auto func_ptr = LoadSymbol("cusparseDbsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, alpha, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, B, ldb, X, ldx, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsm2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info, - const cuComplex *B, int ldb, cuComplex *X, int ldx, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cuComplex *, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - int, bsrsm2Info_t, const cuComplex *, int, cuComplex *, int, - cusparseSolvePolicy_t, void *); - static auto 
func_ptr = LoadSymbol("cusparseCbsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, alpha, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, B, ldb, X, ldx, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsm2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info, - const cuDoubleComplex *B, int ldb, cuDoubleComplex *X, int ldx, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, int, bsrsm2Info_t, const cuDoubleComplex *, int, - cuDoubleComplex *, int, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, alpha, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, B, ldb, X, ldx, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsrilu0Ex( - cusparseHandle_t handle, cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, void *csrSortedValA_ValM, - cudaDataType csrSortedValA_ValMtype, - /* matrix A values are updated inplace - to be the preconditioner M values */ - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info, cudaDataType executiontype) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - void *, 
cudaDataType, const int *, const int *, - cusparseSolveAnalysisInfo_t, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCsrilu0Ex"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedValA_ValMtype, csrSortedRowPtrA, csrSortedColIndA, - info, executiontype); -} - -cusparseStatus_t CUSPARSEAPI -cusparseScsrilu0(cusparseHandle_t handle, cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, float *csrSortedValA_ValM, - /* matrix A values are updated inplace - to be the preconditioner M values */ - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - float *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseScsrilu0"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDcsrilu0(cusparseHandle_t handle, cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, double *csrSortedValA_ValM, - /* matrix A values are updated inplace - to be the preconditioner M values */ - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - double *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseDcsrilu0"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCcsrilu0(cusparseHandle_t handle, cusparseOperation_t trans, int m, - const 
cusparseMatDescr_t descrA, cuComplex *csrSortedValA_ValM, - /* matrix A values are updated inplace - to be the preconditioner M values */ - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseCcsrilu0"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu0( - cusparseHandle_t handle, cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, cuDoubleComplex *csrSortedValA_ValM, - /* matrix A values are updated inplace - to be the preconditioner M values */ - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseZcsrilu0"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrilu02_numericBoost( - cusparseHandle_t handle, csrilu02Info_t info, int enable_boost, double *tol, - float *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, csrilu02Info_t, int, double *, float *); - static auto func_ptr = LoadSymbol("cusparseScsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02_numericBoost( - cusparseHandle_t handle, csrilu02Info_t 
info, int enable_boost, double *tol, - double *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, csrilu02Info_t, int, double *, double *); - static auto func_ptr = LoadSymbol("cusparseDcsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02_numericBoost( - cusparseHandle_t handle, csrilu02Info_t info, int enable_boost, double *tol, - cuComplex *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, csrilu02Info_t, int, double *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCcsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02_numericBoost( - cusparseHandle_t handle, csrilu02Info_t info, int enable_boost, double *tol, - cuDoubleComplex *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, csrilu02Info_t, int, double *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZcsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrilu02_zeroPivot( - cusparseHandle_t handle, csrilu02Info_t info, int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, csrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXcsrilu02_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrilu02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseScsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDcsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCcsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, 
csrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZcsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrilu02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedVal, const int *csrSortedRowPtr, const int *csrSortedColInd, - csrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseScsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedVal, const int 
*csrSortedRowPtr, - const int *csrSortedColInd, csrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrilu02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02_analysis( - 
cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, csrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrilu02_analysis"); - 
if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrilu02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedValA_valM, - /* matrix A values are updated inplace - to be the preconditioner M values */ - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - csrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedValA_valM, - /* matrix A values are updated inplace - to be the preconditioner M values */ - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - csrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedValA_valM, - /* matrix A values are updated inplace - to be the preconditioner M values */ - 
const int *csrSortedRowPtrA, const int *csrSortedColIndA, - csrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedValA_valM, - /* matrix A values are updated inplace - to be the preconditioner M values */ - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - csrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02_numericBoost( - cusparseHandle_t handle, bsrilu02Info_t info, int enable_boost, double *tol, - float *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, bsrilu02Info_t, int, double *, float *); - static auto func_ptr = LoadSymbol("cusparseSbsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02_numericBoost( - cusparseHandle_t handle, bsrilu02Info_t info, int enable_boost, double *tol, - double *boost_val) 
{ - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, bsrilu02Info_t, int, double *, double *); - static auto func_ptr = LoadSymbol("cusparseDbsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02_numericBoost( - cusparseHandle_t handle, bsrilu02Info_t info, int enable_boost, double *tol, - cuComplex *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, bsrilu02Info_t, int, double *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCbsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02_numericBoost( - cusparseHandle_t handle, bsrilu02Info_t info, int enable_boost, double *tol, - cuDoubleComplex *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, bsrilu02Info_t, int, double *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZbsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseXbsrilu02_zeroPivot( - cusparseHandle_t handle, bsrilu02Info_t info, int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, bsrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXbsrilu02_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI 
*)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseSbsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDbsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCbsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseZbsrilu02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZbsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSbsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, 
bsrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDbsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCbsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrilu02Info_t, - size_t *); - static auto func_ptr = LoadSymbol("cusparseZbsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t 
descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, 
void *); - static auto func_ptr = LoadSymbol("cusparseCbsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - 
const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrilu02Info_t, - 
cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseScsric0(cusparseHandle_t handle, cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, float *csrSortedValA_ValM, - /* matrix A values are updated inplace - to be the preconditioner M values */ - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - float *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseScsric0"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDcsric0(cusparseHandle_t handle, cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, double *csrSortedValA_ValM, - /* matrix A values are updated inplace - to be the preconditioner M values */ - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - double *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseDcsric0"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCcsric0(cusparseHandle_t handle, cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, cuComplex *csrSortedValA_ValM, - /* matrix A values are updated 
inplace - to be the preconditioner M values */ - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseCcsric0"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsric0( - cusparseHandle_t handle, cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, cuDoubleComplex *csrSortedValA_ValM, - /* matrix A values are updated inplace - to be the preconditioner M values */ - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseZcsric0"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsric02_zeroPivot(cusparseHandle_t handle, - csric02Info_t info, - int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, csric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXcsric02_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsric02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr 
= cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseScsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsric02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDcsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsric02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCcsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsric02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, 
csric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZcsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsric02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedVal, const int *csrSortedRowPtr, const int *csrSortedColInd, - csric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseScsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsric02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsric02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedVal, const int 
*csrSortedRowPtr, - const int *csrSortedColInd, csric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsric02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsric02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsric02_analysis( - cusparseHandle_t 
handle, int m, int nnz, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsric02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsric02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, csric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsric02_analysis"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsric02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedValA_valM, - /* matrix A values are updated inplace - to be the preconditioner M values */ - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - csric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsric02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedValA_valM, - /* matrix A values are updated inplace - to be the preconditioner M values */ - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - csric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsric02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedValA_valM, - /* matrix A values are updated inplace - to be the preconditioner M values */ - const int *csrSortedRowPtrA, 
const int *csrSortedColIndA, - csric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsric02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedValA_valM, - /* matrix A values are updated inplace - to be the preconditioner M values */ - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - csric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXbsric02_zeroPivot(cusparseHandle_t handle, - bsric02Info_t info, - int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, bsric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXbsric02_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsric02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, 
int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseSbsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsric02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDbsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsric02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCbsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, 
pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsric02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZbsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsric02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSbsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsric02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const 
cusparseMatDescr_t, - double *, const int *, const int *, int, bsric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDbsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsric02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCbsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsric02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsric02Info_t, - size_t *); - static auto func_ptr = LoadSymbol("cusparseZbsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsric02_analysis( - cusparseHandle_t handle, cusparseDirection_t 
dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pInputBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pInputBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsric02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pInputBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pInputBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsric02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pInputBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const 
cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pInputBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsric02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pInputBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pInputBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsric02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); 
-} - -cusparseStatus_t CUSPARSEAPI cusparseDbsric02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsric02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsric02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, 
int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv(cusparseHandle_t handle, int m, - int n, const float *dl, - const float *d, const float *du, - float *B, int ldb) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, int, - const float *, const float *, - const float *, float *, int); - static auto func_ptr = LoadSymbol("cusparseSgtsv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsv(cusparseHandle_t handle, int m, - int n, const double *dl, - const double *d, const double *du, - double *B, int ldb) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, double *, int); - static auto func_ptr = LoadSymbol("cusparseDgtsv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv(cusparseHandle_t handle, int m, - int n, const cuComplex *dl, - const cuComplex *d, - const cuComplex *du, cuComplex *B, - int ldb) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCgtsv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv(cusparseHandle_t handle, int m, - int n, const cuDoubleComplex *dl, - const cuDoubleComplex *d, - const cuDoubleComplex *du, - cuDoubleComplex 
*B, int ldb) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cusparseZgtsv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const float *dl, const float *d, - const float *du, const float *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const float *, const float *, - const float *, int, size_t *); - static auto func_ptr = LoadSymbol("cusparseSgtsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsv2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const double *dl, const double *d, - const double *du, const double *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, const double *, int, size_t *); - static auto func_ptr = LoadSymbol("cusparseDgtsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const cuComplex *dl, - const cuComplex *d, const cuComplex *du, const cuComplex *B, int ldb, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, const cuComplex *, int, size_t *); - static auto func_ptr = LoadSymbol("cusparseCgtsv2_bufferSizeExt"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, - const cuDoubleComplex *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, const cuDoubleComplex *, - int, size_t *); - static auto func_ptr = LoadSymbol("cusparseZgtsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2(cusparseHandle_t handle, int m, - int n, const float *dl, - const float *d, const float *du, - float *B, int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const float *, const float *, - float *, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgtsv2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsv2(cusparseHandle_t handle, int m, - int n, const double *dl, - const double *d, const double *du, - double *B, int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, double *, int, void *); - static auto func_ptr = LoadSymbol("cusparseDgtsv2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2(cusparseHandle_t handle, int m, - int n, const cuComplex *dl, - const cuComplex *d, - const cuComplex *du, cuComplex *B, - int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - 
cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, cuComplex *, int, void *); - static auto func_ptr = LoadSymbol("cusparseCgtsv2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2(cusparseHandle_t handle, int m, - int n, const cuDoubleComplex *dl, - const cuDoubleComplex *d, - const cuDoubleComplex *du, - cuDoubleComplex *B, int ldb, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *, int, - void *); - static auto func_ptr = LoadSymbol("cusparseZgtsv2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSgtsv_nopivot(cusparseHandle_t handle, int m, int n, const float *dl, - const float *d, const float *du, float *B, int ldb) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, int, - const float *, const float *, - const float *, float *, int); - static auto func_ptr = LoadSymbol("cusparseSgtsv_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDgtsv_nopivot(cusparseHandle_t handle, int m, int n, const double *dl, - const double *d, const double *du, double *B, int ldb) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, double *, int); - static auto func_ptr = LoadSymbol("cusparseDgtsv_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv_nopivot( - cusparseHandle_t handle, int m, int n, const cuComplex *dl, - const cuComplex *d, const cuComplex *du, cuComplex 
*B, int ldb) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCgtsv_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb); -} - -cusparseStatus_t CUSPARSEAPI -cusparseZgtsv_nopivot(cusparseHandle_t handle, int m, int n, - const cuDoubleComplex *dl, const cuDoubleComplex *d, - const cuDoubleComplex *du, cuDoubleComplex *B, int ldb) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cusparseZgtsv_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2_nopivot_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const float *dl, const float *d, - const float *du, const float *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const float *, const float *, - const float *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSgtsv2_nopivot_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsv2_nopivot_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const double *dl, const double *d, - const double *du, const double *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, const double *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDgtsv2_nopivot_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2_nopivot_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const cuComplex *dl, - const cuComplex *d, const cuComplex *du, const cuComplex *B, int ldb, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, const cuComplex *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgtsv2_nopivot_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2_nopivot_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, - const cuDoubleComplex *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, const cuDoubleComplex *, - int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgtsv2_nopivot_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2_nopivot( - cusparseHandle_t handle, int m, int n, const float *dl, const float *d, - const float *du, float *B, int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const float *, const float *, - float *, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgtsv2_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsv2_nopivot( - cusparseHandle_t handle, int m, int n, const double *dl, const double *d, - 
const double *du, double *B, int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, double *, int, void *); - static auto func_ptr = LoadSymbol("cusparseDgtsv2_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2_nopivot( - cusparseHandle_t handle, int m, int n, const cuComplex *dl, - const cuComplex *d, const cuComplex *du, cuComplex *B, int ldb, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, cuComplex *, int, void *); - static auto func_ptr = LoadSymbol("cusparseCgtsv2_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2_nopivot( - cusparseHandle_t handle, int m, int n, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, cuDoubleComplex *B, - int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *, int, - void *); - static auto func_ptr = LoadSymbol("cusparseZgtsv2_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsvStridedBatch( - cusparseHandle_t handle, int m, const float *dl, const float *d, - const float *du, float *x, int batchCount, int batchStride) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const float *, const float *, const float *, - float *, int, int); - static auto func_ptr = LoadSymbol("cusparseSgtsvStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, m, dl, d, du, x, batchCount, batchStride); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsvStridedBatch( - cusparseHandle_t handle, int m, const double *dl, const double *d, - const double *du, double *x, int batchCount, int batchStride) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, const double *, const double *, - double *, int, int); - static auto func_ptr = LoadSymbol("cusparseDgtsvStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsvStridedBatch( - cusparseHandle_t handle, int m, const cuComplex *dl, const cuComplex *d, - const cuComplex *du, cuComplex *x, int batchCount, int batchStride) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const cuComplex *, - const cuComplex *, cuComplex *, int, int); - static auto func_ptr = LoadSymbol("cusparseCgtsvStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsvStridedBatch( - cusparseHandle_t handle, int m, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, cuDoubleComplex *x, - int batchCount, int batchStride) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, const cuDoubleComplex *, - const cuDoubleComplex *, cuDoubleComplex *, int, int); - static auto func_ptr = LoadSymbol("cusparseZgtsvStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2StridedBatch_bufferSizeExt( - cusparseHandle_t handle, int m, const float *dl, const float *d, - const float *du, const float *x, int batchCount, int batchStride, - size_t *bufferSizeInBytes) { - using 
FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const float *, const float *, const float *, - const float *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSgtsv2StridedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, - bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsv2StridedBatch_bufferSizeExt( - cusparseHandle_t handle, int m, const double *dl, const double *d, - const double *du, const double *x, int batchCount, int batchStride, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, const double *, const double *, - const double *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDgtsv2StridedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, - bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2StridedBatch_bufferSizeExt( - cusparseHandle_t handle, int m, const cuComplex *dl, const cuComplex *d, - const cuComplex *du, const cuComplex *x, int batchCount, int batchStride, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const cuComplex *, - const cuComplex *, const cuComplex *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgtsv2StridedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, - bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2StridedBatch_bufferSizeExt( - cusparseHandle_t handle, int m, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, - const cuDoubleComplex *x, int batchCount, int batchStride, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( 
- cusparseHandle_t, int, const cuDoubleComplex *, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgtsv2StridedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, - bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2StridedBatch( - cusparseHandle_t handle, int m, const float *dl, const float *d, - const float *du, float *x, int batchCount, int batchStride, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const float *, const float *, const float *, - float *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgtsv2StridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDgtsv2StridedBatch(cusparseHandle_t handle, int m, const double *dl, - const double *d, const double *du, double *x, - int batchCount, int batchStride, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, const double *, const double *, - double *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseDgtsv2StridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2StridedBatch( - cusparseHandle_t handle, int m, const cuComplex *dl, const cuComplex *d, - const cuComplex *du, cuComplex *x, int batchCount, int batchStride, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const cuComplex *, - const cuComplex *, cuComplex *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseCgtsv2StridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); 
- return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2StridedBatch( - cusparseHandle_t handle, int m, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, cuDoubleComplex *x, - int batchCount, int batchStride, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, const cuDoubleComplex *, - const cuDoubleComplex *, cuDoubleComplex *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseZgtsv2StridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const float *dl, const float *d, - const float *du, const float *x, int batchCount, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const float *, const float *, - const float *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSgtsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const double *dl, const double *d, - const double *du, const double *x, int batchCount, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, const double *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDgtsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseCgtsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const cuComplex *dl, - const cuComplex *d, const cuComplex *du, const cuComplex *x, int batchCount, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, const cuComplex *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgtsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, - const cuDoubleComplex *x, int batchCount, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, const cuDoubleComplex *, - int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgtsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, float *dl, float *d, float *du, - float *x, int batchCount, void *pBuffer) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, int, float *, - float *, float *, float *, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgtsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, double *dl, double *d, double *du, - double *x, int 
batchCount, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, int, - double *, double *, double *, - double *, int, void *); - static auto func_ptr = LoadSymbol("cusparseDgtsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, cuComplex *dl, cuComplex *d, - cuComplex *du, cuComplex *x, int batchCount, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, cuComplex *, cuComplex *, cuComplex *, - cuComplex *, int, void *); - static auto func_ptr = LoadSymbol("cusparseCgtsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, cuDoubleComplex *dl, - cuDoubleComplex *d, cuDoubleComplex *du, cuDoubleComplex *x, int batchCount, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, cuDoubleComplex *, cuDoubleComplex *, - cuDoubleComplex *, cuDoubleComplex *, int, void *); - static auto func_ptr = LoadSymbol("cusparseZgtsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgpsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const float *ds, const float *dl, - const float *d, const float *du, const float *dw, const float *x, - int batchCount, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const float *, const float *, - const float *, const float *, const float *, int, size_t *); - static auto func_ptr = - 
LoadSymbol("cusparseSgpsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgpsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const double *ds, - const double *dl, const double *d, const double *du, const double *dw, - const double *x, int batchCount, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, const double *, const double *, const double *, int, - size_t *); - static auto func_ptr = - LoadSymbol("cusparseDgpsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgpsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const cuComplex *ds, - const cuComplex *dl, const cuComplex *d, const cuComplex *du, - const cuComplex *dw, const cuComplex *x, int batchCount, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, const cuComplex *, const cuComplex *, - const cuComplex *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgpsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgpsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const cuDoubleComplex *ds, - const cuDoubleComplex *dl, const cuDoubleComplex *d, - const cuDoubleComplex *du, const cuDoubleComplex *dw, - const cuDoubleComplex *x, int batchCount, size_t *pBufferSizeInBytes) { - using 
FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgpsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgpsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, float *ds, float *dl, float *d, - float *du, float *dw, float *x, int batchCount, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, float *, float *, float *, float *, float *, - float *, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgpsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgpsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, double *ds, double *dl, double *d, - double *du, double *dw, double *x, int batchCount, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, double *, double *, double *, double *, - double *, double *, int, void *); - static auto func_ptr = LoadSymbol("cusparseDgpsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgpsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, cuComplex *ds, cuComplex *dl, - cuComplex *d, cuComplex *du, cuComplex *dw, cuComplex *x, int batchCount, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, cuComplex *, cuComplex *, cuComplex *, - cuComplex *, cuComplex *, cuComplex *, 
int, void *); - static auto func_ptr = LoadSymbol("cusparseCgpsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgpsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, cuDoubleComplex *ds, - cuDoubleComplex *dl, cuDoubleComplex *d, cuDoubleComplex *du, - cuDoubleComplex *dw, cuDoubleComplex *x, int batchCount, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, cuDoubleComplex *, cuDoubleComplex *, - cuDoubleComplex *, cuDoubleComplex *, cuDoubleComplex *, - cuDoubleComplex *, int, void *); - static auto func_ptr = LoadSymbol("cusparseZgpsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseXcsrgemmNnz(cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, - const cusparseMatDescr_t descrA, const int nnzA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, const int nnzB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrC, int *csrSortedRowPtrC, - int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - const cusparseMatDescr_t, const int, const int *, const int *, - const cusparseMatDescr_t, const int, const int *, const int *, - const cusparseMatDescr_t, int *, int *); - static auto func_ptr = LoadSymbol("cusparseXcsrgemmNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, descrA, nnzA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedRowPtrB, csrSortedColIndB, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr); -} - 
-cusparseStatus_t CUSPARSEAPI cusparseScsrgemm( - cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, - const cusparseMatDescr_t descrA, const int nnzA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, const int nnzB, const float *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrC, float *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - const cusparseMatDescr_t, const int, const float *, const int *, - const int *, const cusparseMatDescr_t, const int, const float *, - const int *, const int *, const cusparseMatDescr_t, float *, const int *, - int *); - static auto func_ptr = LoadSymbol("cusparseScsrgemm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrgemm( - cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, - const cusparseMatDescr_t descrA, int nnzA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, int nnzB, const double *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrC, double *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - const cusparseMatDescr_t, int, const double *, const int *, const int *, - const 
cusparseMatDescr_t, int, const double *, const int *, const int *, - const cusparseMatDescr_t, double *, const int *, int *); - static auto func_ptr = LoadSymbol("cusparseDcsrgemm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrgemm( - cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, - const cusparseMatDescr_t descrA, int nnzA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, int nnzB, const cuComplex *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrC, cuComplex *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - const cusparseMatDescr_t, int, const cuComplex *, const int *, - const int *, const cusparseMatDescr_t, int, const cuComplex *, - const int *, const int *, const cusparseMatDescr_t, cuComplex *, - const int *, int *); - static auto func_ptr = LoadSymbol("cusparseCcsrgemm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrgemm( - cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, - const cusparseMatDescr_t descrA, int nnzA, - const cuDoubleComplex *csrSortedValA, const int 
*csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, - const cuDoubleComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - cuDoubleComplex *csrSortedValC, const int *csrSortedRowPtrC, - int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - const cusparseMatDescr_t, int, const cuDoubleComplex *, const int *, - const int *, const cusparseMatDescr_t, int, const cuDoubleComplex *, - const int *, const int *, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, int *); - static auto func_ptr = LoadSymbol("cusparseZcsrgemm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsrgemm2Info(csrgemm2Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrgemm2Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateCsrgemm2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyCsrgemm2Info(csrgemm2Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrgemm2Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyCsrgemm2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrgemm2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int k, const float *alpha, - const cusparseMatDescr_t descrA, int nnzA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, const float *beta, - const 
cusparseMatDescr_t descrD, int nnzD, const int *csrSortedRowPtrD, - const int *csrSortedColIndD, csrgemm2Info_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const float *, const cusparseMatDescr_t, - int, const int *, const int *, const cusparseMatDescr_t, int, const int *, - const int *, const float *, const cusparseMatDescr_t, int, const int *, - const int *, csrgemm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseScsrgemm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, - csrSortedColIndB, beta, descrD, nnzD, csrSortedRowPtrD, - csrSortedColIndD, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrgemm2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int k, const double *alpha, - const cusparseMatDescr_t descrA, int nnzA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const double *beta, const cusparseMatDescr_t descrD, int nnzD, - const int *csrSortedRowPtrD, const int *csrSortedColIndD, - csrgemm2Info_t info, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const double *, const cusparseMatDescr_t, - int, const int *, const int *, const cusparseMatDescr_t, int, const int *, - const int *, const double *, const cusparseMatDescr_t, int, const int *, - const int *, csrgemm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsrgemm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, - csrSortedColIndB, beta, descrD, nnzD, csrSortedRowPtrD, - csrSortedColIndD, info, 
pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrgemm2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int k, const cuComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cuComplex *beta, const cusparseMatDescr_t descrD, int nnzD, - const int *csrSortedRowPtrD, const int *csrSortedColIndD, - csrgemm2Info_t info, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuComplex *, - const cusparseMatDescr_t, int, const int *, const int *, - const cusparseMatDescr_t, int, const int *, const int *, - const cuComplex *, const cusparseMatDescr_t, int, const int *, - const int *, csrgemm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsrgemm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, - csrSortedColIndB, beta, descrD, nnzD, csrSortedRowPtrD, - csrSortedColIndD, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrgemm2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int k, const cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cuDoubleComplex *beta, const cusparseMatDescr_t descrD, int nnzD, - const int *csrSortedRowPtrD, const int *csrSortedColIndD, - csrgemm2Info_t info, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, int, const int *, const int *, - const cusparseMatDescr_t, int, const int *, const int 
*, - const cuDoubleComplex *, const cusparseMatDescr_t, int, const int *, - const int *, csrgemm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsrgemm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, - csrSortedColIndB, beta, descrD, nnzD, csrSortedRowPtrD, - csrSortedColIndD, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrgemm2Nnz( - cusparseHandle_t handle, int m, int n, int k, - const cusparseMatDescr_t descrA, int nnzA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrD, int nnzD, const int *csrSortedRowPtrD, - const int *csrSortedColIndD, const cusparseMatDescr_t descrC, - int *csrSortedRowPtrC, int *nnzTotalDevHostPtr, const csrgemm2Info_t info, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, int, - const int *, const int *, const cusparseMatDescr_t, int, const int *, - const int *, const cusparseMatDescr_t, int, const int *, const int *, - const cusparseMatDescr_t, int *, int *, const csrgemm2Info_t, void *); - static auto func_ptr = LoadSymbol("cusparseXcsrgemm2Nnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, descrA, nnzA, csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, - csrSortedColIndB, descrD, nnzD, csrSortedRowPtrD, - csrSortedColIndD, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrgemm2( - cusparseHandle_t handle, int m, int n, int k, const float *alpha, - const cusparseMatDescr_t descrA, int nnzA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t 
descrB, int nnzB, const float *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, const float *beta, - const cusparseMatDescr_t descrD, int nnzD, const float *csrSortedValD, - const int *csrSortedRowPtrD, const int *csrSortedColIndD, - const cusparseMatDescr_t descrC, float *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC, - const csrgemm2Info_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const float *, const cusparseMatDescr_t, - int, const float *, const int *, const int *, const cusparseMatDescr_t, - int, const float *, const int *, const int *, const float *, - const cusparseMatDescr_t, int, const float *, const int *, const int *, - const cusparseMatDescr_t, float *, const int *, int *, - const csrgemm2Info_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsrgemm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, beta, - descrD, nnzD, csrSortedValD, csrSortedRowPtrD, - csrSortedColIndD, descrC, csrSortedValC, csrSortedRowPtrC, - csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrgemm2( - cusparseHandle_t handle, int m, int n, int k, const double *alpha, - const cusparseMatDescr_t descrA, int nnzA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, int nnzB, const double *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const double *beta, const cusparseMatDescr_t descrD, int nnzD, - const double *csrSortedValD, const int *csrSortedRowPtrD, - const int *csrSortedColIndD, const cusparseMatDescr_t descrC, - double *csrSortedValC, const int *csrSortedRowPtrC, int *csrSortedColIndC, - const csrgemm2Info_t info, void *pBuffer) { - using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const double *, const cusparseMatDescr_t, - int, const double *, const int *, const int *, const cusparseMatDescr_t, - int, const double *, const int *, const int *, const double *, - const cusparseMatDescr_t, int, const double *, const int *, const int *, - const cusparseMatDescr_t, double *, const int *, int *, - const csrgemm2Info_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrgemm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, beta, - descrD, nnzD, csrSortedValD, csrSortedRowPtrD, - csrSortedColIndD, descrC, csrSortedValC, csrSortedRowPtrC, - csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrgemm2( - cusparseHandle_t handle, int m, int n, int k, const cuComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, int nnzB, const cuComplex *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cuComplex *beta, const cusparseMatDescr_t descrD, int nnzD, - const cuComplex *csrSortedValD, const int *csrSortedRowPtrD, - const int *csrSortedColIndD, const cusparseMatDescr_t descrC, - cuComplex *csrSortedValC, const int *csrSortedRowPtrC, - int *csrSortedColIndC, const csrgemm2Info_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuComplex *, - const cusparseMatDescr_t, int, const cuComplex *, const int *, - const int *, const cusparseMatDescr_t, int, const cuComplex *, - const int *, const int *, const cuComplex *, const cusparseMatDescr_t, - int, const cuComplex *, const int *, const int *, - const cusparseMatDescr_t, cuComplex *, const int *, int *, - 
const csrgemm2Info_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsrgemm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, beta, - descrD, nnzD, csrSortedValD, csrSortedRowPtrD, - csrSortedColIndD, descrC, csrSortedValC, csrSortedRowPtrC, - csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrgemm2( - cusparseHandle_t handle, int m, int n, int k, const cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, - const cuDoubleComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cuDoubleComplex *beta, - const cusparseMatDescr_t descrD, int nnzD, - const cuDoubleComplex *csrSortedValD, const int *csrSortedRowPtrD, - const int *csrSortedColIndD, const cusparseMatDescr_t descrC, - cuDoubleComplex *csrSortedValC, const int *csrSortedRowPtrC, - int *csrSortedColIndC, const csrgemm2Info_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, int, const cuDoubleComplex *, const int *, - const int *, const cusparseMatDescr_t, int, const cuDoubleComplex *, - const int *, const int *, const cuDoubleComplex *, - const cusparseMatDescr_t, int, const cuDoubleComplex *, const int *, - const int *, const cusparseMatDescr_t, cuDoubleComplex *, const int *, - int *, const csrgemm2Info_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrgemm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, 
beta, - descrD, nnzD, csrSortedValD, csrSortedRowPtrD, - csrSortedColIndD, descrC, csrSortedValC, csrSortedRowPtrC, - csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrgeamNnz( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - int nnzA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, int nnzB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - int *csrSortedRowPtrC, int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, int, const int *, - const int *, const cusparseMatDescr_t, int, const int *, const int *, - const cusparseMatDescr_t, int *, int *); - static auto func_ptr = LoadSymbol("cusparseXcsrgeamNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, nnzA, csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, - csrSortedColIndB, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrgeam( - cusparseHandle_t handle, int m, int n, const float *alpha, - const cusparseMatDescr_t descrA, int nnzA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *beta, - const cusparseMatDescr_t descrB, int nnzB, const float *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrC, float *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const cusparseMatDescr_t, int, - const float *, const int *, const int *, const float *, - const cusparseMatDescr_t, int, const float *, const int *, const int *, - const cusparseMatDescr_t, float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseScsrgeam"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrgeam( - cusparseHandle_t handle, int m, int n, const double *alpha, - const cusparseMatDescr_t descrA, int nnzA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *beta, const cusparseMatDescr_t descrB, int nnzB, - const double *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - double *csrSortedValC, int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const cusparseMatDescr_t, int, - const double *, const int *, const int *, const double *, - const cusparseMatDescr_t, int, const double *, const int *, const int *, - const cusparseMatDescr_t, double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDcsrgeam"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrgeam( - cusparseHandle_t handle, int m, int n, const cuComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cuComplex *beta, const cusparseMatDescr_t descrB, int nnzB, - const cuComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - cuComplex *csrSortedValC, int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cusparseMatDescr_t, - int, const cuComplex *, const int *, const int *, const cuComplex *, - const cusparseMatDescr_t, int, const cuComplex *, const int *, - const int *, const cusparseMatDescr_t, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCcsrgeam"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrgeam( - cusparseHandle_t handle, int m, int n, const cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *beta, - const cusparseMatDescr_t descrB, int nnzB, - const cuDoubleComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - cuDoubleComplex *csrSortedValC, int *csrSortedRowPtrC, - int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, int, const cuDoubleComplex *, const int *, - const int *, const cuDoubleComplex *, const cusparseMatDescr_t, int, - const cuDoubleComplex *, const int *, const int *, - const cusparseMatDescr_t, cuDoubleComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZcsrgeam"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrgeam2_bufferSizeExt( 
- cusparseHandle_t handle, int m, int n, const float *alpha, - const cusparseMatDescr_t descrA, int nnzA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *beta, - const cusparseMatDescr_t descrB, int nnzB, const float *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrC, const float *csrSortedValC, - const int *csrSortedRowPtrC, const int *csrSortedColIndC, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const cusparseMatDescr_t, int, - const float *, const int *, const int *, const float *, - const cusparseMatDescr_t, int, const float *, const int *, const int *, - const cusparseMatDescr_t, const float *, const int *, const int *, - size_t *); - static auto func_ptr = LoadSymbol("cusparseScsrgeam2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrgeam2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const double *alpha, - const cusparseMatDescr_t descrA, int nnzA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *beta, const cusparseMatDescr_t descrB, int nnzB, - const double *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - const double *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const cusparseMatDescr_t, int, - const double *, const int *, const int *, const double *, - const 
cusparseMatDescr_t, int, const double *, const int *, const int *, - const cusparseMatDescr_t, const double *, const int *, const int *, - size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsrgeam2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrgeam2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const cuComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cuComplex *beta, const cusparseMatDescr_t descrB, int nnzB, - const cuComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - const cuComplex *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cusparseMatDescr_t, - int, const cuComplex *, const int *, const int *, const cuComplex *, - const cusparseMatDescr_t, int, const cuComplex *, const int *, - const int *, const cusparseMatDescr_t, const cuComplex *, const int *, - const int *, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsrgeam2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrgeam2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const 
cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *beta, - const cusparseMatDescr_t descrB, int nnzB, - const cuDoubleComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - const cuDoubleComplex *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, int, const cuDoubleComplex *, const int *, - const int *, const cuDoubleComplex *, const cusparseMatDescr_t, int, - const cuDoubleComplex *, const int *, const int *, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsrgeam2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrgeam2Nnz( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - int nnzA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, int nnzB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - int *csrSortedRowPtrC, int *nnzTotalDevHostPtr, void *workspace) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, int, const int *, - const int *, const cusparseMatDescr_t, int, const int *, const int *, - const cusparseMatDescr_t, int *, int *, void *); - static auto func_ptr = 
LoadSymbol("cusparseXcsrgeam2Nnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, nnzA, csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, - csrSortedColIndB, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, workspace); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrgeam2( - cusparseHandle_t handle, int m, int n, const float *alpha, - const cusparseMatDescr_t descrA, int nnzA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *beta, - const cusparseMatDescr_t descrB, int nnzB, const float *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrC, float *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const cusparseMatDescr_t, int, - const float *, const int *, const int *, const float *, - const cusparseMatDescr_t, int, const float *, const int *, const int *, - const cusparseMatDescr_t, float *, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseScsrgeam2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrgeam2( - cusparseHandle_t handle, int m, int n, const double *alpha, - const cusparseMatDescr_t descrA, int nnzA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *beta, const cusparseMatDescr_t descrB, int nnzB, - const double *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - double *csrSortedValC, int *csrSortedRowPtrC, int *csrSortedColIndC, - void 
*pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const cusparseMatDescr_t, int, - const double *, const int *, const int *, const double *, - const cusparseMatDescr_t, int, const double *, const int *, const int *, - const cusparseMatDescr_t, double *, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrgeam2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrgeam2( - cusparseHandle_t handle, int m, int n, const cuComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cuComplex *beta, const cusparseMatDescr_t descrB, int nnzB, - const cuComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - cuComplex *csrSortedValC, int *csrSortedRowPtrC, int *csrSortedColIndC, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cusparseMatDescr_t, - int, const cuComplex *, const int *, const int *, const cuComplex *, - const cusparseMatDescr_t, int, const cuComplex *, const int *, - const int *, const cusparseMatDescr_t, cuComplex *, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseCcsrgeam2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrgeam2( - 
cusparseHandle_t handle, int m, int n, const cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *beta, - const cusparseMatDescr_t descrB, int nnzB, - const cuDoubleComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - cuDoubleComplex *csrSortedValC, int *csrSortedRowPtrC, - int *csrSortedColIndC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, int, const cuDoubleComplex *, const int *, - const int *, const cuDoubleComplex *, const cusparseMatDescr_t, int, - const cuDoubleComplex *, const int *, const int *, - const cusparseMatDescr_t, cuDoubleComplex *, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrgeam2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrcolor( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const float *fractionToColor, int *ncolors, - int *coloring, int *reordering, const cusparseColorInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, const float *, int *, int *, int *, - const cusparseColorInfo_t); - static auto func_ptr = LoadSymbol("cusparseScsrcolor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, 
fractionToColor, ncolors, coloring, - reordering, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrcolor( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const double *fractionToColor, int *ncolors, - int *coloring, int *reordering, const cusparseColorInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, int *, int *, int *, - const cusparseColorInfo_t); - static auto func_ptr = LoadSymbol("cusparseDcsrcolor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, fractionToColor, ncolors, coloring, - reordering, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrcolor( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const float *fractionToColor, int *ncolors, - int *coloring, int *reordering, const cusparseColorInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, const float *, int *, int *, int *, - const cusparseColorInfo_t); - static auto func_ptr = LoadSymbol("cusparseCcsrcolor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, fractionToColor, ncolors, coloring, - reordering, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrcolor( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const double *fractionToColor, int *ncolors, - int *coloring, int *reordering, const 
cusparseColorInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, const double *, int *, - int *, int *, const cusparseColorInfo_t); - static auto func_ptr = LoadSymbol("cusparseZcsrcolor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, fractionToColor, ncolors, coloring, - reordering, info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSnnz(cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const float *A, int lda, - int *nnzPerRowCol, int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, int, int *, int *); - static auto func_ptr = LoadSymbol("cusparseSnnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDnnz(cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const double *A, int lda, - int *nnzPerRowCol, int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, int, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDnnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCnnz(cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuComplex *A, int lda, - int *nnzPerRowCol, int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, 
int, int, const cusparseMatDescr_t, - const cuComplex *, int, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCnnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI -cusparseZnnz(cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *A, int lda, - int *nnzPerRowCol, int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, int, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZnnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI cusparseSnnz_compress( - cusparseHandle_t handle, int m, const cusparseMatDescr_t descr, - const float *csrSortedValA, const int *csrSortedRowPtrA, int *nnzPerRow, - int *nnzC, float tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cusparseMatDescr_t, const float *, - const int *, int *, int *, float); - static auto func_ptr = LoadSymbol("cusparseSnnz_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, descr, csrSortedValA, csrSortedRowPtrA, nnzPerRow, - nnzC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseDnnz_compress( - cusparseHandle_t handle, int m, const cusparseMatDescr_t descr, - const double *csrSortedValA, const int *csrSortedRowPtrA, int *nnzPerRow, - int *nnzC, double tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cusparseMatDescr_t, const double *, - const int *, int *, int *, double); - static auto func_ptr = LoadSymbol("cusparseDnnz_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, descr, 
csrSortedValA, csrSortedRowPtrA, nnzPerRow, - nnzC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseCnnz_compress( - cusparseHandle_t handle, int m, const cusparseMatDescr_t descr, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, int *nnzPerRow, - int *nnzC, cuComplex tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cusparseMatDescr_t, const cuComplex *, - const int *, int *, int *, cuComplex); - static auto func_ptr = LoadSymbol("cusparseCnnz_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, descr, csrSortedValA, csrSortedRowPtrA, nnzPerRow, - nnzC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseZnnz_compress( - cusparseHandle_t handle, int m, const cusparseMatDescr_t descr, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - int *nnzPerRow, int *nnzC, cuDoubleComplex tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cusparseMatDescr_t, const cuDoubleComplex *, - const int *, int *, int *, cuDoubleComplex); - static auto func_ptr = LoadSymbol("cusparseZnnz_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, descr, csrSortedValA, csrSortedRowPtrA, nnzPerRow, - nnzC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2csr_compress( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedColIndA, - const int *csrSortedRowPtrA, int nnzA, const int *nnzPerRow, - float *csrSortedValC, int *csrSortedColIndC, int *csrSortedRowPtrC, - float tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, int, const int *, float *, int *, int *, float); - static auto func_ptr = LoadSymbol("cusparseScsr2csr_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, 
csrSortedColIndA, - csrSortedRowPtrA, nnzA, nnzPerRow, csrSortedValC, - csrSortedColIndC, csrSortedRowPtrC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2csr_compress( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedColIndA, - const int *csrSortedRowPtrA, int nnzA, const int *nnzPerRow, - double *csrSortedValC, int *csrSortedColIndC, int *csrSortedRowPtrC, - double tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, int, const int *, double *, int *, int *, - double); - static auto func_ptr = LoadSymbol("cusparseDcsr2csr_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedColIndA, - csrSortedRowPtrA, nnzA, nnzPerRow, csrSortedValC, - csrSortedColIndC, csrSortedRowPtrC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2csr_compress( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedColIndA, - const int *csrSortedRowPtrA, int nnzA, const int *nnzPerRow, - cuComplex *csrSortedValC, int *csrSortedColIndC, int *csrSortedRowPtrC, - cuComplex tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, int, const int *, cuComplex *, int *, int *, - cuComplex); - static auto func_ptr = LoadSymbol("cusparseCcsr2csr_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedColIndA, - csrSortedRowPtrA, nnzA, nnzPerRow, csrSortedValC, - csrSortedColIndC, csrSortedRowPtrC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2csr_compress( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int 
*csrSortedColIndA, - const int *csrSortedRowPtrA, int nnzA, const int *nnzPerRow, - cuDoubleComplex *csrSortedValC, int *csrSortedColIndC, - int *csrSortedRowPtrC, cuDoubleComplex tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, const int *, - cuDoubleComplex *, int *, int *, cuDoubleComplex); - static auto func_ptr = LoadSymbol("cusparseZcsr2csr_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedColIndA, - csrSortedRowPtrA, nnzA, nnzPerRow, csrSortedValC, - csrSortedColIndC, csrSortedRowPtrC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseSdense2csr( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *A, int lda, const int *nnzPerRow, float *csrSortedValA, - int *csrSortedRowPtrA, int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, int, - const int *, float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseSdense2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA); -} - -cusparseStatus_t CUSPARSEAPI cusparseDdense2csr( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *A, int lda, const int *nnzPerRow, double *csrSortedValA, - int *csrSortedRowPtrA, int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, int, - const int *, double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDdense2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA); -} - 
-cusparseStatus_t CUSPARSEAPI cusparseCdense2csr( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *A, int lda, const int *nnzPerRow, cuComplex *csrSortedValA, - int *csrSortedRowPtrA, int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - int, const int *, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCdense2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA); -} - -cusparseStatus_t CUSPARSEAPI cusparseZdense2csr( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuDoubleComplex *A, int lda, const int *nnzPerRow, - cuDoubleComplex *csrSortedValA, int *csrSortedRowPtrA, - int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, int, const int *, cuDoubleComplex *, int *, - int *); - static auto func_ptr = LoadSymbol("cusparseZdense2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, float *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, float *, int); - static auto func_ptr = LoadSymbol("cusparseScsr2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseDcsr2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, double *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, double *, int); - static auto func_ptr = LoadSymbol("cusparseDcsr2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cuComplex *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCcsr2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cuDoubleComplex *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cusparseZcsr2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseSdense2csc( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t 
descrA, - const float *A, int lda, const int *nnzPerCol, float *cscSortedValA, - int *cscSortedRowIndA, int *cscSortedColPtrA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, int, - const int *, float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseSdense2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerCol, cscSortedValA, - cscSortedRowIndA, cscSortedColPtrA); -} - -cusparseStatus_t CUSPARSEAPI cusparseDdense2csc( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *A, int lda, const int *nnzPerCol, double *cscSortedValA, - int *cscSortedRowIndA, int *cscSortedColPtrA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, int, - const int *, double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDdense2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerCol, cscSortedValA, - cscSortedRowIndA, cscSortedColPtrA); -} - -cusparseStatus_t CUSPARSEAPI cusparseCdense2csc( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *A, int lda, const int *nnzPerCol, cuComplex *cscSortedValA, - int *cscSortedRowIndA, int *cscSortedColPtrA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - int, const int *, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCdense2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerCol, cscSortedValA, - cscSortedRowIndA, cscSortedColPtrA); -} - -cusparseStatus_t CUSPARSEAPI cusparseZdense2csc( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuDoubleComplex *A, int lda, const int 
*nnzPerCol, - cuDoubleComplex *cscSortedValA, int *cscSortedRowIndA, - int *cscSortedColPtrA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, int, const int *, cuDoubleComplex *, int *, - int *); - static auto func_ptr = LoadSymbol("cusparseZdense2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerCol, cscSortedValA, - cscSortedRowIndA, cscSortedColPtrA); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsc2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, float *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, float *, int); - static auto func_ptr = LoadSymbol("cusparseScsc2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsc2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, double *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, double *, int); - static auto func_ptr = LoadSymbol("cusparseDcsc2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsc2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, cuComplex *A, int lda) { - using 
FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCcsc2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsc2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuDoubleComplex *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, cuDoubleComplex *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cusparseZcsc2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcoo2csr(cusparseHandle_t handle, - const int *cooRowInd, int nnz, - int m, int *csrSortedRowPtr, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const int *, int, int, int *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseXcoo2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, cooRowInd, nnz, m, csrSortedRowPtr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsr2coo(cusparseHandle_t handle, - const int *csrSortedRowPtr, - int nnz, int m, int *cooRowInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const int *, int, int, int *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseXcsr2coo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, csrSortedRowPtr, nnz, m, cooRowInd, idxBase); -} - 
-cusparseStatus_t CUSPARSEAPI cusparseCsr2cscEx( - cusparseHandle_t handle, int m, int n, int nnz, const void *csrSortedVal, - cudaDataType csrSortedValtype, const int *csrSortedRowPtr, - const int *csrSortedColInd, void *cscSortedVal, - cudaDataType cscSortedValtype, int *cscSortedRowInd, int *cscSortedColPtr, - cusparseAction_t copyValues, cusparseIndexBase_t idxBase, - cudaDataType executiontype) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const void *, cudaDataType, const int *, - const int *, void *, cudaDataType, int *, int *, cusparseAction_t, - cusparseIndexBase_t, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCsr2cscEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrSortedVal, csrSortedValtype, - csrSortedRowPtr, csrSortedColInd, cscSortedVal, - cscSortedValtype, cscSortedRowInd, cscSortedColPtr, - copyValues, idxBase, executiontype); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2csc( - cusparseHandle_t handle, int m, int n, int nnz, const float *csrSortedVal, - const int *csrSortedRowPtr, const int *csrSortedColInd, float *cscSortedVal, - int *cscSortedRowInd, int *cscSortedColPtr, cusparseAction_t copyValues, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const float *, const int *, const int *, - float *, int *, int *, cusparseAction_t, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseScsr2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, cscSortedVal, cscSortedRowInd, - cscSortedColPtr, copyValues, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2csc( - cusparseHandle_t handle, int m, int n, int nnz, const double *csrSortedVal, - const int *csrSortedRowPtr, const int *csrSortedColInd, - double *cscSortedVal, int *cscSortedRowInd, int *cscSortedColPtr, - 
cusparseAction_t copyValues, cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const double *, const int *, const int *, - double *, int *, int *, cusparseAction_t, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseDcsr2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, cscSortedVal, cscSortedRowInd, - cscSortedColPtr, copyValues, idxBase); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCcsr2csc(cusparseHandle_t handle, int m, int n, int nnz, - const cuComplex *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, cuComplex *cscSortedVal, - int *cscSortedRowInd, int *cscSortedColPtr, - cusparseAction_t copyValues, cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuComplex *, const int *, - const int *, cuComplex *, int *, int *, cusparseAction_t, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseCcsr2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, cscSortedVal, cscSortedRowInd, - cscSortedColPtr, copyValues, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2csc( - cusparseHandle_t handle, int m, int n, int nnz, - const cuDoubleComplex *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, cuDoubleComplex *cscSortedVal, - int *cscSortedRowInd, int *cscSortedColPtr, cusparseAction_t copyValues, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuDoubleComplex *, const int *, - const int *, cuDoubleComplex *, int *, int *, cusparseAction_t, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseZcsr2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, 
m, n, nnz, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, cscSortedVal, cscSortedRowInd, - cscSortedColPtr, copyValues, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseSdense2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *A, int lda, const int *nnzPerRow, cusparseHybMat_t hybA, - int userEllWidth, cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, int, - const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseSdense2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, hybA, userEllWidth, - partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseDdense2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *A, int lda, const int *nnzPerRow, cusparseHybMat_t hybA, - int userEllWidth, cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, int, - const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseDdense2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, hybA, userEllWidth, - partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseCdense2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *A, int lda, const int *nnzPerRow, cusparseHybMat_t hybA, - int userEllWidth, cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - int, const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseCdense2hyb"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, hybA, userEllWidth, - partitionType); -} - -cusparseStatus_t CUSPARSEAPI -cusparseZdense2hyb(cusparseHandle_t handle, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *A, - int lda, const int *nnzPerRow, cusparseHybMat_t hybA, - int userEllWidth, cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, int, const int *, cusparseHybMat_t, int, - cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseZdense2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, hybA, userEllWidth, - partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseShyb2dense(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - float *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - float *, int); - static auto func_ptr = LoadSymbol("cusparseShyb2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseDhyb2dense(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - double *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - double *, int); - static auto func_ptr = LoadSymbol("cusparseDhyb2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseChyb2dense(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - cuComplex *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const 
cusparseMatDescr_t, const cusparseHybMat_t, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseChyb2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseZhyb2dense(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - cuDoubleComplex *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cusparseZhyb2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseHybMat_t hybA, int userEllWidth, - cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseScsr2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, hybA, userEllWidth, partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseHybMat_t hybA, int userEllWidth, - cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseDcsr2hyb"); - 
if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, hybA, userEllWidth, partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseHybMat_t hybA, int userEllWidth, - cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseCcsr2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, hybA, userEllWidth, partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseHybMat_t hybA, int userEllWidth, - cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, cusparseHybMat_t, int, - cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseZcsr2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, hybA, userEllWidth, partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseShyb2csr(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - float *csrSortedValA, - int *csrSortedRowPtrA, - int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const 
cusparseHybMat_t, - float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseShyb2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA); -} - -cusparseStatus_t CUSPARSEAPI cusparseDhyb2csr(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - double *csrSortedValA, - int *csrSortedRowPtrA, - int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDhyb2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA); -} - -cusparseStatus_t CUSPARSEAPI cusparseChyb2csr(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - cuComplex *csrSortedValA, - int *csrSortedRowPtrA, - int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseChyb2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA); -} - -cusparseStatus_t CUSPARSEAPI cusparseZhyb2csr(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - cuDoubleComplex *csrSortedValA, - int *csrSortedRowPtrA, - int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - cuDoubleComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZhyb2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA); -} - -cusparseStatus_t 
CUSPARSEAPI cusparseScsc2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, cusparseHybMat_t hybA, int userEllWidth, - cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseScsc2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, hybA, userEllWidth, partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsc2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, cusparseHybMat_t hybA, int userEllWidth, - cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseDcsc2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, hybA, userEllWidth, partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsc2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, cusparseHybMat_t hybA, int userEllWidth, - cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = 
LoadSymbol("cusparseCcsc2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, hybA, userEllWidth, partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsc2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuDoubleComplex *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, cusparseHybMat_t hybA, int userEllWidth, - cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, cusparseHybMat_t, int, - cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseZcsc2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, hybA, userEllWidth, partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseShyb2csc(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - float *cscSortedVal, - int *cscSortedRowInd, - int *cscSortedColPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseShyb2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, cscSortedVal, cscSortedRowInd, - cscSortedColPtr); -} - -cusparseStatus_t CUSPARSEAPI cusparseDhyb2csc(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - double *cscSortedVal, - int *cscSortedRowInd, - int *cscSortedColPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDhyb2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); 
- return func_ptr(handle, descrA, hybA, cscSortedVal, cscSortedRowInd, - cscSortedColPtr); -} - -cusparseStatus_t CUSPARSEAPI cusparseChyb2csc(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - cuComplex *cscSortedVal, - int *cscSortedRowInd, - int *cscSortedColPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseChyb2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, cscSortedVal, cscSortedRowInd, - cscSortedColPtr); -} - -cusparseStatus_t CUSPARSEAPI cusparseZhyb2csc(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - cuDoubleComplex *cscSortedVal, - int *cscSortedRowInd, - int *cscSortedColPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - cuDoubleComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZhyb2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, cscSortedVal, cscSortedRowInd, - cscSortedColPtr); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsr2bsrNnz( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, int blockDim, const cusparseMatDescr_t descrC, - int *bsrSortedRowPtrC, int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const int *, const int *, int, const cusparseMatDescr_t, int *, int *); - static auto func_ptr = LoadSymbol("cusparseXcsr2bsrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedRowPtrA, - csrSortedColIndA, blockDim, descrC, bsrSortedRowPtrC, - 
nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2bsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, float *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, const cusparseMatDescr_t, - float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseScsr2bsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, blockDim, descrC, bsrSortedValC, - bsrSortedRowPtrC, bsrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2bsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, double *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, const cusparseMatDescr_t, - double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDcsr2bsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, blockDim, descrC, bsrSortedValC, - bsrSortedRowPtrC, bsrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2bsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int 
*csrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, cuComplex *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, - const cusparseMatDescr_t, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCcsr2bsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, blockDim, descrC, bsrSortedValC, - bsrSortedRowPtrC, bsrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2bsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, cuDoubleComplex *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, - const cusparseMatDescr_t, cuDoubleComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZcsr2bsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, blockDim, descrC, bsrSortedValC, - bsrSortedRowPtrC, bsrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, float *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - 
cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, const cusparseMatDescr_t, - float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseSbsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, blockDim, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, double *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, const cusparseMatDescr_t, - double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDbsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, blockDim, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, cuComplex *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, - const cusparseMatDescr_t, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCbsr2csr"); 
- if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, blockDim, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, cuDoubleComplex *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, - const cusparseMatDescr_t, cuDoubleComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZbsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, blockDim, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsc_bufferSize( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const float *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const float *, const int *, const int *, - int, int, int *); - static auto func_ptr = LoadSymbol("cusparseSgebsr2gebsc_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsc_bufferSize( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const double *bsrSortedVal, const int *bsrSortedRowPtr, - 
const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const double *, const int *, const int *, - int, int, int *); - static auto func_ptr = LoadSymbol("cusparseDgebsr2gebsc_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsc_bufferSize( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuComplex *, const int *, - const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseCgebsr2gebsc_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsc_bufferSize( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuDoubleComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuDoubleComplex *, const int *, - const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseZgebsr2gebsc_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsc_bufferSizeExt( - cusparseHandle_t handle, int mb, int nb, 
int nnzb, - const float *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const float *, const int *, const int *, - int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSgebsr2gebsc_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsc_bufferSizeExt( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const double *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const double *, const int *, const int *, - int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDgebsr2gebsc_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsc_bufferSizeExt( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuComplex *, const int *, - const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgebsr2gebsc_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsc_bufferSizeExt( - 
cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuDoubleComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuDoubleComplex *, const int *, - const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgebsr2gebsc_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsc( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const float *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, float *bscVal, - int *bscRowInd, int *bscColPtr, cusparseAction_t copyValues, - cusparseIndexBase_t baseIdx, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const float *, const int *, const int *, - int, int, float *, int *, int *, cusparseAction_t, cusparseIndexBase_t, - void *); - static auto func_ptr = LoadSymbol("cusparseSgebsr2gebsc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, bscVal, bscRowInd, - bscColPtr, copyValues, baseIdx, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsc( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const double *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - double *bscVal, int *bscRowInd, int *bscColPtr, cusparseAction_t copyValues, - cusparseIndexBase_t baseIdx, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const double *, const int *, const int *, - int, int, double *, int *, 
int *, cusparseAction_t, cusparseIndexBase_t, - void *); - static auto func_ptr = LoadSymbol("cusparseDgebsr2gebsc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, bscVal, bscRowInd, - bscColPtr, copyValues, baseIdx, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsc( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - cuComplex *bscVal, int *bscRowInd, int *bscColPtr, - cusparseAction_t copyValues, cusparseIndexBase_t baseIdx, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuComplex *, const int *, - const int *, int, int, cuComplex *, int *, int *, cusparseAction_t, - cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseCgebsr2gebsc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, bscVal, bscRowInd, - bscColPtr, copyValues, baseIdx, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsc( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuDoubleComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - cuDoubleComplex *bscVal, int *bscRowInd, int *bscColPtr, - cusparseAction_t copyValues, cusparseIndexBase_t baseIdx, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuDoubleComplex *, const int *, - const int *, int, int, cuDoubleComplex *, int *, int *, cusparseAction_t, - cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseZgebsr2gebsc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, 
bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, bscVal, bscRowInd, - bscColPtr, copyValues, baseIdx, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXgebsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, int rowBlockDim, int colBlockDim, - const cusparseMatDescr_t descrC, int *csrSortedRowPtrC, - int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const int *, const int *, int, int, const cusparseMatDescr_t, int *, - int *); - static auto func_ptr = LoadSymbol("cusparseXgebsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, - csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDim, - int colBlockDim, const cusparseMatDescr_t descrC, float *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, int, - const cusparseMatDescr_t, float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseSgebsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const 
cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDim, - int colBlockDim, const cusparseMatDescr_t descrC, double *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, int, - const cusparseMatDescr_t, double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDgebsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDim, - int colBlockDim, const cusparseMatDescr_t descrC, cuComplex *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, int, - const cusparseMatDescr_t, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCgebsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDim, 
- int colBlockDim, const cusparseMatDescr_t descrC, - cuDoubleComplex *csrSortedValC, int *csrSortedRowPtrC, - int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, int, - const cusparseMatDescr_t, cuDoubleComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZgebsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseScsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, int, int *); - static auto func_ptr = 
LoadSymbol("cusparseDcsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseCcsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseZcsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const 
float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseScsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDcsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCcsr2gebsr_bufferSizeExt"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZcsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsr2gebsrNnz( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrC, - int *bsrSortedRowPtrC, int rowBlockDim, int colBlockDim, - int *nnzTotalDevHostPtr, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const int *, const int *, const cusparseMatDescr_t, int *, int, int, - int *, void *); - static auto func_ptr = LoadSymbol("cusparseXcsr2gebsrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedRowPtrA, - csrSortedColIndA, descrC, bsrSortedRowPtrC, rowBlockDim, - colBlockDim, nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - 
const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrC, float *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDim, - int colBlockDim, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, const cusparseMatDescr_t, - float *, int *, int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseScsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDim, colBlockDim, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrC, double *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDim, - int colBlockDim, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, const cusparseMatDescr_t, - double *, int *, int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseDcsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDim, colBlockDim, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrC, 
cuComplex *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDim, - int colBlockDim, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, const cusparseMatDescr_t, - cuComplex *, int *, int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseCcsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDim, colBlockDim, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrC, cuDoubleComplex *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDim, - int colBlockDim, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - const cusparseMatDescr_t, cuDoubleComplex *, int *, int *, int, int, - void *); - static auto func_ptr = LoadSymbol("cusparseZcsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDim, colBlockDim, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, 
int colBlockDimC, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const float *, const int *, const int *, int, - int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseSgebsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const double *, const int *, const int *, int, - int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseDgebsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuComplex *, const int *, const 
int *, - int, int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseCgebsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, int, int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseZgebsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const float *, const int *, const int *, int, - int, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSgebsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, 
nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const double *, const int *, const int *, int, - int, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDgebsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - int, int, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgebsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsr_bufferSizeExt( - 
cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, int, int, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgebsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseXgebsr2gebsrNnz( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, int rowBlockDimA, int colBlockDimA, - const cusparseMatDescr_t descrC, int *bsrSortedRowPtrC, int rowBlockDimC, - int colBlockDimC, int *nnzTotalDevHostPtr, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const int *, const int *, int, int, - const cusparseMatDescr_t, int *, int, int, int *, void *); - static auto func_ptr = LoadSymbol("cusparseXgebsr2gebsrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDimA, colBlockDimA, descrC, - bsrSortedRowPtrC, rowBlockDimC, colBlockDimC, - nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - 
const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, const cusparseMatDescr_t descrC, float *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDimC, - int colBlockDimC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const float *, const int *, const int *, int, - int, const cusparseMatDescr_t, float *, int *, int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgebsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDimC, colBlockDimC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, const cusparseMatDescr_t descrC, double *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDimC, - int colBlockDimC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const double *, const int *, const int *, int, - int, const cusparseMatDescr_t, double *, int *, int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseDgebsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDimC, colBlockDimC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsr( - cusparseHandle_t 
handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, const cusparseMatDescr_t descrC, cuComplex *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDimC, - int colBlockDimC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - int, int, const cusparseMatDescr_t, cuComplex *, int *, int *, int, int, - void *); - static auto func_ptr = LoadSymbol("cusparseCgebsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDimC, colBlockDimC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, const cusparseMatDescr_t descrC, - cuDoubleComplex *bsrSortedValC, int *bsrSortedRowPtrC, - int *bsrSortedColIndC, int rowBlockDimC, int colBlockDimC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, int, int, const cusparseMatDescr_t, cuDoubleComplex *, int *, - int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseZgebsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - 
colBlockDimA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDimC, colBlockDimC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCreateIdentityPermutation(cusparseHandle_t handle, int n, int *p) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, int *); - static auto func_ptr = - LoadSymbol("cusparseCreateIdentityPermutation"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, p); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcoosort_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, const int *cooRowsA, - const int *cooColsA, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const int *, const int *, size_t *); - static auto func_ptr = LoadSymbol("cusparseXcoosort_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, cooRowsA, cooColsA, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcoosortByRow(cusparseHandle_t handle, - int m, int n, int nnz, - int *cooRowsA, int *cooColsA, - int *P, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, int *, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseXcoosortByRow"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, cooRowsA, cooColsA, P, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcoosortByColumn(cusparseHandle_t handle, - int m, int n, int nnz, - int *cooRowsA, - int *cooColsA, int *P, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, int *, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseXcoosortByColumn"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, cooRowsA, cooColsA, P, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrsort_bufferSizeExt( - cusparseHandle_t 
handle, int m, int n, int nnz, const int *csrRowPtrA, - const int *csrColIndA, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const int *, const int *, size_t *); - static auto func_ptr = LoadSymbol("cusparseXcsrsort_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrRowPtrA, csrColIndA, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrsort(cusparseHandle_t handle, int m, - int n, int nnz, - const cusparseMatDescr_t descrA, - const int *csrRowPtrA, - int *csrColIndA, int *P, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const int *, - int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseXcsrsort"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrRowPtrA, csrColIndA, P, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcscsort_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, const int *cscColPtrA, - const int *cscRowIndA, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const int *, const int *, size_t *); - static auto func_ptr = LoadSymbol("cusparseXcscsort_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, cscColPtrA, cscRowIndA, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcscsort(cusparseHandle_t handle, int m, - int n, int nnz, - const cusparseMatDescr_t descrA, - const int *cscColPtrA, - int *cscRowIndA, int *P, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const int *, - int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseXcscsort"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, 
m, n, nnz, descrA, cscColPtrA, cscRowIndA, P, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsru2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, float *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, float *, const int *, int *, - csru2csrInfo_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseScsru2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsru2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, double *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, double *, const int *, int *, - csru2csrInfo_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsru2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsru2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, cuComplex *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, cuComplex *, const int *, int *, - csru2csrInfo_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsru2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsru2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, cuDoubleComplex *csrVal, - const int *csrRowPtr, 
int *csrColInd, csru2csrInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, cuDoubleComplex *, const int *, int *, - csru2csrInfo_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsru2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsru2csr( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, float *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, float *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsru2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsru2csr( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, double *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, double *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsru2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsru2csr( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, cuComplex *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, 
cuComplex *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsru2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsru2csr( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, cuDoubleComplex *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsru2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2csru( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, float *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, float *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsr2csru"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2csru( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, double *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, double *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsr2csru"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2csru( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, cuComplex *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsr2csru"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2csru( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, cuDoubleComplex *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsr2csru"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - const float *threshold, const cusparseMatDescr_t descrC, - const float *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - size_t *); - static auto func_ptr = - LoadSymbol("cusparseSpruneDense2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, 
A, lda, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - const double *threshold, const cusparseMatDescr_t descrC, - const double *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - size_t *); - static auto func_ptr = - LoadSymbol("cusparseDpruneDense2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csrNnz( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - const float *threshold, const cusparseMatDescr_t descrC, int *csrRowPtrC, - int *nnzTotalDevHostPtr, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, const float *, - const cusparseMatDescr_t, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseSpruneDense2csrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrRowPtrC, - nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csrNnz( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - const double *threshold, const cusparseMatDescr_t descrC, - int *csrSortedRowPtrC, int *nnzTotalDevHostPtr, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, const double *, - const cusparseMatDescr_t, int *, int *, void *); - static auto func_ptr = 
LoadSymbol("cusparseDpruneDense2csrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csr( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - const float *threshold, const cusparseMatDescr_t descrC, - float *csrSortedValC, const int *csrSortedRowPtrC, int *csrSortedColIndC, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, const float *, - const cusparseMatDescr_t, float *, const int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseSpruneDense2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csr( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - const double *threshold, const cusparseMatDescr_t descrC, - double *csrSortedValC, const int *csrSortedRowPtrC, int *csrSortedColIndC, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, const double *, - const cusparseMatDescr_t, double *, const int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseDpruneDense2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const float *threshold, const cusparseMatDescr_t descrC, - const float *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, 
size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, const float *, const cusparseMatDescr_t, - const float *, const int *, const int *, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSpruneCsr2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *threshold, const cusparseMatDescr_t descrC, - const double *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, const cusparseMatDescr_t, - const double *, const int *, const int *, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDpruneCsr2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csrNnz( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const float *threshold, const cusparseMatDescr_t descrC, - int *csrSortedRowPtrC, - int *nnzTotalDevHostPtr, /* can be on host or device */ - void *pBuffer) { - 
using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, const float *, const cusparseMatDescr_t, int *, - int *, void *); - static auto func_ptr = LoadSymbol("cusparseSpruneCsr2csrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, threshold, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrNnz( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *threshold, const cusparseMatDescr_t descrC, - int *csrSortedRowPtrC, - int *nnzTotalDevHostPtr, /* can be on host or device */ - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, const cusparseMatDescr_t, int *, - int *, void *); - static auto func_ptr = LoadSymbol("cusparseDpruneCsr2csrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, threshold, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csr( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const float *threshold, const cusparseMatDescr_t descrC, - float *csrSortedValC, const int *csrSortedRowPtrC, int *csrSortedColIndC, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, const float *, const cusparseMatDescr_t, - float *, 
const int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseSpruneCsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csr( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *threshold, const cusparseMatDescr_t descrC, - double *csrSortedValC, const int *csrSortedRowPtrC, int *csrSortedColIndC, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, const cusparseMatDescr_t, - double *, const int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseDpruneCsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csrByPercentage_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - float percentage, /* between 0 to 100 */ - const cusparseMatDescr_t descrC, const float *csrSortedValC, - const int *csrSortedRowPtrC, const int *csrSortedColIndC, pruneInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, float, - const cusparseMatDescr_t, const float *, const int *, const int *, - pruneInfo_t, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSpruneDense2csrByPercentage_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, 
percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csrByPercentage_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - float percentage, /* between 0 to 100 */ - const cusparseMatDescr_t descrC, const double *csrSortedValC, - const int *csrSortedRowPtrC, const int *csrSortedColIndC, pruneInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, float, - const cusparseMatDescr_t, const double *, const int *, const int *, - pruneInfo_t, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDpruneDense2csrByPercentage_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csrNnzByPercentage( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - float percentage, /* between 0 to 100 */ - const cusparseMatDescr_t descrC, int *csrRowPtrC, - int *nnzTotalDevHostPtr, /* can be on host or device */ - pruneInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, float, - const cusparseMatDescr_t, int *, int *, pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseSpruneDense2csrNnzByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrRowPtrC, - nnzTotalDevHostPtr, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csrNnzByPercentage( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - float percentage, /* between 0 to 100 */ - const cusparseMatDescr_t descrC, int *csrRowPtrC, - int *nnzTotalDevHostPtr, /* can be on host or device */ - 
pruneInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, float, - const cusparseMatDescr_t, int *, int *, pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseDpruneDense2csrNnzByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrRowPtrC, - nnzTotalDevHostPtr, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csrByPercentage( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - float percentage, /* between 0 to 100 */ - const cusparseMatDescr_t descrC, float *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC, pruneInfo_t info, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, float, - const cusparseMatDescr_t, float *, const int *, int *, pruneInfo_t, - void *); - static auto func_ptr = - LoadSymbol("cusparseSpruneDense2csrByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csrByPercentage( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - float percentage, /* between 0 to 100 */ - const cusparseMatDescr_t descrC, double *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC, pruneInfo_t info, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, float, - const cusparseMatDescr_t, double *, const int *, int *, pruneInfo_t, - void *); - static auto func_ptr = - LoadSymbol("cusparseDpruneDense2csrByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, 
pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csrByPercentage_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - float percentage, /* between 0 to 100 */ - const cusparseMatDescr_t descrC, const float *csrSortedValC, - const int *csrSortedRowPtrC, const int *csrSortedColIndC, pruneInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, float, const cusparseMatDescr_t, const float *, - const int *, const int *, pruneInfo_t, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSpruneCsr2csrByPercentage_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrByPercentage_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - float percentage, /* between 0 to 100 */ - const cusparseMatDescr_t descrC, const double *csrSortedValC, - const int *csrSortedRowPtrC, const int *csrSortedColIndC, pruneInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, float, const cusparseMatDescr_t, const double *, - const int *, const int *, pruneInfo_t, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDpruneCsr2csrByPercentage_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, 
csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csrNnzByPercentage( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - float percentage, /* between 0 to 100 */ - const cusparseMatDescr_t descrC, int *csrSortedRowPtrC, - int *nnzTotalDevHostPtr, /* can be on host or device */ - pruneInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, float, const cusparseMatDescr_t, int *, int *, - pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseSpruneCsr2csrNnzByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, percentage, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrNnzByPercentage( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - float percentage, /* between 0 to 100 */ - const cusparseMatDescr_t descrC, int *csrSortedRowPtrC, - int *nnzTotalDevHostPtr, /* can be on host or device */ - pruneInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, float, const cusparseMatDescr_t, int *, int *, - pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseDpruneCsr2csrNnzByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, 
csrSortedRowPtrA, - csrSortedColIndA, percentage, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csrByPercentage( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - float percentage, /* between 0 to 100 */ - const cusparseMatDescr_t descrC, float *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC, pruneInfo_t info, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, float, const cusparseMatDescr_t, float *, - const int *, int *, pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseSpruneCsr2csrByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrByPercentage( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - float percentage, /* between 0 to 100 */ - const cusparseMatDescr_t descrC, double *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC, pruneInfo_t info, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, float, const cusparseMatDescr_t, double *, - const int *, int *, pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseDpruneCsr2csrByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - 
csrSortedColIndA, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBuffer); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cusparse_10_1.inc b/third_party/xla/third_party/tsl/tsl/cuda/cusparse_10_1.inc deleted file mode 100644 index 03d6d0c20d5223..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cusparse_10_1.inc +++ /dev/null @@ -1,8262 +0,0 @@ -// Auto-generated, do not edit. - -extern "C" { - -cusparseStatus_t CUSPARSEAPI cusparseCreate(cusparseHandle_t *handle) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t *); - static auto func_ptr = LoadSymbol("cusparseCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroy(cusparseHandle_t handle) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t); - static auto func_ptr = LoadSymbol("cusparseDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cusparseStatus_t CUSPARSEAPI cusparseGetVersion(cusparseHandle_t handle, - int *version) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int *); - static auto func_ptr = LoadSymbol("cusparseGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, version); -} - -cusparseStatus_t CUSPARSEAPI cusparseGetProperty(libraryPropertyType type, - int *value) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(libraryPropertyType, int *); - static auto func_ptr = LoadSymbol("cusparseGetProperty"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type, value); -} - -const char *CUSPARSEAPI cusparseGetErrorName(cusparseStatus_t status) { - using FuncPtr = const char *(CUSPARSEAPI *)(cusparseStatus_t); - static auto func_ptr = LoadSymbol("cusparseGetErrorName"); - if (!func_ptr) return "cusparseGetErrorName symbol not found."; - return func_ptr(status); -} - -const char *CUSPARSEAPI 
cusparseGetErrorString(cusparseStatus_t status) { - using FuncPtr = const char *(CUSPARSEAPI *)(cusparseStatus_t); - static auto func_ptr = LoadSymbol("cusparseGetErrorString"); - if (!func_ptr) return "cusparseGetErrorString symbol not found."; - return func_ptr(status); -} - -cusparseStatus_t CUSPARSEAPI cusparseSetStream(cusparseHandle_t handle, - cudaStream_t streamId) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cusparseSetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cusparseStatus_t CUSPARSEAPI cusparseGetStream(cusparseHandle_t handle, - cudaStream_t *streamId) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, cudaStream_t *); - static auto func_ptr = LoadSymbol("cusparseGetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cusparseStatus_t CUSPARSEAPI -cusparseGetPointerMode(cusparseHandle_t handle, cusparsePointerMode_t *mode) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, - cusparsePointerMode_t *); - static auto func_ptr = LoadSymbol("cusparseGetPointerMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSetPointerMode(cusparseHandle_t handle, cusparsePointerMode_t mode) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, cusparsePointerMode_t); - static auto func_ptr = LoadSymbol("cusparseSetPointerMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCreateMatDescr(cusparseMatDescr_t *descrA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t *); - static auto func_ptr = LoadSymbol("cusparseCreateMatDescr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI 
-cusparseDestroyMatDescr(cusparseMatDescr_t descrA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseDestroyMatDescr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCopyMatDescr(cusparseMatDescr_t dest, const cusparseMatDescr_t src) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t, - const cusparseMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseCopyMatDescr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dest, src); -} - -cusparseStatus_t CUSPARSEAPI cusparseSetMatType(cusparseMatDescr_t descrA, - cusparseMatrixType_t type) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t, cusparseMatrixType_t); - static auto func_ptr = LoadSymbol("cusparseSetMatType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA, type); -} - -cusparseMatrixType_t CUSPARSEAPI -cusparseGetMatType(const cusparseMatDescr_t descrA) { - using FuncPtr = cusparseMatrixType_t(CUSPARSEAPI *)(const cusparseMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseGetMatType"); - if (!func_ptr) return cusparseMatrixType_t(-1); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSetMatFillMode(cusparseMatDescr_t descrA, cusparseFillMode_t fillMode) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t, cusparseFillMode_t); - static auto func_ptr = LoadSymbol("cusparseSetMatFillMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA, fillMode); -} - -cusparseFillMode_t CUSPARSEAPI -cusparseGetMatFillMode(const cusparseMatDescr_t descrA) { - using FuncPtr = cusparseFillMode_t(CUSPARSEAPI *)(const cusparseMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseGetMatFillMode"); - if (!func_ptr) return cusparseFillMode_t(-1); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI 
-cusparseSetMatDiagType(cusparseMatDescr_t descrA, cusparseDiagType_t diagType) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t, cusparseDiagType_t); - static auto func_ptr = LoadSymbol("cusparseSetMatDiagType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA, diagType); -} - -cusparseDiagType_t CUSPARSEAPI -cusparseGetMatDiagType(const cusparseMatDescr_t descrA) { - using FuncPtr = cusparseDiagType_t(CUSPARSEAPI *)(const cusparseMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseGetMatDiagType"); - if (!func_ptr) return cusparseDiagType_t(-1); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI cusparseSetMatIndexBase(cusparseMatDescr_t descrA, - cusparseIndexBase_t base) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSetMatIndexBase"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA, base); -} - -cusparseIndexBase_t CUSPARSEAPI -cusparseGetMatIndexBase(const cusparseMatDescr_t descrA) { - using FuncPtr = cusparseIndexBase_t(CUSPARSEAPI *)(const cusparseMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseGetMatIndexBase"); - if (!func_ptr) return cusparseIndexBase_t(-1); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCreateSolveAnalysisInfo(cusparseSolveAnalysisInfo_t *info) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseSolveAnalysisInfo_t *); - static auto func_ptr = LoadSymbol("cusparseCreateSolveAnalysisInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDestroySolveAnalysisInfo(cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSolveAnalysisInfo_t); - static auto func_ptr = - LoadSymbol("cusparseDestroySolveAnalysisInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - 
-cusparseStatus_t CUSPARSEAPI -cusparseGetLevelInfo(cusparseHandle_t handle, cusparseSolveAnalysisInfo_t info, - int *nlevels, int **levelPtr, int **levelInd) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseSolveAnalysisInfo_t, int *, int **, int **); - static auto func_ptr = LoadSymbol("cusparseGetLevelInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, nlevels, levelPtr, levelInd); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsrsv2Info(csrsv2Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrsv2Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateCsrsv2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyCsrsv2Info(csrsv2Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrsv2Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyCsrsv2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsric02Info(csric02Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csric02Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateCsric02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyCsric02Info(csric02Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csric02Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyCsric02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateBsric02Info(bsric02Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsric02Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateBsric02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyBsric02Info(bsric02Info_t info) { - 
using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsric02Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyBsric02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsrilu02Info(csrilu02Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrilu02Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateCsrilu02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyCsrilu02Info(csrilu02Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrilu02Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyCsrilu02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateBsrilu02Info(bsrilu02Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrilu02Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateBsrilu02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyBsrilu02Info(bsrilu02Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrilu02Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyBsrilu02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateBsrsv2Info(bsrsv2Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrsv2Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateBsrsv2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyBsrsv2Info(bsrsv2Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrsv2Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyBsrsv2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t 
CUSPARSEAPI cusparseCreateBsrsm2Info(bsrsm2Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrsm2Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateBsrsm2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyBsrsm2Info(bsrsm2Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrsm2Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyBsrsm2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateHybMat(cusparseHybMat_t *hybA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHybMat_t *); - static auto func_ptr = LoadSymbol("cusparseCreateHybMat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hybA); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyHybMat(cusparseHybMat_t hybA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHybMat_t); - static auto func_ptr = LoadSymbol("cusparseDestroyHybMat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hybA); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsru2csrInfo(csru2csrInfo_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csru2csrInfo_t *); - static auto func_ptr = LoadSymbol("cusparseCreateCsru2csrInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyCsru2csrInfo(csru2csrInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csru2csrInfo_t); - static auto func_ptr = LoadSymbol("cusparseDestroyCsru2csrInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCreateColorInfo(cusparseColorInfo_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseColorInfo_t *); - static auto func_ptr = LoadSymbol("cusparseCreateColorInfo"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDestroyColorInfo(cusparseColorInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseColorInfo_t); - static auto func_ptr = LoadSymbol("cusparseDestroyColorInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseSetColorAlgs(cusparseColorInfo_t info, - cusparseColorAlg_t alg) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseColorInfo_t, cusparseColorAlg_t); - static auto func_ptr = LoadSymbol("cusparseSetColorAlgs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, alg); -} - -cusparseStatus_t CUSPARSEAPI cusparseGetColorAlgs(cusparseColorInfo_t info, - cusparseColorAlg_t *alg) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseColorInfo_t, - cusparseColorAlg_t *); - static auto func_ptr = LoadSymbol("cusparseGetColorAlgs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, alg); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreatePruneInfo(pruneInfo_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(pruneInfo_t *); - static auto func_ptr = LoadSymbol("cusparseCreatePruneInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyPruneInfo(pruneInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(pruneInfo_t); - static auto func_ptr = LoadSymbol("cusparseDestroyPruneInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseSaxpyi(cusparseHandle_t handle, int nnz, - const float *alpha, - const float *xVal, const int *xInd, - float *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const float *, const float *, const int *, float *, - cusparseIndexBase_t); - static auto func_ptr = 
LoadSymbol("cusparseSaxpyi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, alpha, xVal, xInd, y, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseDaxpyi(cusparseHandle_t handle, int nnz, - const double *alpha, - const double *xVal, const int *xInd, - double *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, const double *, const int *, - double *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseDaxpyi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, alpha, xVal, xInd, y, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseCaxpyi(cusparseHandle_t handle, int nnz, - const cuComplex *alpha, - const cuComplex *xVal, - const int *xInd, cuComplex *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const cuComplex *, const int *, - cuComplex *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseCaxpyi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, alpha, xVal, xInd, y, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseZaxpyi(cusparseHandle_t handle, int nnz, - const cuDoubleComplex *alpha, - const cuDoubleComplex *xVal, - const int *xInd, cuDoubleComplex *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, const cuDoubleComplex *, - const int *, cuDoubleComplex *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseZaxpyi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, alpha, xVal, xInd, y, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseSdoti(cusparseHandle_t handle, int nnz, - const float *xVal, const int *xInd, - const float *y, - float *resultDevHostPtr, - cusparseIndexBase_t idxBase) { - using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const float *, const int *, const float *, float *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSdoti"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, resultDevHostPtr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseDdoti(cusparseHandle_t handle, int nnz, - const double *xVal, const int *xInd, - const double *y, - double *resultDevHostPtr, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, const int *, const double *, - double *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseDdoti"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, resultDevHostPtr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseCdoti(cusparseHandle_t handle, int nnz, - const cuComplex *xVal, - const int *xInd, const cuComplex *y, - cuComplex *resultDevHostPtr, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const int *, const cuComplex *, - cuComplex *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseCdoti"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, resultDevHostPtr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseZdoti(cusparseHandle_t handle, int nnz, - const cuDoubleComplex *xVal, - const int *xInd, - const cuDoubleComplex *y, - cuDoubleComplex *resultDevHostPtr, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, const int *, - const cuDoubleComplex *, cuDoubleComplex *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseZdoti"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, resultDevHostPtr, idxBase); -} - 
-cusparseStatus_t CUSPARSEAPI cusparseCdotci(cusparseHandle_t handle, int nnz, - const cuComplex *xVal, - const int *xInd, const cuComplex *y, - cuComplex *resultDevHostPtr, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const int *, const cuComplex *, - cuComplex *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseCdotci"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, resultDevHostPtr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseZdotci(cusparseHandle_t handle, int nnz, - const cuDoubleComplex *xVal, - const int *xInd, - const cuDoubleComplex *y, - cuDoubleComplex *resultDevHostPtr, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, const int *, - const cuDoubleComplex *, cuDoubleComplex *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseZdotci"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, resultDevHostPtr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgthr(cusparseHandle_t handle, int nnz, - const float *y, float *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const float *, float *, const int *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSgthr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgthr(cusparseHandle_t handle, int nnz, - const double *y, double *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, double *, const int *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseDgthr"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgthr(cusparseHandle_t handle, int nnz, - const cuComplex *y, cuComplex *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, cuComplex *, const int *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseCgthr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgthr(cusparseHandle_t handle, int nnz, - const cuDoubleComplex *y, - cuDoubleComplex *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, cuDoubleComplex *, - const int *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseZgthr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgthrz(cusparseHandle_t handle, int nnz, - float *y, float *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, float *, float *, - const int *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSgthrz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgthrz(cusparseHandle_t handle, int nnz, - double *y, double *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, double *, double *, - const int *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseDgthrz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -cusparseStatus_t 
CUSPARSEAPI cusparseCgthrz(cusparseHandle_t handle, int nnz, - cuComplex *y, cuComplex *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cuComplex *, cuComplex *, const int *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseCgthrz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgthrz(cusparseHandle_t handle, int nnz, - cuDoubleComplex *y, - cuDoubleComplex *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cuDoubleComplex *, cuDoubleComplex *, const int *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseZgthrz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseSsctr(cusparseHandle_t handle, int nnz, - const float *xVal, const int *xInd, - float *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, - const float *, const int *, - float *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSsctr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseDsctr(cusparseHandle_t handle, int nnz, - const double *xVal, const int *xInd, - double *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, const int *, double *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseDsctr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsctr(cusparseHandle_t handle, int nnz, - const cuComplex *xVal, - const int *xInd, 
cuComplex *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const int *, cuComplex *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseCsctr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseZsctr(cusparseHandle_t handle, int nnz, - const cuDoubleComplex *xVal, - const int *xInd, cuDoubleComplex *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, const int *, - cuDoubleComplex *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseZsctr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseSroti(cusparseHandle_t handle, int nnz, - float *xVal, const int *xInd, - float *y, const float *c, - const float *s, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, float *, const int *, float *, const float *, - const float *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSroti"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, c, s, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseDroti(cusparseHandle_t handle, int nnz, - double *xVal, const int *xInd, - double *y, const double *c, - const double *s, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, double *, const int *, double *, const double *, - const double *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseDroti"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, c, s, idxBase); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSgemvi(cusparseHandle_t handle, 
cusparseOperation_t transA, int m, - int n, const float *alpha, const float *A, int lda, int nnz, - const float *xVal, const int *xInd, const float *beta, float *y, - cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const float *, - const float *, int, int, const float *, const int *, const float *, - float *, cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseSgemvi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, A, lda, nnz, xVal, xInd, beta, y, - idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, - int m, int n, int nnz, int *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseSgemvi_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDgemvi(cusparseHandle_t handle, cusparseOperation_t transA, int m, - int n, const double *alpha, const double *A, int lda, int nnz, - const double *xVal, const int *xInd, const double *beta, - double *y, cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const double *, - const double *, int, int, const double *, const int *, const double *, - double *, cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseDgemvi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, A, lda, nnz, xVal, xInd, beta, y, - idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, - int m, int n, int nnz, int *pBufferSize) { - using 
FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseDgemvi_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgemvi( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, - const cuComplex *alpha, const cuComplex *A, int lda, int nnz, - const cuComplex *xVal, const int *xInd, const cuComplex *beta, cuComplex *y, - cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cuComplex *, - const cuComplex *, int, int, const cuComplex *, const int *, - const cuComplex *, cuComplex *, cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseCgemvi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, A, lda, nnz, xVal, xInd, beta, y, - idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, - int m, int n, int nnz, int *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseCgemvi_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgemvi( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, - const cuDoubleComplex *alpha, const cuDoubleComplex *A, int lda, int nnz, - const cuDoubleComplex *xVal, const int *xInd, const cuDoubleComplex *beta, - cuDoubleComplex *y, cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, 
int, const cuDoubleComplex *, const int *, - const cuDoubleComplex *, cuDoubleComplex *, cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseZgemvi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, A, lda, nnz, xVal, xInd, beta, y, - idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseZgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, - int m, int n, int nnz, int *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseZgemvi_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrmv( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int nnz, - const float *alpha, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const float *x, const float *beta, float *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - const float *, const float *, float *); - static auto func_ptr = LoadSymbol("cusparseScsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDcsrmv(cusparseHandle_t handle, cusparseOperation_t transA, int m, - int n, int nnz, const double *alpha, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *x, const double *beta, double *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, const 
double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - const double *, const double *, double *); - static auto func_ptr = LoadSymbol("cusparseDcsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCcsrmv(cusparseHandle_t handle, cusparseOperation_t transA, int m, - int n, int nnz, const cuComplex *alpha, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cuComplex *x, const cuComplex *beta, cuComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, const cuComplex *, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - const cuComplex *, const cuComplex *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCcsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrmv( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int nnz, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *x, - const cuDoubleComplex *beta, cuDoubleComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, - const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZcsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, alpha, 
descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsrmvEx_bufferSize( - cusparseHandle_t handle, cusparseAlgMode_t alg, cusparseOperation_t transA, - int m, int n, int nnz, const void *alpha, cudaDataType alphatype, - const cusparseMatDescr_t descrA, const void *csrValA, - cudaDataType csrValAtype, const int *csrRowPtrA, const int *csrColIndA, - const void *x, cudaDataType xtype, const void *beta, cudaDataType betatype, - void *y, cudaDataType ytype, cudaDataType executiontype, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseAlgMode_t, cusparseOperation_t, int, int, int, - const void *, cudaDataType, const cusparseMatDescr_t, const void *, - cudaDataType, const int *, const int *, const void *, cudaDataType, - const void *, cudaDataType, void *, cudaDataType, cudaDataType, size_t *); - static auto func_ptr = LoadSymbol("cusparseCsrmvEx_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alg, transA, m, n, nnz, alpha, alphatype, descrA, - csrValA, csrValAtype, csrRowPtrA, csrColIndA, x, xtype, beta, - betatype, y, ytype, executiontype, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsrmvEx( - cusparseHandle_t handle, cusparseAlgMode_t alg, cusparseOperation_t transA, - int m, int n, int nnz, const void *alpha, cudaDataType alphatype, - const cusparseMatDescr_t descrA, const void *csrValA, - cudaDataType csrValAtype, const int *csrRowPtrA, const int *csrColIndA, - const void *x, cudaDataType xtype, const void *beta, cudaDataType betatype, - void *y, cudaDataType ytype, cudaDataType executiontype, void *buffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseAlgMode_t, cusparseOperation_t, int, int, int, - const void *, cudaDataType, const cusparseMatDescr_t, const void *, - cudaDataType, const int *, const int *, const void *, cudaDataType, - const void 
*, cudaDataType, void *, cudaDataType, cudaDataType, void *); - static auto func_ptr = LoadSymbol("cusparseCsrmvEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alg, transA, m, n, nnz, alpha, alphatype, descrA, - csrValA, csrValAtype, csrRowPtrA, csrColIndA, x, xtype, beta, - betatype, y, ytype, executiontype, buffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrmv_mp( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int nnz, - const float *alpha, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const float *x, const float *beta, float *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - const float *, const float *, float *); - static auto func_ptr = LoadSymbol("cusparseScsrmv_mp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDcsrmv_mp(cusparseHandle_t handle, cusparseOperation_t transA, int m, - int n, int nnz, const double *alpha, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *x, const double *beta, double *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - const double *, const double *, double *); - static auto func_ptr = LoadSymbol("cusparseDcsrmv_mp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrmv_mp( 
- cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int nnz, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuComplex *x, const cuComplex *beta, - cuComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, const cuComplex *, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - const cuComplex *, const cuComplex *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCcsrmv_mp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrmv_mp( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int nnz, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *x, - const cuDoubleComplex *beta, cuDoubleComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, - const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZcsrmv_mp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseShybmv( - cusparseHandle_t handle, cusparseOperation_t transA, const float *alpha, - const cusparseMatDescr_t descrA, const cusparseHybMat_t hybA, - const float *x, const float *beta, float *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, 
cusparseOperation_t, const float *, - const cusparseMatDescr_t, const cusparseHybMat_t, const float *, - const float *, float *); - static auto func_ptr = LoadSymbol("cusparseShybmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, alpha, descrA, hybA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseDhybmv( - cusparseHandle_t handle, cusparseOperation_t transA, const double *alpha, - const cusparseMatDescr_t descrA, const cusparseHybMat_t hybA, - const double *x, const double *beta, double *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const double *, - const cusparseMatDescr_t, const cusparseHybMat_t, const double *, - const double *, double *); - static auto func_ptr = LoadSymbol("cusparseDhybmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, alpha, descrA, hybA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseChybmv( - cusparseHandle_t handle, cusparseOperation_t transA, const cuComplex *alpha, - const cusparseMatDescr_t descrA, const cusparseHybMat_t hybA, - const cuComplex *x, const cuComplex *beta, cuComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cuComplex *, - const cusparseMatDescr_t, const cusparseHybMat_t, const cuComplex *, - const cuComplex *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseChybmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, alpha, descrA, hybA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI -cusparseZhybmv(cusparseHandle_t handle, cusparseOperation_t transA, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, const cuDoubleComplex *x, - const cuDoubleComplex *beta, cuDoubleComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cuDoubleComplex *, - const cusparseMatDescr_t, const 
cusparseHybMat_t, const cuDoubleComplex *, - const cuDoubleComplex *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZhybmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, alpha, descrA, hybA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrmv( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nb, int nnzb, const float *alpha, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const float *x, const float *beta, float *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - const float *, const cusparseMatDescr_t, const float *, const int *, - const int *, int, const float *, const float *, float *); - static auto func_ptr = LoadSymbol("cusparseSbsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim, - x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrmv( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nb, int nnzb, const double *alpha, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const double *x, const double *beta, double *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - const double *, const cusparseMatDescr_t, const double *, const int *, - const int *, int, const double *, const double *, double *); - static auto func_ptr = LoadSymbol("cusparseDbsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, 
bsrSortedColIndA, blockDim, - x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCbsrmv(cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nb, int nnzb, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *bsrSortedValA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, int blockDim, const cuComplex *x, - const cuComplex *beta, cuComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, int, const cuComplex *, const cuComplex *, - cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCbsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim, - x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrmv( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nb, int nnzb, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *bsrSortedValA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, int blockDim, const cuDoubleComplex *x, - const cuDoubleComplex *beta, cuDoubleComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZbsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim, - x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI 
-cusparseSbsrxmv(cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int sizeOfMask, int mb, int nb, - int nnzb, const float *alpha, const cusparseMatDescr_t descrA, - const float *bsrSortedValA, const int *bsrSortedMaskPtrA, - const int *bsrSortedRowPtrA, const int *bsrSortedEndPtrA, - const int *bsrSortedColIndA, int blockDim, const float *x, - const float *beta, float *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - int, const float *, const cusparseMatDescr_t, const float *, const int *, - const int *, const int *, const int *, int, const float *, const float *, - float *); - static auto func_ptr = LoadSymbol("cusparseSbsrxmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, sizeOfMask, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedMaskPtrA, bsrSortedRowPtrA, - bsrSortedEndPtrA, bsrSortedColIndA, blockDim, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDbsrxmv(cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int sizeOfMask, int mb, int nb, - int nnzb, const double *alpha, const cusparseMatDescr_t descrA, - const double *bsrSortedValA, const int *bsrSortedMaskPtrA, - const int *bsrSortedRowPtrA, const int *bsrSortedEndPtrA, - const int *bsrSortedColIndA, int blockDim, const double *x, - const double *beta, double *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - int, const double *, const cusparseMatDescr_t, const double *, - const int *, const int *, const int *, const int *, int, const double *, - const double *, double *); - static auto func_ptr = LoadSymbol("cusparseDbsrxmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, sizeOfMask, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedMaskPtrA, bsrSortedRowPtrA, - 
bsrSortedEndPtrA, bsrSortedColIndA, blockDim, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrxmv( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int sizeOfMask, int mb, int nb, int nnzb, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *bsrSortedValA, const int *bsrSortedMaskPtrA, - const int *bsrSortedRowPtrA, const int *bsrSortedEndPtrA, - const int *bsrSortedColIndA, int blockDim, const cuComplex *x, - const cuComplex *beta, cuComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - int, const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, const int *, const int *, int, - const cuComplex *, const cuComplex *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCbsrxmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, sizeOfMask, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedMaskPtrA, bsrSortedRowPtrA, - bsrSortedEndPtrA, bsrSortedColIndA, blockDim, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrxmv( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int sizeOfMask, int mb, int nb, int nnzb, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *bsrSortedValA, const int *bsrSortedMaskPtrA, - const int *bsrSortedRowPtrA, const int *bsrSortedEndPtrA, - const int *bsrSortedColIndA, int blockDim, const cuDoubleComplex *x, - const cuDoubleComplex *beta, cuDoubleComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - int, const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, const int *, - const int *, int, const cuDoubleComplex *, const cuDoubleComplex *, - cuDoubleComplex *); - static auto 
func_ptr = LoadSymbol("cusparseZbsrxmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, sizeOfMask, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedMaskPtrA, bsrSortedRowPtrA, - bsrSortedEndPtrA, bsrSortedColIndA, blockDim, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsrsv_analysisEx( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const void *csrSortedValA, - cudaDataType csrSortedValAtype, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - cudaDataType executiontype) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const void *, cudaDataType, const int *, const int *, - cusparseSolveAnalysisInfo_t, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCsrsv_analysisEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedValAtype, csrSortedRowPtrA, csrSortedColIndA, info, - executiontype); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsv_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseScsrsv_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsv_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const 
cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseDcsrsv_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsv_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseCcsrsv_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsv_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseZcsrsv_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, 
- csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsrsv_solveEx( - cusparseHandle_t handle, cusparseOperation_t transA, int m, - const void *alpha, cudaDataType alphatype, const cusparseMatDescr_t descrA, - const void *csrSortedValA, cudaDataType csrSortedValAtype, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info, const void *f, cudaDataType ftype, - void *x, cudaDataType xtype, cudaDataType executiontype) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const void *, cudaDataType, - const cusparseMatDescr_t, const void *, cudaDataType, const int *, - const int *, cusparseSolveAnalysisInfo_t, const void *, cudaDataType, - void *, cudaDataType, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCsrsv_solveEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, alpha, alphatype, descrA, csrSortedValA, - csrSortedValAtype, csrSortedRowPtrA, csrSortedColIndA, info, - f, ftype, x, xtype, executiontype); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsv_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, - const float *alpha, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - const float *f, float *x) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - cusparseSolveAnalysisInfo_t, const float *, float *); - static auto func_ptr = LoadSymbol("cusparseScsrsv_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsv_solve( - cusparseHandle_t handle, cusparseOperation_t transA, 
int m, - const double *alpha, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - const double *f, double *x) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - cusparseSolveAnalysisInfo_t, const double *, double *); - static auto func_ptr = LoadSymbol("cusparseDcsrsv_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsv_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - const cuComplex *f, cuComplex *x) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cuComplex *, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - cusparseSolveAnalysisInfo_t, const cuComplex *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCcsrsv_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsv_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - const cuDoubleComplex *f, cuDoubleComplex *x) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const 
cuDoubleComplex *, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, cusparseSolveAnalysisInfo_t, const cuDoubleComplex *, - cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZcsrsv_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrsv2_zeroPivot(cusparseHandle_t handle, - csrsv2Info_t info, - int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, csrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXcsrsv2_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsv2_bufferSize( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, csrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseScsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsv2_bufferSize( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, csrsv2Info_t, int 
*); - static auto func_ptr = LoadSymbol("cusparseDcsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsv2_bufferSize( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, csrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCcsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsv2_bufferSize( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, csrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZcsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, 
csrsv2Info_t info, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, csrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseScsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, csrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, csrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseZcsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, csrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsv2_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, csrsv2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsv2_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, 
csrsv2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsv2_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, csrsv2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsv2_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, csrsv2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsv2_solve( - cusparseHandle_t handle, cusparseOperation_t transA, 
int m, int nnz, - const float *alpha, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrsv2Info_t info, const float *f, float *x, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - csrsv2Info_t, const float *, float *, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsv2_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const double *alpha, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrsv2Info_t info, const double *f, double *x, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - csrsv2Info_t, const double *, double *, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsv2_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrsv2Info_t info, const cuComplex *f, - cuComplex *x, 
cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cuComplex *, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - csrsv2Info_t, const cuComplex *, cuComplex *, cusparseSolvePolicy_t, - void *); - static auto func_ptr = LoadSymbol("cusparseCcsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsv2_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrsv2Info_t info, const cuDoubleComplex *f, - cuDoubleComplex *x, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, csrsv2Info_t, const cuDoubleComplex *, cuDoubleComplex *, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXbsrsv2_zeroPivot(cusparseHandle_t handle, - bsrsv2Info_t info, - int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, bsrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXbsrsv2_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsv2_bufferSize( - cusparseHandle_t 
handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, float *, const int *, const int *, int, - bsrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseSbsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsv2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, double *, const int *, const int *, int, - bsrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDbsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsv2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, 
cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, cuComplex *, const int *, const int *, int, - bsrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCbsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsv2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, cuDoubleComplex *, const int *, const int *, - int, bsrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZbsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockSize, - bsrsv2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, float *, const int *, const int *, int, - bsrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSbsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, 
bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockSize, info, - pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockSize, - bsrsv2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, double *, const int *, const int *, int, - bsrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDbsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockSize, info, - pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockSize, - bsrsv2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, cuComplex *, const int *, const int *, int, - bsrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCbsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockSize, info, - pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedValA, - const int 
*bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockSize, - bsrsv2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, cuDoubleComplex *, const int *, const int *, - int, bsrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZbsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockSize, info, - pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsv2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, const float *, const int *, const int *, int, - bsrsv2Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsv2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, const 
double *, const int *, const int *, int, - bsrsv2Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsv2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - int, bsrsv2Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsv2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, int, bsrsv2Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsv2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, const float *alpha, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, const float *f, float *x, cusparseSolvePolicy_t policy, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const float *, const cusparseMatDescr_t, const float *, const int *, - const int *, int, bsrsv2Info_t, const float *, float *, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, alpha, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, f, x, - policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsv2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, const double *alpha, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, const double *f, double *x, cusparseSolvePolicy_t policy, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const double *, const cusparseMatDescr_t, const double *, const int *, - const int *, int, bsrsv2Info_t, const double *, double *, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, alpha, descrA, 
bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, f, x, - policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsv2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, const cuComplex *alpha, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, const cuComplex *f, cuComplex *x, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, int, bsrsv2Info_t, const cuComplex *, - cuComplex *, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, alpha, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, f, x, - policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsv2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, const cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, const cuDoubleComplex *f, cuDoubleComplex *x, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, bsrsv2Info_t, - const cuDoubleComplex *, cuDoubleComplex *, cusparseSolvePolicy_t, - void *); - static auto func_ptr = LoadSymbol("cusparseZbsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, dirA, transA, mb, nnzb, alpha, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, f, x, - policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseShybsv_analysis(cusparseHandle_t handle, cusparseOperation_t transA, - const cusparseMatDescr_t descrA, cusparseHybMat_t hybA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cusparseMatDescr_t, - cusparseHybMat_t, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseShybsv_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, descrA, hybA, info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDhybsv_analysis(cusparseHandle_t handle, cusparseOperation_t transA, - const cusparseMatDescr_t descrA, cusparseHybMat_t hybA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cusparseMatDescr_t, - cusparseHybMat_t, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseDhybsv_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, descrA, hybA, info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseChybsv_analysis(cusparseHandle_t handle, cusparseOperation_t transA, - const cusparseMatDescr_t descrA, cusparseHybMat_t hybA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cusparseMatDescr_t, - cusparseHybMat_t, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseChybsv_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, descrA, hybA, info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseZhybsv_analysis(cusparseHandle_t handle, cusparseOperation_t transA, - const cusparseMatDescr_t descrA, cusparseHybMat_t hybA, - cusparseSolveAnalysisInfo_t info) { - 
using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cusparseMatDescr_t, - cusparseHybMat_t, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseZhybsv_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, descrA, hybA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseShybsv_solve( - cusparseHandle_t handle, cusparseOperation_t trans, const float *alpha, - const cusparseMatDescr_t descrA, const cusparseHybMat_t hybA, - cusparseSolveAnalysisInfo_t info, const float *f, float *x) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const float *, - const cusparseMatDescr_t, const cusparseHybMat_t, - cusparseSolveAnalysisInfo_t, const float *, float *); - static auto func_ptr = LoadSymbol("cusparseShybsv_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, alpha, descrA, hybA, info, f, x); -} - -cusparseStatus_t CUSPARSEAPI cusparseChybsv_solve( - cusparseHandle_t handle, cusparseOperation_t trans, const cuComplex *alpha, - const cusparseMatDescr_t descrA, const cusparseHybMat_t hybA, - cusparseSolveAnalysisInfo_t info, const cuComplex *f, cuComplex *x) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cuComplex *, - const cusparseMatDescr_t, const cusparseHybMat_t, - cusparseSolveAnalysisInfo_t, const cuComplex *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseChybsv_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, alpha, descrA, hybA, info, f, x); -} - -cusparseStatus_t CUSPARSEAPI cusparseDhybsv_solve( - cusparseHandle_t handle, cusparseOperation_t trans, const double *alpha, - const cusparseMatDescr_t descrA, const cusparseHybMat_t hybA, - cusparseSolveAnalysisInfo_t info, const double *f, double *x) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, 
cusparseOperation_t, const double *, - const cusparseMatDescr_t, const cusparseHybMat_t, - cusparseSolveAnalysisInfo_t, const double *, double *); - static auto func_ptr = LoadSymbol("cusparseDhybsv_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, alpha, descrA, hybA, info, f, x); -} - -cusparseStatus_t CUSPARSEAPI cusparseZhybsv_solve( - cusparseHandle_t handle, cusparseOperation_t trans, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, cusparseSolveAnalysisInfo_t info, - const cuDoubleComplex *f, cuDoubleComplex *x) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cuDoubleComplex *, - const cusparseMatDescr_t, const cusparseHybMat_t, - cusparseSolveAnalysisInfo_t, const cuDoubleComplex *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZhybsv_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, alpha, descrA, hybA, info, f, x); -} - -cusparseStatus_t CUSPARSEAPI -cusparseScsrmm(cusparseHandle_t handle, cusparseOperation_t transA, int m, - int n, int k, int nnz, const float *alpha, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const float *B, int ldb, const float *beta, float *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - const float *, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cusparseScsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, k, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrmm( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, 
int k, - int nnz, const double *alpha, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const double *B, int ldb, const double *beta, - double *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - const double *, int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cusparseDcsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, k, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrmm( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int k, - int nnz, const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuComplex *B, int ldb, - const cuComplex *beta, cuComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int, - const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, const cuComplex *, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCcsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, k, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrmm( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int k, - int nnz, const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *B, int ldb, - const cuDoubleComplex *beta, cuDoubleComplex *C, 
int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int, - const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cusparseZcsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, k, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI -cusparseScsrmm2(cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, int nnz, - const float *alpha, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const float *B, int ldb, - const float *beta, float *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - int, const float *, const cusparseMatDescr_t, const float *, const int *, - const int *, const float *, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cusparseScsrmm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDcsrmm2(cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, int nnz, - const double *alpha, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const double *B, int ldb, - const double *beta, double *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - int, const double *, const 
cusparseMatDescr_t, const double *, - const int *, const int *, const double *, int, const double *, double *, - int); - static auto func_ptr = LoadSymbol("cusparseDcsrmm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCcsrmm2(cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, int nnz, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuComplex *B, int ldb, - const cuComplex *beta, cuComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - int, const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, const cuComplex *, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCcsrmm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrmm2( - cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, int nnz, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *B, int ldb, - const cuDoubleComplex *beta, cuDoubleComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - int, const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - const 
cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cusparseZcsrmm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrmm( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int kb, int nnzb, const float *alpha, const cusparseMatDescr_t descrA, - const float *bsrSortedValA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, const int blockSize, const float *B, - const int ldb, const float *beta, float *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - const int, const float *, const int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cusparseSbsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, kb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockSize, - B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrmm( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int kb, int nnzb, const double *alpha, const cusparseMatDescr_t descrA, - const double *bsrSortedValA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, const int blockSize, const double *B, - const int ldb, const double *beta, double *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, int, const double *, - const cusparseMatDescr_t, const double 
*, const int *, const int *, - const int, const double *, const int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cusparseDbsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, kb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockSize, - B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrmm( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int kb, int nnzb, const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *bsrSortedValA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, const int blockSize, const cuComplex *B, - const int ldb, const cuComplex *beta, cuComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, int, const cuComplex *, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - const int, const cuComplex *, const int, const cuComplex *, cuComplex *, - int); - static auto func_ptr = LoadSymbol("cusparseCbsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, kb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockSize, - B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrmm( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int kb, int nnzb, const cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, - const int blockSize, const cuDoubleComplex *B, const int ldb, - const cuDoubleComplex *beta, cuDoubleComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, 
cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, const int, const cuDoubleComplex *, const int, - const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cusparseZbsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, kb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockSize, - B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgemmi( - cusparseHandle_t handle, int m, int n, int k, int nnz, const float *alpha, - const float *A, int lda, const float *cscValB, const int *cscColPtrB, - const int *cscRowIndB, const float *beta, float *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, int, const float *, const float *, int, - const float *, const int *, const int *, const float *, float *, int); - static auto func_ptr = LoadSymbol("cusparseSgemmi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, - cscRowIndB, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgemmi( - cusparseHandle_t handle, int m, int n, int k, int nnz, const double *alpha, - const double *A, int lda, const double *cscValB, const int *cscColPtrB, - const int *cscRowIndB, const double *beta, double *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, int, const double *, const double *, int, - const double *, const int *, const int *, const double *, double *, int); - static auto func_ptr = LoadSymbol("cusparseDgemmi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, - cscRowIndB, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgemmi( - cusparseHandle_t handle, 
int m, int n, int k, int nnz, - const cuComplex *alpha, const cuComplex *A, int lda, - const cuComplex *cscValB, const int *cscColPtrB, const int *cscRowIndB, - const cuComplex *beta, cuComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, const int *, const int *, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCgemmi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, - cscRowIndB, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI -cusparseZgemmi(cusparseHandle_t handle, int m, int n, int k, int nnz, - const cuDoubleComplex *alpha, const cuDoubleComplex *A, int lda, - const cuDoubleComplex *cscValB, const int *cscColPtrB, - const int *cscRowIndB, const cuDoubleComplex *beta, - cuDoubleComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, const int *, - const int *, const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cusparseZgemmi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, - cscRowIndB, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsm_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseScsrsm_analysis"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsm_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseDcsrsm_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsm_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseCcsrsm_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsm_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const 
cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseZcsrsm_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsm_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, - const float *alpha, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - const float *B, int ldb, float *X, int ldx) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - cusparseSolveAnalysisInfo_t, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cusparseScsrsm_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, B, ldb, X, ldx); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsm_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, - const double *alpha, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - const double *B, int ldb, double *X, int ldx) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - cusparseSolveAnalysisInfo_t, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cusparseDcsrsm_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, 
csrSortedColIndA, info, B, ldb, X, ldx); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsm_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - const cuComplex *B, int ldb, cuComplex *X, int ldx) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cuComplex *, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - cusparseSolveAnalysisInfo_t, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCcsrsm_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, B, ldb, X, ldx); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsm_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - const cuDoubleComplex *B, int ldb, cuDoubleComplex *X, int ldx) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, cusparseSolveAnalysisInfo_t, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cusparseZcsrsm_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, B, ldb, X, ldx); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsrsm2Info(csrsm2Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrsm2Info_t *); - 
static auto func_ptr = LoadSymbol("cusparseCreateCsrsm2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyCsrsm2Info(csrsm2Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrsm2Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyCsrsm2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrsm2_zeroPivot(cusparseHandle_t handle, - csrsm2Info_t info, - int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, csrsm2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXcsrsm2_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsm2_bufferSizeExt( - cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, const float *alpha, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *B, - int ldb, csrsm2Info_t info, cusparseSolvePolicy_t policy, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const float *, const cusparseMatDescr_t, const float *, const int *, - const int *, const float *, int, csrsm2Info_t, cusparseSolvePolicy_t, - size_t *); - static auto func_ptr = LoadSymbol("cusparseScsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsm2_bufferSizeExt( - cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, 
const double *alpha, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, const double *B, - int ldb, csrsm2Info_t info, cusparseSolvePolicy_t policy, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const double *, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, int, csrsm2Info_t, - cusparseSolvePolicy_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsm2_bufferSizeExt( - cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuComplex *B, int ldb, csrsm2Info_t info, - cusparseSolvePolicy_t policy, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, const cuComplex *, int, csrsm2Info_t, - cusparseSolvePolicy_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsm2_bufferSizeExt( - cusparseHandle_t handle, int algo, cusparseOperation_t transA, - 
cusparseOperation_t transB, int m, int nrhs, int nnz, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *B, int ldb, - csrsm2Info_t info, cusparseSolvePolicy_t policy, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - const cuDoubleComplex *, int, csrsm2Info_t, cusparseSolvePolicy_t, - size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsm2_analysis( - cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, const float *alpha, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *B, - int ldb, csrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const float *, const cusparseMatDescr_t, const float *, const int *, - const int *, const float *, int, csrsm2Info_t, cusparseSolvePolicy_t, - void *); - static auto func_ptr = LoadSymbol("cusparseScsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsm2_analysis( - 
cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, const double *alpha, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, const double *B, - int ldb, csrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const double *, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, int, csrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsm2_analysis( - cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuComplex *B, int ldb, csrsm2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, const cuComplex *, int, csrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsm2_analysis( - 
cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *B, int ldb, - csrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - const cuDoubleComplex *, int, csrsm2Info_t, cusparseSolvePolicy_t, - void *); - static auto func_ptr = LoadSymbol("cusparseZcsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsm2_solve( - cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, const float *alpha, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, float *B, int ldb, - csrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const float *, const cusparseMatDescr_t, const float *, const int *, - const int *, float *, int, csrsm2Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseDcsrsm2_solve( - cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, const double *alpha, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, double *B, - int ldb, csrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const double *, const cusparseMatDescr_t, const double *, - const int *, const int *, double *, int, csrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsm2_solve( - cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cuComplex *B, int ldb, csrsm2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, cuComplex *, int, csrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsm2_solve( - 
cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cuDoubleComplex *B, int ldb, csrsm2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, cuDoubleComplex *, int, - csrsm2Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXbsrsm2_zeroPivot(cusparseHandle_t handle, - bsrsm2Info_t info, - int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, bsrsm2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXbsrsm2_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsm2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, int, bsrsm2Info_t, int *); - static auto func_ptr = 
LoadSymbol("cusparseSbsrsm2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsm2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, int, bsrsm2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDbsrsm2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsm2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, int, bsrsm2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCbsrsm2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, 
bsrSortedColInd, blockSize, - info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsm2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrsm2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZbsrsm2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsm2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, int, bsrsm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSbsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsm2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - 
cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, int, bsrsm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDbsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsm2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, int, bsrsm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCbsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsm2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, 
int blockSize, - bsrsm2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrsm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZbsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsm2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, const float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, bsrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsm2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, const double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - 
cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, bsrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsm2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, const cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, bsrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsm2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, - const cuDoubleComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, 
int, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, bsrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsm2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const float *alpha, const cusparseMatDescr_t descrA, - const float *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info, - const float *B, int ldb, float *X, int ldx, cusparseSolvePolicy_t policy, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, int, - bsrsm2Info_t, const float *, int, float *, int, cusparseSolvePolicy_t, - void *); - static auto func_ptr = LoadSymbol("cusparseSbsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, alpha, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, B, ldb, X, ldx, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsm2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const double *alpha, const cusparseMatDescr_t descrA, - const double *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info, - const double *B, int ldb, double *X, int ldx, cusparseSolvePolicy_t policy, - void *pBuffer) { - using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, int, - bsrsm2Info_t, const double *, int, double *, int, cusparseSolvePolicy_t, - void *); - static auto func_ptr = LoadSymbol("cusparseDbsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, alpha, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, B, ldb, X, ldx, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsm2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info, - const cuComplex *B, int ldb, cuComplex *X, int ldx, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cuComplex *, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - int, bsrsm2Info_t, const cuComplex *, int, cuComplex *, int, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, alpha, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, B, ldb, X, ldx, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsm2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex 
*bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info, - const cuDoubleComplex *B, int ldb, cuDoubleComplex *X, int ldx, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, int, bsrsm2Info_t, const cuDoubleComplex *, int, - cuDoubleComplex *, int, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, alpha, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, B, ldb, X, ldx, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsrilu0Ex( - cusparseHandle_t handle, cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, void *csrSortedValA_ValM, - cudaDataType csrSortedValA_ValMtype, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - cudaDataType executiontype) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - void *, cudaDataType, const int *, const int *, - cusparseSolveAnalysisInfo_t, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCsrilu0Ex"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedValA_ValMtype, csrSortedRowPtrA, csrSortedColIndA, - info, executiontype); -} - -cusparseStatus_t CUSPARSEAPI -cusparseScsrilu0(cusparseHandle_t handle, cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, float *csrSortedValA_ValM, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - float *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseScsrilu0"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDcsrilu0(cusparseHandle_t handle, cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, double *csrSortedValA_ValM, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - double *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseDcsrilu0"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCcsrilu0(cusparseHandle_t handle, cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, cuComplex *csrSortedValA_ValM, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseCcsrilu0"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu0( - cusparseHandle_t handle, cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, cuDoubleComplex *csrSortedValA_ValM, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, 
- cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseZcsrilu0"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrilu02_numericBoost( - cusparseHandle_t handle, csrilu02Info_t info, int enable_boost, double *tol, - float *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, csrilu02Info_t, int, double *, float *); - static auto func_ptr = LoadSymbol("cusparseScsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02_numericBoost( - cusparseHandle_t handle, csrilu02Info_t info, int enable_boost, double *tol, - double *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, csrilu02Info_t, int, double *, double *); - static auto func_ptr = LoadSymbol("cusparseDcsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02_numericBoost( - cusparseHandle_t handle, csrilu02Info_t info, int enable_boost, double *tol, - cuComplex *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, csrilu02Info_t, int, double *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCcsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02_numericBoost( - cusparseHandle_t handle, csrilu02Info_t info, int enable_boost, double 
*tol, - cuDoubleComplex *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, csrilu02Info_t, int, double *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZcsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrilu02_zeroPivot( - cusparseHandle_t handle, csrilu02Info_t info, int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, csrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXcsrilu02_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrilu02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseScsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDcsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, 
m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCcsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZcsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrilu02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedVal, const int *csrSortedRowPtr, const int *csrSortedColInd, - csrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseScsrilu02_bufferSizeExt"); 
- if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csrilu02Info_t, size_t *); - static auto 
func_ptr = LoadSymbol("cusparseZcsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrilu02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using 
FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, csrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrilu02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedValA_valM, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02( - cusparseHandle_t handle, int m, int nnz, 
const cusparseMatDescr_t descrA, - double *csrSortedValA_valM, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedValA_valM, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedValA_valM, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, 
csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02_numericBoost( - cusparseHandle_t handle, bsrilu02Info_t info, int enable_boost, double *tol, - float *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, bsrilu02Info_t, int, double *, float *); - static auto func_ptr = LoadSymbol("cusparseSbsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02_numericBoost( - cusparseHandle_t handle, bsrilu02Info_t info, int enable_boost, double *tol, - double *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, bsrilu02Info_t, int, double *, double *); - static auto func_ptr = LoadSymbol("cusparseDbsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02_numericBoost( - cusparseHandle_t handle, bsrilu02Info_t info, int enable_boost, double *tol, - cuComplex *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, bsrilu02Info_t, int, double *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCbsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02_numericBoost( - cusparseHandle_t handle, bsrilu02Info_t info, int enable_boost, double *tol, - cuDoubleComplex *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, bsrilu02Info_t, int, double *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZbsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t 
CUSPARSEAPI cusparseXbsrilu02_zeroPivot( - cusparseHandle_t handle, bsrilu02Info_t info, int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, bsrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXbsrilu02_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseSbsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDbsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02_bufferSize( - cusparseHandle_t handle, 
cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCbsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZbsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsrilu02Info_t, size_t *); - static auto func_ptr = 
LoadSymbol("cusparseSbsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDbsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCbsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int 
*bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrilu02Info_t, - size_t *); - static auto func_ptr = LoadSymbol("cusparseZbsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsrilu02_analysis"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - 
const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsrilu02"); - 
if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsric0(cusparseHandle_t handle, - cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, - float *csrSortedValA_ValM, - const int *csrSortedRowPtrA, - const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - float *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseScsric0"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsric0(cusparseHandle_t handle, - cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, - double *csrSortedValA_ValM, - const int *csrSortedRowPtrA, - const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - 
cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - double *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseDcsric0"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsric0(cusparseHandle_t handle, - cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, - cuComplex *csrSortedValA_ValM, - const int *csrSortedRowPtrA, - const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseCcsric0"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsric0( - cusparseHandle_t handle, cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, cuDoubleComplex *csrSortedValA_ValM, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseZcsric0"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsric02_zeroPivot(cusparseHandle_t handle, - csric02Info_t info, - int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, csric02Info_t, int *); - static auto func_ptr = 
LoadSymbol("cusparseXcsric02_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsric02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseScsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsric02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDcsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsric02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csric02Info_t, int *); - static auto func_ptr = 
LoadSymbol("cusparseCcsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsric02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZcsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsric02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedVal, const int *csrSortedRowPtr, const int *csrSortedColInd, - csric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseScsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsric02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, 
csric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsric02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsric02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsric02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, 
int, const cusparseMatDescr_t, const float *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsric02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsric02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsric02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuDoubleComplex 
*csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, csric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsric02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedValA_valM, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsric02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedValA_valM, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, 
pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsric02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedValA_valM, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsric02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedValA_valM, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXbsric02_zeroPivot(cusparseHandle_t handle, - bsric02Info_t info, - int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, bsric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXbsric02_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsric02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t 
descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseSbsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsric02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDbsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsric02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCbsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsric02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZbsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsric02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSbsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsric02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsric02Info_t info, size_t *pBufferSize) { - using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDbsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsric02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCbsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsric02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsric02Info_t, - size_t *); - static auto func_ptr = LoadSymbol("cusparseZbsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - 
-cusparseStatus_t CUSPARSEAPI cusparseSbsric02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pInputBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pInputBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsric02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pInputBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pInputBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsric02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pInputBuffer) { - using FuncPtr 
= cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pInputBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsric02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pInputBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pInputBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsric02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, 
descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsric02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsric02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsric02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - 
using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv(cusparseHandle_t handle, int m, - int n, const float *dl, - const float *d, const float *du, - float *B, int ldb) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, int, - const float *, const float *, - const float *, float *, int); - static auto func_ptr = LoadSymbol("cusparseSgtsv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsv(cusparseHandle_t handle, int m, - int n, const double *dl, - const double *d, const double *du, - double *B, int ldb) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, double *, int); - static auto func_ptr = LoadSymbol("cusparseDgtsv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv(cusparseHandle_t handle, int m, - int n, const cuComplex *dl, - const cuComplex *d, - const cuComplex *du, cuComplex *B, - int ldb) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCgtsv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv(cusparseHandle_t handle, int m, - int n, const 
cuDoubleComplex *dl, - const cuDoubleComplex *d, - const cuDoubleComplex *du, - cuDoubleComplex *B, int ldb) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cusparseZgtsv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const float *dl, const float *d, - const float *du, const float *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const float *, const float *, - const float *, int, size_t *); - static auto func_ptr = LoadSymbol("cusparseSgtsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsv2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const double *dl, const double *d, - const double *du, const double *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, const double *, int, size_t *); - static auto func_ptr = LoadSymbol("cusparseDgtsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const cuComplex *dl, - const cuComplex *d, const cuComplex *du, const cuComplex *B, int ldb, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, const cuComplex *, int, size_t *); - static auto 
func_ptr = LoadSymbol("cusparseCgtsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, - const cuDoubleComplex *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, const cuDoubleComplex *, - int, size_t *); - static auto func_ptr = LoadSymbol("cusparseZgtsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2(cusparseHandle_t handle, int m, - int n, const float *dl, - const float *d, const float *du, - float *B, int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const float *, const float *, - float *, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgtsv2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsv2(cusparseHandle_t handle, int m, - int n, const double *dl, - const double *d, const double *du, - double *B, int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, double *, int, void *); - static auto func_ptr = LoadSymbol("cusparseDgtsv2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2(cusparseHandle_t handle, int m, - int n, const cuComplex *dl, - const cuComplex *d, - const cuComplex *du, cuComplex *B, - 
int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, cuComplex *, int, void *); - static auto func_ptr = LoadSymbol("cusparseCgtsv2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2(cusparseHandle_t handle, int m, - int n, const cuDoubleComplex *dl, - const cuDoubleComplex *d, - const cuDoubleComplex *du, - cuDoubleComplex *B, int ldb, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *, int, - void *); - static auto func_ptr = LoadSymbol("cusparseZgtsv2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSgtsv_nopivot(cusparseHandle_t handle, int m, int n, const float *dl, - const float *d, const float *du, float *B, int ldb) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, int, - const float *, const float *, - const float *, float *, int); - static auto func_ptr = LoadSymbol("cusparseSgtsv_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDgtsv_nopivot(cusparseHandle_t handle, int m, int n, const double *dl, - const double *d, const double *du, double *B, int ldb) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, double *, int); - static auto func_ptr = LoadSymbol("cusparseDgtsv_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv_nopivot( - cusparseHandle_t handle, int m, int 
n, const cuComplex *dl, - const cuComplex *d, const cuComplex *du, cuComplex *B, int ldb) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCgtsv_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb); -} - -cusparseStatus_t CUSPARSEAPI -cusparseZgtsv_nopivot(cusparseHandle_t handle, int m, int n, - const cuDoubleComplex *dl, const cuDoubleComplex *d, - const cuDoubleComplex *du, cuDoubleComplex *B, int ldb) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cusparseZgtsv_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2_nopivot_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const float *dl, const float *d, - const float *du, const float *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const float *, const float *, - const float *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSgtsv2_nopivot_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsv2_nopivot_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const double *dl, const double *d, - const double *du, const double *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, const double *, int, size_t *); - static auto func_ptr = - 
LoadSymbol("cusparseDgtsv2_nopivot_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2_nopivot_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const cuComplex *dl, - const cuComplex *d, const cuComplex *du, const cuComplex *B, int ldb, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, const cuComplex *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgtsv2_nopivot_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2_nopivot_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, - const cuDoubleComplex *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, const cuDoubleComplex *, - int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgtsv2_nopivot_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2_nopivot( - cusparseHandle_t handle, int m, int n, const float *dl, const float *d, - const float *du, float *B, int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const float *, const float *, - float *, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgtsv2_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseDgtsv2_nopivot( - cusparseHandle_t handle, int m, int n, const double *dl, const double *d, - const double *du, double *B, int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, double *, int, void *); - static auto func_ptr = LoadSymbol("cusparseDgtsv2_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2_nopivot( - cusparseHandle_t handle, int m, int n, const cuComplex *dl, - const cuComplex *d, const cuComplex *du, cuComplex *B, int ldb, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, cuComplex *, int, void *); - static auto func_ptr = LoadSymbol("cusparseCgtsv2_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2_nopivot( - cusparseHandle_t handle, int m, int n, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, cuDoubleComplex *B, - int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *, int, - void *); - static auto func_ptr = LoadSymbol("cusparseZgtsv2_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsvStridedBatch( - cusparseHandle_t handle, int m, const float *dl, const float *d, - const float *du, float *x, int batchCount, int batchStride) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const float *, const float *, const float *, - float *, int, int); - static auto func_ptr = 
LoadSymbol("cusparseSgtsvStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsvStridedBatch( - cusparseHandle_t handle, int m, const double *dl, const double *d, - const double *du, double *x, int batchCount, int batchStride) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, const double *, const double *, - double *, int, int); - static auto func_ptr = LoadSymbol("cusparseDgtsvStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsvStridedBatch( - cusparseHandle_t handle, int m, const cuComplex *dl, const cuComplex *d, - const cuComplex *du, cuComplex *x, int batchCount, int batchStride) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const cuComplex *, - const cuComplex *, cuComplex *, int, int); - static auto func_ptr = LoadSymbol("cusparseCgtsvStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsvStridedBatch( - cusparseHandle_t handle, int m, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, cuDoubleComplex *x, - int batchCount, int batchStride) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, const cuDoubleComplex *, - const cuDoubleComplex *, cuDoubleComplex *, int, int); - static auto func_ptr = LoadSymbol("cusparseZgtsvStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2StridedBatch_bufferSizeExt( - cusparseHandle_t handle, int m, const float *dl, const float *d, - const 
float *du, const float *x, int batchCount, int batchStride, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const float *, const float *, const float *, - const float *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSgtsv2StridedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, - bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsv2StridedBatch_bufferSizeExt( - cusparseHandle_t handle, int m, const double *dl, const double *d, - const double *du, const double *x, int batchCount, int batchStride, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, const double *, const double *, - const double *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDgtsv2StridedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, - bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2StridedBatch_bufferSizeExt( - cusparseHandle_t handle, int m, const cuComplex *dl, const cuComplex *d, - const cuComplex *du, const cuComplex *x, int batchCount, int batchStride, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const cuComplex *, - const cuComplex *, const cuComplex *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgtsv2StridedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, - bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2StridedBatch_bufferSizeExt( - cusparseHandle_t handle, int m, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, - const cuDoubleComplex *x, int batchCount, 
int batchStride, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgtsv2StridedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, - bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2StridedBatch( - cusparseHandle_t handle, int m, const float *dl, const float *d, - const float *du, float *x, int batchCount, int batchStride, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const float *, const float *, const float *, - float *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgtsv2StridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDgtsv2StridedBatch(cusparseHandle_t handle, int m, const double *dl, - const double *d, const double *du, double *x, - int batchCount, int batchStride, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, const double *, const double *, - double *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseDgtsv2StridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2StridedBatch( - cusparseHandle_t handle, int m, const cuComplex *dl, const cuComplex *d, - const cuComplex *du, cuComplex *x, int batchCount, int batchStride, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const cuComplex *, - const cuComplex *, cuComplex *, int, int, void *); - static auto 
func_ptr = LoadSymbol("cusparseCgtsv2StridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2StridedBatch( - cusparseHandle_t handle, int m, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, cuDoubleComplex *x, - int batchCount, int batchStride, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, const cuDoubleComplex *, - const cuDoubleComplex *, cuDoubleComplex *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseZgtsv2StridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const float *dl, const float *d, - const float *du, const float *x, int batchCount, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const float *, const float *, - const float *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSgtsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const double *dl, const double *d, - const double *du, const double *x, int batchCount, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, const double *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDgtsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, algo, m, dl, d, du, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const cuComplex *dl, - const cuComplex *d, const cuComplex *du, const cuComplex *x, int batchCount, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, const cuComplex *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgtsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, - const cuDoubleComplex *x, int batchCount, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, const cuDoubleComplex *, - int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgtsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, float *dl, float *d, float *du, - float *x, int batchCount, void *pBuffer) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, int, float *, - float *, float *, float *, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgtsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseDgtsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, double *dl, double *d, double *du, - double *x, int batchCount, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, int, - double *, double *, double *, - double *, int, void *); - static auto func_ptr = LoadSymbol("cusparseDgtsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, cuComplex *dl, cuComplex *d, - cuComplex *du, cuComplex *x, int batchCount, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, cuComplex *, cuComplex *, cuComplex *, - cuComplex *, int, void *); - static auto func_ptr = LoadSymbol("cusparseCgtsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, cuDoubleComplex *dl, - cuDoubleComplex *d, cuDoubleComplex *du, cuDoubleComplex *x, int batchCount, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, cuDoubleComplex *, cuDoubleComplex *, - cuDoubleComplex *, cuDoubleComplex *, int, void *); - static auto func_ptr = LoadSymbol("cusparseZgtsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgpsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const float *ds, const float *dl, - const float *d, const float *du, const float *dw, const float *x, - int batchCount, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const 
float *, const float *, - const float *, const float *, const float *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSgpsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgpsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const double *ds, - const double *dl, const double *d, const double *du, const double *dw, - const double *x, int batchCount, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, const double *, const double *, const double *, int, - size_t *); - static auto func_ptr = - LoadSymbol("cusparseDgpsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgpsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const cuComplex *ds, - const cuComplex *dl, const cuComplex *d, const cuComplex *du, - const cuComplex *dw, const cuComplex *x, int batchCount, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, const cuComplex *, const cuComplex *, - const cuComplex *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgpsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgpsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const cuDoubleComplex *ds, - const cuDoubleComplex *dl, const cuDoubleComplex *d, - const cuDoubleComplex *du, 
const cuDoubleComplex *dw, - const cuDoubleComplex *x, int batchCount, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgpsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgpsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, float *ds, float *dl, float *d, - float *du, float *dw, float *x, int batchCount, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, float *, float *, float *, float *, float *, - float *, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgpsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgpsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, double *ds, double *dl, double *d, - double *du, double *dw, double *x, int batchCount, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, double *, double *, double *, double *, - double *, double *, int, void *); - static auto func_ptr = LoadSymbol("cusparseDgpsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgpsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, cuComplex *ds, cuComplex *dl, - cuComplex *d, cuComplex *du, cuComplex *dw, cuComplex *x, int batchCount, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - 
cusparseHandle_t, int, int, cuComplex *, cuComplex *, cuComplex *, - cuComplex *, cuComplex *, cuComplex *, int, void *); - static auto func_ptr = LoadSymbol("cusparseCgpsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgpsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, cuDoubleComplex *ds, - cuDoubleComplex *dl, cuDoubleComplex *d, cuDoubleComplex *du, - cuDoubleComplex *dw, cuDoubleComplex *x, int batchCount, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, cuDoubleComplex *, cuDoubleComplex *, - cuDoubleComplex *, cuDoubleComplex *, cuDoubleComplex *, - cuDoubleComplex *, int, void *); - static auto func_ptr = LoadSymbol("cusparseZgpsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseXcsrgemmNnz(cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, - const cusparseMatDescr_t descrA, const int nnzA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, const int nnzB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrC, int *csrSortedRowPtrC, - int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - const cusparseMatDescr_t, const int, const int *, const int *, - const cusparseMatDescr_t, const int, const int *, const int *, - const cusparseMatDescr_t, int *, int *); - static auto func_ptr = LoadSymbol("cusparseXcsrgemmNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, descrA, nnzA, - csrSortedRowPtrA, csrSortedColIndA, descrB, 
nnzB, - csrSortedRowPtrB, csrSortedColIndB, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrgemm( - cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, - const cusparseMatDescr_t descrA, const int nnzA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, const int nnzB, const float *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrC, float *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - const cusparseMatDescr_t, const int, const float *, const int *, - const int *, const cusparseMatDescr_t, const int, const float *, - const int *, const int *, const cusparseMatDescr_t, float *, const int *, - int *); - static auto func_ptr = LoadSymbol("cusparseScsrgemm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrgemm( - cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, - const cusparseMatDescr_t descrA, int nnzA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, int nnzB, const double *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrC, double *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, 
int, int, int, - const cusparseMatDescr_t, int, const double *, const int *, const int *, - const cusparseMatDescr_t, int, const double *, const int *, const int *, - const cusparseMatDescr_t, double *, const int *, int *); - static auto func_ptr = LoadSymbol("cusparseDcsrgemm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrgemm( - cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, - const cusparseMatDescr_t descrA, int nnzA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, int nnzB, const cuComplex *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrC, cuComplex *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - const cusparseMatDescr_t, int, const cuComplex *, const int *, - const int *, const cusparseMatDescr_t, int, const cuComplex *, - const int *, const int *, const cusparseMatDescr_t, cuComplex *, - const int *, int *); - static auto func_ptr = LoadSymbol("cusparseCcsrgemm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrgemm( - cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, - const 
cusparseMatDescr_t descrA, int nnzA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, - const cuDoubleComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - cuDoubleComplex *csrSortedValC, const int *csrSortedRowPtrC, - int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - const cusparseMatDescr_t, int, const cuDoubleComplex *, const int *, - const int *, const cusparseMatDescr_t, int, const cuDoubleComplex *, - const int *, const int *, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, int *); - static auto func_ptr = LoadSymbol("cusparseZcsrgemm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsrgemm2Info(csrgemm2Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrgemm2Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateCsrgemm2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyCsrgemm2Info(csrgemm2Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrgemm2Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyCsrgemm2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrgemm2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int k, const float *alpha, - const cusparseMatDescr_t descrA, int nnzA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, 
- const int *csrSortedRowPtrB, const int *csrSortedColIndB, const float *beta, - const cusparseMatDescr_t descrD, int nnzD, const int *csrSortedRowPtrD, - const int *csrSortedColIndD, csrgemm2Info_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const float *, const cusparseMatDescr_t, - int, const int *, const int *, const cusparseMatDescr_t, int, const int *, - const int *, const float *, const cusparseMatDescr_t, int, const int *, - const int *, csrgemm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseScsrgemm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, - csrSortedColIndB, beta, descrD, nnzD, csrSortedRowPtrD, - csrSortedColIndD, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrgemm2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int k, const double *alpha, - const cusparseMatDescr_t descrA, int nnzA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const double *beta, const cusparseMatDescr_t descrD, int nnzD, - const int *csrSortedRowPtrD, const int *csrSortedColIndD, - csrgemm2Info_t info, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const double *, const cusparseMatDescr_t, - int, const int *, const int *, const cusparseMatDescr_t, int, const int *, - const int *, const double *, const cusparseMatDescr_t, int, const int *, - const int *, csrgemm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsrgemm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, 
csrSortedRowPtrB, - csrSortedColIndB, beta, descrD, nnzD, csrSortedRowPtrD, - csrSortedColIndD, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrgemm2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int k, const cuComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cuComplex *beta, const cusparseMatDescr_t descrD, int nnzD, - const int *csrSortedRowPtrD, const int *csrSortedColIndD, - csrgemm2Info_t info, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuComplex *, - const cusparseMatDescr_t, int, const int *, const int *, - const cusparseMatDescr_t, int, const int *, const int *, - const cuComplex *, const cusparseMatDescr_t, int, const int *, - const int *, csrgemm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsrgemm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, - csrSortedColIndB, beta, descrD, nnzD, csrSortedRowPtrD, - csrSortedColIndD, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrgemm2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int k, const cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cuDoubleComplex *beta, const cusparseMatDescr_t descrD, int nnzD, - const int *csrSortedRowPtrD, const int *csrSortedColIndD, - csrgemm2Info_t info, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuDoubleComplex *, - const 
cusparseMatDescr_t, int, const int *, const int *, - const cusparseMatDescr_t, int, const int *, const int *, - const cuDoubleComplex *, const cusparseMatDescr_t, int, const int *, - const int *, csrgemm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsrgemm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, - csrSortedColIndB, beta, descrD, nnzD, csrSortedRowPtrD, - csrSortedColIndD, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrgemm2Nnz( - cusparseHandle_t handle, int m, int n, int k, - const cusparseMatDescr_t descrA, int nnzA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrD, int nnzD, const int *csrSortedRowPtrD, - const int *csrSortedColIndD, const cusparseMatDescr_t descrC, - int *csrSortedRowPtrC, int *nnzTotalDevHostPtr, const csrgemm2Info_t info, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, int, - const int *, const int *, const cusparseMatDescr_t, int, const int *, - const int *, const cusparseMatDescr_t, int, const int *, const int *, - const cusparseMatDescr_t, int *, int *, const csrgemm2Info_t, void *); - static auto func_ptr = LoadSymbol("cusparseXcsrgemm2Nnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, descrA, nnzA, csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, - csrSortedColIndB, descrD, nnzD, csrSortedRowPtrD, - csrSortedColIndD, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrgemm2( - cusparseHandle_t handle, int m, int n, int k, const float *alpha, - const cusparseMatDescr_t descrA, int nnzA, const 
float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, int nnzB, const float *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, const float *beta, - const cusparseMatDescr_t descrD, int nnzD, const float *csrSortedValD, - const int *csrSortedRowPtrD, const int *csrSortedColIndD, - const cusparseMatDescr_t descrC, float *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC, - const csrgemm2Info_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const float *, const cusparseMatDescr_t, - int, const float *, const int *, const int *, const cusparseMatDescr_t, - int, const float *, const int *, const int *, const float *, - const cusparseMatDescr_t, int, const float *, const int *, const int *, - const cusparseMatDescr_t, float *, const int *, int *, - const csrgemm2Info_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsrgemm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, beta, - descrD, nnzD, csrSortedValD, csrSortedRowPtrD, - csrSortedColIndD, descrC, csrSortedValC, csrSortedRowPtrC, - csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrgemm2( - cusparseHandle_t handle, int m, int n, int k, const double *alpha, - const cusparseMatDescr_t descrA, int nnzA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, int nnzB, const double *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const double *beta, const cusparseMatDescr_t descrD, int nnzD, - const double *csrSortedValD, const int *csrSortedRowPtrD, - const int *csrSortedColIndD, const cusparseMatDescr_t descrC, - double *csrSortedValC, const 
int *csrSortedRowPtrC, int *csrSortedColIndC, - const csrgemm2Info_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const double *, const cusparseMatDescr_t, - int, const double *, const int *, const int *, const cusparseMatDescr_t, - int, const double *, const int *, const int *, const double *, - const cusparseMatDescr_t, int, const double *, const int *, const int *, - const cusparseMatDescr_t, double *, const int *, int *, - const csrgemm2Info_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrgemm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, beta, - descrD, nnzD, csrSortedValD, csrSortedRowPtrD, - csrSortedColIndD, descrC, csrSortedValC, csrSortedRowPtrC, - csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrgemm2( - cusparseHandle_t handle, int m, int n, int k, const cuComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, int nnzB, const cuComplex *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cuComplex *beta, const cusparseMatDescr_t descrD, int nnzD, - const cuComplex *csrSortedValD, const int *csrSortedRowPtrD, - const int *csrSortedColIndD, const cusparseMatDescr_t descrC, - cuComplex *csrSortedValC, const int *csrSortedRowPtrC, - int *csrSortedColIndC, const csrgemm2Info_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuComplex *, - const cusparseMatDescr_t, int, const cuComplex *, const int *, - const int *, const cusparseMatDescr_t, int, const cuComplex *, - const int *, const int *, const cuComplex *, const cusparseMatDescr_t, - int, 
const cuComplex *, const int *, const int *, - const cusparseMatDescr_t, cuComplex *, const int *, int *, - const csrgemm2Info_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsrgemm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, beta, - descrD, nnzD, csrSortedValD, csrSortedRowPtrD, - csrSortedColIndD, descrC, csrSortedValC, csrSortedRowPtrC, - csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrgemm2( - cusparseHandle_t handle, int m, int n, int k, const cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, - const cuDoubleComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cuDoubleComplex *beta, - const cusparseMatDescr_t descrD, int nnzD, - const cuDoubleComplex *csrSortedValD, const int *csrSortedRowPtrD, - const int *csrSortedColIndD, const cusparseMatDescr_t descrC, - cuDoubleComplex *csrSortedValC, const int *csrSortedRowPtrC, - int *csrSortedColIndC, const csrgemm2Info_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, int, const cuDoubleComplex *, const int *, - const int *, const cusparseMatDescr_t, int, const cuDoubleComplex *, - const int *, const int *, const cuDoubleComplex *, - const cusparseMatDescr_t, int, const cuDoubleComplex *, const int *, - const int *, const cusparseMatDescr_t, cuDoubleComplex *, const int *, - int *, const csrgemm2Info_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrgemm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, 
csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, beta, - descrD, nnzD, csrSortedValD, csrSortedRowPtrD, - csrSortedColIndD, descrC, csrSortedValC, csrSortedRowPtrC, - csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrgeamNnz( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - int nnzA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, int nnzB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - int *csrSortedRowPtrC, int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, int, const int *, - const int *, const cusparseMatDescr_t, int, const int *, const int *, - const cusparseMatDescr_t, int *, int *); - static auto func_ptr = LoadSymbol("cusparseXcsrgeamNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, nnzA, csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, - csrSortedColIndB, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrgeam( - cusparseHandle_t handle, int m, int n, const float *alpha, - const cusparseMatDescr_t descrA, int nnzA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *beta, - const cusparseMatDescr_t descrB, int nnzB, const float *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrC, float *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const cusparseMatDescr_t, int, - const float *, const int *, const int *, const float *, - const cusparseMatDescr_t, int, const float *, const int *, const int *, - const cusparseMatDescr_t, float *, 
int *, int *); - static auto func_ptr = LoadSymbol("cusparseScsrgeam"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrgeam( - cusparseHandle_t handle, int m, int n, const double *alpha, - const cusparseMatDescr_t descrA, int nnzA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *beta, const cusparseMatDescr_t descrB, int nnzB, - const double *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - double *csrSortedValC, int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const cusparseMatDescr_t, int, - const double *, const int *, const int *, const double *, - const cusparseMatDescr_t, int, const double *, const int *, const int *, - const cusparseMatDescr_t, double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDcsrgeam"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrgeam( - cusparseHandle_t handle, int m, int n, const cuComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cuComplex *beta, const cusparseMatDescr_t descrB, int nnzB, - const cuComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - cuComplex 
*csrSortedValC, int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cusparseMatDescr_t, - int, const cuComplex *, const int *, const int *, const cuComplex *, - const cusparseMatDescr_t, int, const cuComplex *, const int *, - const int *, const cusparseMatDescr_t, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCcsrgeam"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrgeam( - cusparseHandle_t handle, int m, int n, const cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *beta, - const cusparseMatDescr_t descrB, int nnzB, - const cuDoubleComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - cuDoubleComplex *csrSortedValC, int *csrSortedRowPtrC, - int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, int, const cuDoubleComplex *, const int *, - const int *, const cuDoubleComplex *, const cusparseMatDescr_t, int, - const cuDoubleComplex *, const int *, const int *, - const cusparseMatDescr_t, cuDoubleComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZcsrgeam"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, 
csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrgeam2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const float *alpha, - const cusparseMatDescr_t descrA, int nnzA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *beta, - const cusparseMatDescr_t descrB, int nnzB, const float *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrC, const float *csrSortedValC, - const int *csrSortedRowPtrC, const int *csrSortedColIndC, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const cusparseMatDescr_t, int, - const float *, const int *, const int *, const float *, - const cusparseMatDescr_t, int, const float *, const int *, const int *, - const cusparseMatDescr_t, const float *, const int *, const int *, - size_t *); - static auto func_ptr = LoadSymbol("cusparseScsrgeam2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrgeam2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const double *alpha, - const cusparseMatDescr_t descrA, int nnzA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *beta, const cusparseMatDescr_t descrB, int nnzB, - const double *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - const double *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const 
cusparseMatDescr_t, int, - const double *, const int *, const int *, const double *, - const cusparseMatDescr_t, int, const double *, const int *, const int *, - const cusparseMatDescr_t, const double *, const int *, const int *, - size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsrgeam2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrgeam2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const cuComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cuComplex *beta, const cusparseMatDescr_t descrB, int nnzB, - const cuComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - const cuComplex *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cusparseMatDescr_t, - int, const cuComplex *, const int *, const int *, const cuComplex *, - const cusparseMatDescr_t, int, const cuComplex *, const int *, - const int *, const cusparseMatDescr_t, const cuComplex *, const int *, - const int *, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsrgeam2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseZcsrgeam2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *beta, - const cusparseMatDescr_t descrB, int nnzB, - const cuDoubleComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - const cuDoubleComplex *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, int, const cuDoubleComplex *, const int *, - const int *, const cuDoubleComplex *, const cusparseMatDescr_t, int, - const cuDoubleComplex *, const int *, const int *, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsrgeam2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrgeam2Nnz( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - int nnzA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, int nnzB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - int *csrSortedRowPtrC, int *nnzTotalDevHostPtr, void *workspace) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, int, const int *, - const int *, const cusparseMatDescr_t, int, const int *, const int *, - const cusparseMatDescr_t, 
int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseXcsrgeam2Nnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, nnzA, csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, - csrSortedColIndB, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, workspace); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrgeam2( - cusparseHandle_t handle, int m, int n, const float *alpha, - const cusparseMatDescr_t descrA, int nnzA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *beta, - const cusparseMatDescr_t descrB, int nnzB, const float *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrC, float *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const cusparseMatDescr_t, int, - const float *, const int *, const int *, const float *, - const cusparseMatDescr_t, int, const float *, const int *, const int *, - const cusparseMatDescr_t, float *, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseScsrgeam2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrgeam2( - cusparseHandle_t handle, int m, int n, const double *alpha, - const cusparseMatDescr_t descrA, int nnzA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *beta, const cusparseMatDescr_t descrB, int nnzB, - const double *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - double *csrSortedValC, int 
*csrSortedRowPtrC, int *csrSortedColIndC, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const cusparseMatDescr_t, int, - const double *, const int *, const int *, const double *, - const cusparseMatDescr_t, int, const double *, const int *, const int *, - const cusparseMatDescr_t, double *, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrgeam2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrgeam2( - cusparseHandle_t handle, int m, int n, const cuComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cuComplex *beta, const cusparseMatDescr_t descrB, int nnzB, - const cuComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - cuComplex *csrSortedValC, int *csrSortedRowPtrC, int *csrSortedColIndC, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cusparseMatDescr_t, - int, const cuComplex *, const int *, const int *, const cuComplex *, - const cusparseMatDescr_t, int, const cuComplex *, const int *, - const int *, const cusparseMatDescr_t, cuComplex *, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseCcsrgeam2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - 
-cusparseStatus_t CUSPARSEAPI cusparseZcsrgeam2( - cusparseHandle_t handle, int m, int n, const cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *beta, - const cusparseMatDescr_t descrB, int nnzB, - const cuDoubleComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - cuDoubleComplex *csrSortedValC, int *csrSortedRowPtrC, - int *csrSortedColIndC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, int, const cuDoubleComplex *, const int *, - const int *, const cuDoubleComplex *, const cusparseMatDescr_t, int, - const cuDoubleComplex *, const int *, const int *, - const cusparseMatDescr_t, cuDoubleComplex *, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrgeam2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrcolor( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const float *fractionToColor, int *ncolors, - int *coloring, int *reordering, const cusparseColorInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, const float *, int *, int *, int *, - const cusparseColorInfo_t); - static auto func_ptr = LoadSymbol("cusparseScsrcolor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, 
csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, fractionToColor, ncolors, coloring, - reordering, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrcolor( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const double *fractionToColor, int *ncolors, - int *coloring, int *reordering, const cusparseColorInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, int *, int *, int *, - const cusparseColorInfo_t); - static auto func_ptr = LoadSymbol("cusparseDcsrcolor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, fractionToColor, ncolors, coloring, - reordering, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrcolor( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const float *fractionToColor, int *ncolors, - int *coloring, int *reordering, const cusparseColorInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, const float *, int *, int *, int *, - const cusparseColorInfo_t); - static auto func_ptr = LoadSymbol("cusparseCcsrcolor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, fractionToColor, ncolors, coloring, - reordering, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrcolor( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const double *fractionToColor, 
int *ncolors, - int *coloring, int *reordering, const cusparseColorInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, const double *, int *, - int *, int *, const cusparseColorInfo_t); - static auto func_ptr = LoadSymbol("cusparseZcsrcolor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, fractionToColor, ncolors, coloring, - reordering, info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSnnz(cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const float *A, int lda, - int *nnzPerRowCol, int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, int, int *, int *); - static auto func_ptr = LoadSymbol("cusparseSnnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDnnz(cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const double *A, int lda, - int *nnzPerRowCol, int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, int, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDnnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCnnz(cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuComplex *A, int lda, - int *nnzPerRowCol, int *nnzTotalDevHostPtr) { - using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, int, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCnnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI -cusparseZnnz(cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *A, int lda, - int *nnzPerRowCol, int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, int, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZnnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI cusparseSnnz_compress( - cusparseHandle_t handle, int m, const cusparseMatDescr_t descr, - const float *csrSortedValA, const int *csrSortedRowPtrA, int *nnzPerRow, - int *nnzC, float tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cusparseMatDescr_t, const float *, - const int *, int *, int *, float); - static auto func_ptr = LoadSymbol("cusparseSnnz_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, descr, csrSortedValA, csrSortedRowPtrA, nnzPerRow, - nnzC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseDnnz_compress( - cusparseHandle_t handle, int m, const cusparseMatDescr_t descr, - const double *csrSortedValA, const int *csrSortedRowPtrA, int *nnzPerRow, - int *nnzC, double tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cusparseMatDescr_t, const double *, - const int *, int *, int *, double); - static auto func_ptr = LoadSymbol("cusparseDnnz_compress"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, m, descr, csrSortedValA, csrSortedRowPtrA, nnzPerRow, - nnzC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseCnnz_compress( - cusparseHandle_t handle, int m, const cusparseMatDescr_t descr, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, int *nnzPerRow, - int *nnzC, cuComplex tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cusparseMatDescr_t, const cuComplex *, - const int *, int *, int *, cuComplex); - static auto func_ptr = LoadSymbol("cusparseCnnz_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, descr, csrSortedValA, csrSortedRowPtrA, nnzPerRow, - nnzC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseZnnz_compress( - cusparseHandle_t handle, int m, const cusparseMatDescr_t descr, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - int *nnzPerRow, int *nnzC, cuDoubleComplex tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cusparseMatDescr_t, const cuDoubleComplex *, - const int *, int *, int *, cuDoubleComplex); - static auto func_ptr = LoadSymbol("cusparseZnnz_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, descr, csrSortedValA, csrSortedRowPtrA, nnzPerRow, - nnzC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2csr_compress( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedColIndA, - const int *csrSortedRowPtrA, int nnzA, const int *nnzPerRow, - float *csrSortedValC, int *csrSortedColIndC, int *csrSortedRowPtrC, - float tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, int, const int *, float *, int *, int *, float); - static auto func_ptr = LoadSymbol("cusparseScsr2csr_compress"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedColIndA, - csrSortedRowPtrA, nnzA, nnzPerRow, csrSortedValC, - csrSortedColIndC, csrSortedRowPtrC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2csr_compress( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedColIndA, - const int *csrSortedRowPtrA, int nnzA, const int *nnzPerRow, - double *csrSortedValC, int *csrSortedColIndC, int *csrSortedRowPtrC, - double tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, int, const int *, double *, int *, int *, - double); - static auto func_ptr = LoadSymbol("cusparseDcsr2csr_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedColIndA, - csrSortedRowPtrA, nnzA, nnzPerRow, csrSortedValC, - csrSortedColIndC, csrSortedRowPtrC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2csr_compress( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedColIndA, - const int *csrSortedRowPtrA, int nnzA, const int *nnzPerRow, - cuComplex *csrSortedValC, int *csrSortedColIndC, int *csrSortedRowPtrC, - cuComplex tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, int, const int *, cuComplex *, int *, int *, - cuComplex); - static auto func_ptr = LoadSymbol("cusparseCcsr2csr_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedColIndA, - csrSortedRowPtrA, nnzA, nnzPerRow, csrSortedValC, - csrSortedColIndC, csrSortedRowPtrC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2csr_compress( - cusparseHandle_t handle, int m, int n, const 
cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedColIndA, - const int *csrSortedRowPtrA, int nnzA, const int *nnzPerRow, - cuDoubleComplex *csrSortedValC, int *csrSortedColIndC, - int *csrSortedRowPtrC, cuDoubleComplex tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, const int *, - cuDoubleComplex *, int *, int *, cuDoubleComplex); - static auto func_ptr = LoadSymbol("cusparseZcsr2csr_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedColIndA, - csrSortedRowPtrA, nnzA, nnzPerRow, csrSortedValC, - csrSortedColIndC, csrSortedRowPtrC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseSdense2csr( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *A, int lda, const int *nnzPerRow, float *csrSortedValA, - int *csrSortedRowPtrA, int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, int, - const int *, float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseSdense2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA); -} - -cusparseStatus_t CUSPARSEAPI cusparseDdense2csr( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *A, int lda, const int *nnzPerRow, double *csrSortedValA, - int *csrSortedRowPtrA, int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, int, - const int *, double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDdense2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, 
nnzPerRow, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA); -} - -cusparseStatus_t CUSPARSEAPI cusparseCdense2csr( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *A, int lda, const int *nnzPerRow, cuComplex *csrSortedValA, - int *csrSortedRowPtrA, int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - int, const int *, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCdense2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA); -} - -cusparseStatus_t CUSPARSEAPI cusparseZdense2csr( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuDoubleComplex *A, int lda, const int *nnzPerRow, - cuDoubleComplex *csrSortedValA, int *csrSortedRowPtrA, - int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, int, const int *, cuDoubleComplex *, int *, - int *); - static auto func_ptr = LoadSymbol("cusparseZdense2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, float *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, float *, int); - static auto func_ptr = LoadSymbol("cusparseScsr2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - 
csrSortedColIndA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, double *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, double *, int); - static auto func_ptr = LoadSymbol("cusparseDcsr2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cuComplex *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCcsr2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cuDoubleComplex *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cusparseZcsr2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseSdense2csc( - 
cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *A, int lda, const int *nnzPerCol, float *cscSortedValA, - int *cscSortedRowIndA, int *cscSortedColPtrA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, int, - const int *, float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseSdense2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerCol, cscSortedValA, - cscSortedRowIndA, cscSortedColPtrA); -} - -cusparseStatus_t CUSPARSEAPI cusparseDdense2csc( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *A, int lda, const int *nnzPerCol, double *cscSortedValA, - int *cscSortedRowIndA, int *cscSortedColPtrA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, int, - const int *, double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDdense2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerCol, cscSortedValA, - cscSortedRowIndA, cscSortedColPtrA); -} - -cusparseStatus_t CUSPARSEAPI cusparseCdense2csc( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *A, int lda, const int *nnzPerCol, cuComplex *cscSortedValA, - int *cscSortedRowIndA, int *cscSortedColPtrA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - int, const int *, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCdense2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerCol, cscSortedValA, - cscSortedRowIndA, cscSortedColPtrA); -} - -cusparseStatus_t CUSPARSEAPI cusparseZdense2csc( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t 
descrA, - const cuDoubleComplex *A, int lda, const int *nnzPerCol, - cuDoubleComplex *cscSortedValA, int *cscSortedRowIndA, - int *cscSortedColPtrA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, int, const int *, cuDoubleComplex *, int *, - int *); - static auto func_ptr = LoadSymbol("cusparseZdense2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerCol, cscSortedValA, - cscSortedRowIndA, cscSortedColPtrA); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsc2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, float *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, float *, int); - static auto func_ptr = LoadSymbol("cusparseScsc2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsc2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, double *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, double *, int); - static auto func_ptr = LoadSymbol("cusparseDcsc2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsc2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *cscSortedValA, const int *cscSortedRowIndA, - const 
int *cscSortedColPtrA, cuComplex *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCcsc2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsc2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuDoubleComplex *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, cuDoubleComplex *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cusparseZcsc2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcoo2csr(cusparseHandle_t handle, - const int *cooRowInd, int nnz, - int m, int *csrSortedRowPtr, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const int *, int, int, int *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseXcoo2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, cooRowInd, nnz, m, csrSortedRowPtr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsr2coo(cusparseHandle_t handle, - const int *csrSortedRowPtr, - int nnz, int m, int *cooRowInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const int *, int, int, int *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseXcsr2coo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, 
csrSortedRowPtr, nnz, m, cooRowInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsr2cscEx( - cusparseHandle_t handle, int m, int n, int nnz, const void *csrSortedVal, - cudaDataType csrSortedValtype, const int *csrSortedRowPtr, - const int *csrSortedColInd, void *cscSortedVal, - cudaDataType cscSortedValtype, int *cscSortedRowInd, int *cscSortedColPtr, - cusparseAction_t copyValues, cusparseIndexBase_t idxBase, - cudaDataType executiontype) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const void *, cudaDataType, const int *, - const int *, void *, cudaDataType, int *, int *, cusparseAction_t, - cusparseIndexBase_t, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCsr2cscEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrSortedVal, csrSortedValtype, - csrSortedRowPtr, csrSortedColInd, cscSortedVal, - cscSortedValtype, cscSortedRowInd, cscSortedColPtr, - copyValues, idxBase, executiontype); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2csc( - cusparseHandle_t handle, int m, int n, int nnz, const float *csrSortedVal, - const int *csrSortedRowPtr, const int *csrSortedColInd, float *cscSortedVal, - int *cscSortedRowInd, int *cscSortedColPtr, cusparseAction_t copyValues, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const float *, const int *, const int *, - float *, int *, int *, cusparseAction_t, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseScsr2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, cscSortedVal, cscSortedRowInd, - cscSortedColPtr, copyValues, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2csc( - cusparseHandle_t handle, int m, int n, int nnz, const double *csrSortedVal, - const int *csrSortedRowPtr, const int *csrSortedColInd, - double *cscSortedVal, 
int *cscSortedRowInd, int *cscSortedColPtr, - cusparseAction_t copyValues, cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const double *, const int *, const int *, - double *, int *, int *, cusparseAction_t, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseDcsr2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, cscSortedVal, cscSortedRowInd, - cscSortedColPtr, copyValues, idxBase); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCcsr2csc(cusparseHandle_t handle, int m, int n, int nnz, - const cuComplex *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, cuComplex *cscSortedVal, - int *cscSortedRowInd, int *cscSortedColPtr, - cusparseAction_t copyValues, cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuComplex *, const int *, - const int *, cuComplex *, int *, int *, cusparseAction_t, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseCcsr2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, cscSortedVal, cscSortedRowInd, - cscSortedColPtr, copyValues, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2csc( - cusparseHandle_t handle, int m, int n, int nnz, - const cuDoubleComplex *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, cuDoubleComplex *cscSortedVal, - int *cscSortedRowInd, int *cscSortedColPtr, cusparseAction_t copyValues, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuDoubleComplex *, const int *, - const int *, cuDoubleComplex *, int *, int *, cusparseAction_t, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseZcsr2csc"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, cscSortedVal, cscSortedRowInd, - cscSortedColPtr, copyValues, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseSdense2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *A, int lda, const int *nnzPerRow, cusparseHybMat_t hybA, - int userEllWidth, cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, int, - const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseSdense2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, hybA, userEllWidth, - partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseDdense2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *A, int lda, const int *nnzPerRow, cusparseHybMat_t hybA, - int userEllWidth, cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, int, - const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseDdense2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, hybA, userEllWidth, - partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseCdense2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *A, int lda, const int *nnzPerRow, cusparseHybMat_t hybA, - int userEllWidth, cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - int, const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = 
LoadSymbol("cusparseCdense2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, hybA, userEllWidth, - partitionType); -} - -cusparseStatus_t CUSPARSEAPI -cusparseZdense2hyb(cusparseHandle_t handle, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *A, - int lda, const int *nnzPerRow, cusparseHybMat_t hybA, - int userEllWidth, cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, int, const int *, cusparseHybMat_t, int, - cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseZdense2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, hybA, userEllWidth, - partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseShyb2dense(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - float *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - float *, int); - static auto func_ptr = LoadSymbol("cusparseShyb2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseDhyb2dense(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - double *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - double *, int); - static auto func_ptr = LoadSymbol("cusparseDhyb2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseChyb2dense(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - cuComplex *A, int lda) { - using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseChyb2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseZhyb2dense(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - cuDoubleComplex *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cusparseZhyb2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseHybMat_t hybA, int userEllWidth, - cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseScsr2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, hybA, userEllWidth, partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseHybMat_t hybA, int userEllWidth, - cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, cusparseHybMat_t, int, cusparseHybPartition_t); 
- static auto func_ptr = LoadSymbol("cusparseDcsr2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, hybA, userEllWidth, partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseHybMat_t hybA, int userEllWidth, - cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseCcsr2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, hybA, userEllWidth, partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseHybMat_t hybA, int userEllWidth, - cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, cusparseHybMat_t, int, - cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseZcsr2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, hybA, userEllWidth, partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseShyb2csr(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - float *csrSortedValA, - int *csrSortedRowPtrA, - int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI 
*)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseShyb2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA); -} - -cusparseStatus_t CUSPARSEAPI cusparseDhyb2csr(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - double *csrSortedValA, - int *csrSortedRowPtrA, - int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDhyb2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA); -} - -cusparseStatus_t CUSPARSEAPI cusparseChyb2csr(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - cuComplex *csrSortedValA, - int *csrSortedRowPtrA, - int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseChyb2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA); -} - -cusparseStatus_t CUSPARSEAPI cusparseZhyb2csr(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - cuDoubleComplex *csrSortedValA, - int *csrSortedRowPtrA, - int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - cuDoubleComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZhyb2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, csrSortedValA, 
csrSortedRowPtrA, - csrSortedColIndA); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsc2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, cusparseHybMat_t hybA, int userEllWidth, - cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseScsc2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, hybA, userEllWidth, partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsc2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, cusparseHybMat_t hybA, int userEllWidth, - cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseDcsc2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, hybA, userEllWidth, partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsc2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, cusparseHybMat_t hybA, int userEllWidth, - cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, cusparseHybMat_t, int, 
cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseCcsc2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, hybA, userEllWidth, partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsc2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuDoubleComplex *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, cusparseHybMat_t hybA, int userEllWidth, - cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, cusparseHybMat_t, int, - cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseZcsc2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, hybA, userEllWidth, partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseShyb2csc(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - float *cscSortedVal, - int *cscSortedRowInd, - int *cscSortedColPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseShyb2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, cscSortedVal, cscSortedRowInd, - cscSortedColPtr); -} - -cusparseStatus_t CUSPARSEAPI cusparseDhyb2csc(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - double *cscSortedVal, - int *cscSortedRowInd, - int *cscSortedColPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDhyb2csc"); 
- if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, cscSortedVal, cscSortedRowInd, - cscSortedColPtr); -} - -cusparseStatus_t CUSPARSEAPI cusparseChyb2csc(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - cuComplex *cscSortedVal, - int *cscSortedRowInd, - int *cscSortedColPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseChyb2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, cscSortedVal, cscSortedRowInd, - cscSortedColPtr); -} - -cusparseStatus_t CUSPARSEAPI cusparseZhyb2csc(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - cuDoubleComplex *cscSortedVal, - int *cscSortedRowInd, - int *cscSortedColPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - cuDoubleComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZhyb2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, cscSortedVal, cscSortedRowInd, - cscSortedColPtr); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsr2bsrNnz( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, int blockDim, const cusparseMatDescr_t descrC, - int *bsrSortedRowPtrC, int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const int *, const int *, int, const cusparseMatDescr_t, int *, int *); - static auto func_ptr = LoadSymbol("cusparseXcsr2bsrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedRowPtrA, - 
csrSortedColIndA, blockDim, descrC, bsrSortedRowPtrC, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2bsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, float *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, const cusparseMatDescr_t, - float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseScsr2bsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, blockDim, descrC, bsrSortedValC, - bsrSortedRowPtrC, bsrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2bsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, double *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, const cusparseMatDescr_t, - double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDcsr2bsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, blockDim, descrC, bsrSortedValC, - bsrSortedRowPtrC, bsrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2bsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuComplex 
*csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, cuComplex *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, - const cusparseMatDescr_t, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCcsr2bsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, blockDim, descrC, bsrSortedValC, - bsrSortedRowPtrC, bsrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2bsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, cuDoubleComplex *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, - const cusparseMatDescr_t, cuDoubleComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZcsr2bsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, blockDim, descrC, bsrSortedValC, - bsrSortedRowPtrC, bsrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, float *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using 
FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, const cusparseMatDescr_t, - float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseSbsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, blockDim, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, double *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, const cusparseMatDescr_t, - double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDbsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, blockDim, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, cuComplex *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, - const cusparseMatDescr_t, cuComplex *, int *, int *); - static 
auto func_ptr = LoadSymbol("cusparseCbsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, blockDim, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, cuDoubleComplex *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, - const cusparseMatDescr_t, cuDoubleComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZbsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, blockDim, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsc_bufferSize( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const float *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const float *, const int *, const int *, - int, int, int *); - static auto func_ptr = LoadSymbol("cusparseSgebsr2gebsc_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsc_bufferSize( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const double 
*bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const double *, const int *, const int *, - int, int, int *); - static auto func_ptr = LoadSymbol("cusparseDgebsr2gebsc_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsc_bufferSize( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuComplex *, const int *, - const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseCgebsr2gebsc_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsc_bufferSize( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuDoubleComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuDoubleComplex *, const int *, - const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseZgebsr2gebsc_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseSgebsr2gebsc_bufferSizeExt( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const float *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const float *, const int *, const int *, - int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSgebsr2gebsc_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsc_bufferSizeExt( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const double *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const double *, const int *, const int *, - int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDgebsr2gebsc_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsc_bufferSizeExt( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuComplex *, const int *, - const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgebsr2gebsc_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, 
pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsc_bufferSizeExt( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuDoubleComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuDoubleComplex *, const int *, - const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgebsr2gebsc_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsc( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const float *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, float *bscVal, - int *bscRowInd, int *bscColPtr, cusparseAction_t copyValues, - cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const float *, const int *, const int *, - int, int, float *, int *, int *, cusparseAction_t, cusparseIndexBase_t, - void *); - static auto func_ptr = LoadSymbol("cusparseSgebsr2gebsc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, bscVal, bscRowInd, - bscColPtr, copyValues, idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsc( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const double *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - double *bscVal, int *bscRowInd, int *bscColPtr, cusparseAction_t copyValues, - cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, 
int, int, int, const double *, const int *, const int *, - int, int, double *, int *, int *, cusparseAction_t, cusparseIndexBase_t, - void *); - static auto func_ptr = LoadSymbol("cusparseDgebsr2gebsc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, bscVal, bscRowInd, - bscColPtr, copyValues, idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsc( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - cuComplex *bscVal, int *bscRowInd, int *bscColPtr, - cusparseAction_t copyValues, cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuComplex *, const int *, - const int *, int, int, cuComplex *, int *, int *, cusparseAction_t, - cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseCgebsr2gebsc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, bscVal, bscRowInd, - bscColPtr, copyValues, idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsc( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuDoubleComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - cuDoubleComplex *bscVal, int *bscRowInd, int *bscColPtr, - cusparseAction_t copyValues, cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuDoubleComplex *, const int *, - const int *, int, int, cuDoubleComplex *, int *, int *, cusparseAction_t, - cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseZgebsr2gebsc"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, bscVal, bscRowInd, - bscColPtr, copyValues, idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXgebsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, int rowBlockDim, int colBlockDim, - const cusparseMatDescr_t descrC, int *csrSortedRowPtrC, - int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const int *, const int *, int, int, const cusparseMatDescr_t, int *, - int *); - static auto func_ptr = LoadSymbol("cusparseXgebsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, - csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDim, - int colBlockDim, const cusparseMatDescr_t descrC, float *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, int, - const cusparseMatDescr_t, float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseSgebsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseDgebsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDim, - int colBlockDim, const cusparseMatDescr_t descrC, double *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, int, - const cusparseMatDescr_t, double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDgebsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDim, - int colBlockDim, const cusparseMatDescr_t descrC, cuComplex *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, int, - const cusparseMatDescr_t, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCgebsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const 
cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDim, - int colBlockDim, const cusparseMatDescr_t descrC, - cuDoubleComplex *csrSortedValC, int *csrSortedRowPtrC, - int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, int, - const cusparseMatDescr_t, cuDoubleComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZgebsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseScsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const 
cusparseMatDescr_t, - const double *, const int *, const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseDcsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseCcsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseZcsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2gebsr_bufferSizeExt( 
- cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseScsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDcsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, int, size_t *); 
- static auto func_ptr = - LoadSymbol("cusparseCcsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZcsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsr2gebsrNnz( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrC, - int *bsrSortedRowPtrC, int rowBlockDim, int colBlockDim, - int *nnzTotalDevHostPtr, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const int *, const int *, const cusparseMatDescr_t, int *, int, int, - int *, void *); - static auto func_ptr = LoadSymbol("cusparseXcsr2gebsrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedRowPtrA, - csrSortedColIndA, descrC, bsrSortedRowPtrC, rowBlockDim, - colBlockDim, nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2gebsr( - cusparseHandle_t handle, 
cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrC, float *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDim, - int colBlockDim, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, const cusparseMatDescr_t, - float *, int *, int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseScsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDim, colBlockDim, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrC, double *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDim, - int colBlockDim, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, const cusparseMatDescr_t, - double *, int *, int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseDcsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDim, colBlockDim, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuComplex 
*csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrC, cuComplex *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDim, - int colBlockDim, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, const cusparseMatDescr_t, - cuComplex *, int *, int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseCcsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDim, colBlockDim, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrC, cuDoubleComplex *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDim, - int colBlockDim, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - const cusparseMatDescr_t, cuDoubleComplex *, int *, int *, int, int, - void *); - static auto func_ptr = LoadSymbol("cusparseZcsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDim, colBlockDim, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - 
const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const float *, const int *, const int *, int, - int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseSgebsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const double *, const int *, const int *, int, - int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseDgebsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - 
cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - int, int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseCgebsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, int, int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseZgebsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const float *, const int *, const int *, int, - int, int, int, size_t *); - static auto func_ptr = - 
LoadSymbol("cusparseSgebsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const double *, const int *, const int *, int, - int, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDgebsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - int, int, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgebsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, 
rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, int, int, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgebsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseXgebsr2gebsrNnz( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, int rowBlockDimA, int colBlockDimA, - const cusparseMatDescr_t descrC, int *bsrSortedRowPtrC, int rowBlockDimC, - int colBlockDimC, int *nnzTotalDevHostPtr, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const int *, const int *, int, int, - const cusparseMatDescr_t, int *, int, int, int *, void *); - static auto func_ptr = LoadSymbol("cusparseXgebsr2gebsrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDimA, colBlockDimA, descrC, - bsrSortedRowPtrC, rowBlockDimC, colBlockDimC, - nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsr( - 
cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, const cusparseMatDescr_t descrC, float *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDimC, - int colBlockDimC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const float *, const int *, const int *, int, - int, const cusparseMatDescr_t, float *, int *, int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgebsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDimC, colBlockDimC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, const cusparseMatDescr_t descrC, double *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDimC, - int colBlockDimC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const double *, const int *, const int *, int, - int, const cusparseMatDescr_t, double *, int *, int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseDgebsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, descrC, bsrSortedValC, 
bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDimC, colBlockDimC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, const cusparseMatDescr_t descrC, cuComplex *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDimC, - int colBlockDimC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - int, int, const cusparseMatDescr_t, cuComplex *, int *, int *, int, int, - void *); - static auto func_ptr = LoadSymbol("cusparseCgebsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDimC, colBlockDimC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, const cusparseMatDescr_t descrC, - cuDoubleComplex *bsrSortedValC, int *bsrSortedRowPtrC, - int *bsrSortedColIndC, int rowBlockDimC, int colBlockDimC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, int, int, const cusparseMatDescr_t, cuDoubleComplex *, int *, - int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseZgebsr2gebsr"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDimC, colBlockDimC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCreateIdentityPermutation(cusparseHandle_t handle, int n, int *p) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, int *); - static auto func_ptr = - LoadSymbol("cusparseCreateIdentityPermutation"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, p); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcoosort_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, const int *cooRowsA, - const int *cooColsA, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const int *, const int *, size_t *); - static auto func_ptr = LoadSymbol("cusparseXcoosort_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, cooRowsA, cooColsA, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcoosortByRow(cusparseHandle_t handle, - int m, int n, int nnz, - int *cooRowsA, int *cooColsA, - int *P, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, int *, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseXcoosortByRow"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, cooRowsA, cooColsA, P, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcoosortByColumn(cusparseHandle_t handle, - int m, int n, int nnz, - int *cooRowsA, - int *cooColsA, int *P, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, int *, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseXcoosortByColumn"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(handle, m, n, nnz, cooRowsA, cooColsA, P, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrsort_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, const int *csrRowPtrA, - const int *csrColIndA, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const int *, const int *, size_t *); - static auto func_ptr = LoadSymbol("cusparseXcsrsort_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrRowPtrA, csrColIndA, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrsort(cusparseHandle_t handle, int m, - int n, int nnz, - const cusparseMatDescr_t descrA, - const int *csrRowPtrA, - int *csrColIndA, int *P, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const int *, - int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseXcsrsort"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrRowPtrA, csrColIndA, P, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcscsort_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, const int *cscColPtrA, - const int *cscRowIndA, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const int *, const int *, size_t *); - static auto func_ptr = LoadSymbol("cusparseXcscsort_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, cscColPtrA, cscRowIndA, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcscsort(cusparseHandle_t handle, int m, - int n, int nnz, - const cusparseMatDescr_t descrA, - const int *cscColPtrA, - int *cscRowIndA, int *P, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const int *, - 
int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseXcscsort"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, cscColPtrA, cscRowIndA, P, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsru2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, float *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, float *, const int *, int *, - csru2csrInfo_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseScsru2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsru2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, double *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, double *, const int *, int *, - csru2csrInfo_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsru2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsru2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, cuComplex *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, cuComplex *, const int *, int *, - csru2csrInfo_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsru2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, info, - pBufferSizeInBytes); -} - 
-cusparseStatus_t CUSPARSEAPI cusparseZcsru2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, cuDoubleComplex *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, cuDoubleComplex *, const int *, int *, - csru2csrInfo_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsru2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsru2csr( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, float *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, float *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsru2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsru2csr( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, double *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, double *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsru2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsru2csr( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, cuComplex *csrVal, const int *csrRowPtr, - int 
*csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsru2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsru2csr( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, cuDoubleComplex *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsru2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2csru( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, float *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, float *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsr2csru"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2csru( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, double *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, double *, 
- const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsr2csru"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2csru( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, cuComplex *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsr2csru"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2csru( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, cuDoubleComplex *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsr2csru"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - const float *threshold, const cusparseMatDescr_t descrC, - const float *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - size_t *); - 
static auto func_ptr = - LoadSymbol("cusparseSpruneDense2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - const double *threshold, const cusparseMatDescr_t descrC, - const double *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - size_t *); - static auto func_ptr = - LoadSymbol("cusparseDpruneDense2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csrNnz( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - const float *threshold, const cusparseMatDescr_t descrC, int *csrRowPtrC, - int *nnzTotalDevHostPtr, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, const float *, - const cusparseMatDescr_t, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseSpruneDense2csrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrRowPtrC, - nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csrNnz( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - const double *threshold, const cusparseMatDescr_t descrC, - int *csrSortedRowPtrC, int *nnzTotalDevHostPtr, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - 
cusparseHandle_t, int, int, const double *, int, const double *, - const cusparseMatDescr_t, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseDpruneDense2csrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csr( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - const float *threshold, const cusparseMatDescr_t descrC, - float *csrSortedValC, const int *csrSortedRowPtrC, int *csrSortedColIndC, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, const float *, - const cusparseMatDescr_t, float *, const int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseSpruneDense2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csr( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - const double *threshold, const cusparseMatDescr_t descrC, - double *csrSortedValC, const int *csrSortedRowPtrC, int *csrSortedColIndC, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, const double *, - const cusparseMatDescr_t, double *, const int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseDpruneDense2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const 
float *threshold, const cusparseMatDescr_t descrC, - const float *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, const float *, const cusparseMatDescr_t, - const float *, const int *, const int *, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSpruneCsr2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *threshold, const cusparseMatDescr_t descrC, - const double *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, const cusparseMatDescr_t, - const double *, const int *, const int *, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDpruneCsr2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csrNnz( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const float 
*threshold, const cusparseMatDescr_t descrC, - int *csrSortedRowPtrC, int *nnzTotalDevHostPtr, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, const float *, const cusparseMatDescr_t, int *, - int *, void *); - static auto func_ptr = LoadSymbol("cusparseSpruneCsr2csrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, threshold, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrNnz( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *threshold, const cusparseMatDescr_t descrC, - int *csrSortedRowPtrC, int *nnzTotalDevHostPtr, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, const cusparseMatDescr_t, int *, - int *, void *); - static auto func_ptr = LoadSymbol("cusparseDpruneCsr2csrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, threshold, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csr( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const float *threshold, const cusparseMatDescr_t descrC, - float *csrSortedValC, const int *csrSortedRowPtrC, int *csrSortedColIndC, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - 
const int *, const int *, const float *, const cusparseMatDescr_t, - float *, const int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseSpruneCsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csr( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *threshold, const cusparseMatDescr_t descrC, - double *csrSortedValC, const int *csrSortedRowPtrC, int *csrSortedColIndC, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, const cusparseMatDescr_t, - double *, const int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseDpruneCsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csrByPercentage_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - float percentage, const cusparseMatDescr_t descrC, - const float *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, pruneInfo_t info, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, float, - const cusparseMatDescr_t, const float *, const int *, const int *, - pruneInfo_t, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSpruneDense2csrByPercentage_bufferSizeExt"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csrByPercentage_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - float percentage, const cusparseMatDescr_t descrC, - const double *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, pruneInfo_t info, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, float, - const cusparseMatDescr_t, const double *, const int *, const int *, - pruneInfo_t, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDpruneDense2csrByPercentage_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csrNnzByPercentage( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - float percentage, const cusparseMatDescr_t descrC, int *csrRowPtrC, - int *nnzTotalDevHostPtr, pruneInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, float, - const cusparseMatDescr_t, int *, int *, pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseSpruneDense2csrNnzByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrRowPtrC, - nnzTotalDevHostPtr, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csrNnzByPercentage( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - float percentage, const cusparseMatDescr_t descrC, int *csrRowPtrC, - int *nnzTotalDevHostPtr, pruneInfo_t info, void *pBuffer) { - using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, float, - const cusparseMatDescr_t, int *, int *, pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseDpruneDense2csrNnzByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrRowPtrC, - nnzTotalDevHostPtr, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csrByPercentage( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - float percentage, const cusparseMatDescr_t descrC, float *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC, pruneInfo_t info, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, float, - const cusparseMatDescr_t, float *, const int *, int *, pruneInfo_t, - void *); - static auto func_ptr = - LoadSymbol("cusparseSpruneDense2csrByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csrByPercentage( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - float percentage, const cusparseMatDescr_t descrC, double *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC, pruneInfo_t info, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, float, - const cusparseMatDescr_t, double *, const int *, int *, pruneInfo_t, - void *); - static auto func_ptr = - LoadSymbol("cusparseDpruneDense2csrByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csrByPercentage_bufferSizeExt( - 
cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, float percentage, - const cusparseMatDescr_t descrC, const float *csrSortedValC, - const int *csrSortedRowPtrC, const int *csrSortedColIndC, pruneInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, float, const cusparseMatDescr_t, const float *, - const int *, const int *, pruneInfo_t, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSpruneCsr2csrByPercentage_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrByPercentage_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, float percentage, - const cusparseMatDescr_t descrC, const double *csrSortedValC, - const int *csrSortedRowPtrC, const int *csrSortedColIndC, pruneInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, float, const cusparseMatDescr_t, const double *, - const int *, const int *, pruneInfo_t, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDpruneCsr2csrByPercentage_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, 
pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csrNnzByPercentage( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, float percentage, - const cusparseMatDescr_t descrC, int *csrSortedRowPtrC, - int *nnzTotalDevHostPtr, pruneInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, float, const cusparseMatDescr_t, int *, int *, - pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseSpruneCsr2csrNnzByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, percentage, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrNnzByPercentage( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, float percentage, - const cusparseMatDescr_t descrC, int *csrSortedRowPtrC, - int *nnzTotalDevHostPtr, pruneInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, float, const cusparseMatDescr_t, int *, int *, - pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseDpruneCsr2csrNnzByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, percentage, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csrByPercentage( - cusparseHandle_t handle, int m, int n, int nnzA, - const 
cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, float percentage, - const cusparseMatDescr_t descrC, float *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC, pruneInfo_t info, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, float, const cusparseMatDescr_t, float *, - const int *, int *, pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseSpruneCsr2csrByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrByPercentage( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, float percentage, - const cusparseMatDescr_t descrC, double *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC, pruneInfo_t info, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, float, const cusparseMatDescr_t, double *, - const int *, int *, pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseDpruneCsr2csrByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsr2cscEx2( - cusparseHandle_t handle, int m, int n, int nnz, const void *csrVal, - const int *csrRowPtr, const int *csrColInd, void *cscVal, int 
*cscColPtr, - int *cscRowInd, cudaDataType valType, cusparseAction_t copyValues, - cusparseIndexBase_t idxBase, cusparseCsr2CscAlg_t alg, void *buffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const void *, const int *, const int *, - void *, int *, int *, cudaDataType, cusparseAction_t, cusparseIndexBase_t, - cusparseCsr2CscAlg_t, void *); - static auto func_ptr = LoadSymbol("cusparseCsr2cscEx2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, cscVal, - cscColPtr, cscRowInd, valType, copyValues, idxBase, alg, - buffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsr2cscEx2_bufferSize( - cusparseHandle_t handle, int m, int n, int nnz, const void *csrVal, - const int *csrRowPtr, const int *csrColInd, void *cscVal, int *cscColPtr, - int *cscRowInd, cudaDataType valType, cusparseAction_t copyValues, - cusparseIndexBase_t idxBase, cusparseCsr2CscAlg_t alg, size_t *bufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const void *, const int *, const int *, - void *, int *, int *, cudaDataType, cusparseAction_t, cusparseIndexBase_t, - cusparseCsr2CscAlg_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCsr2cscEx2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, cscVal, - cscColPtr, cscRowInd, valType, copyValues, idxBase, alg, - bufferSize); -} - -#if !defined(_WIN32) - -cusparseStatus_t CUSPARSEAPI -cusparseCreateSpVec(cusparseSpVecDescr_t *spVecDescr, int64_t size, int64_t nnz, - void *indices, void *values, cusparseIndexType_t idxType, - cusparseIndexBase_t idxBase, cudaDataType valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseSpVecDescr_t *, int64_t, int64_t, void *, void *, - cusparseIndexType_t, cusparseIndexBase_t, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCreateSpVec"); 
- if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spVecDescr, size, nnz, indices, values, idxType, idxBase, - valueType); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDestroySpVec(cusparseSpVecDescr_t spVecDescr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpVecDescr_t); - static auto func_ptr = LoadSymbol("cusparseDestroySpVec"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spVecDescr); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpVecGet( - const cusparseSpVecDescr_t spVecDescr, int64_t *size, int64_t *nnz, - void **indices, void **values, cusparseIndexType_t *idxType, - cusparseIndexBase_t *idxBase, cudaDataType *valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - const cusparseSpVecDescr_t, int64_t *, int64_t *, void **, void **, - cusparseIndexType_t *, cusparseIndexBase_t *, cudaDataType *); - static auto func_ptr = LoadSymbol("cusparseSpVecGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spVecDescr, size, nnz, indices, values, idxType, idxBase, - valueType); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpVecGetIndexBase( - const cusparseSpVecDescr_t spVecDescr, cusparseIndexBase_t *idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(const cusparseSpVecDescr_t, - cusparseIndexBase_t *); - static auto func_ptr = LoadSymbol("cusparseSpVecGetIndexBase"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spVecDescr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpVecGetValues(const cusparseSpVecDescr_t spVecDescr, void **values) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(const cusparseSpVecDescr_t, void **); - static auto func_ptr = LoadSymbol("cusparseSpVecGetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spVecDescr, values); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpVecSetValues(cusparseSpVecDescr_t spVecDescr, void *values) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI 
*)(cusparseSpVecDescr_t, void *); - static auto func_ptr = LoadSymbol("cusparseSpVecSetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spVecDescr, values); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCreateDnVec(cusparseDnVecDescr_t *dnVecDescr, int64_t size, - void *values, cudaDataType valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseDnVecDescr_t *, int64_t, void *, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCreateDnVec"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnVecDescr, size, values, valueType); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDestroyDnVec(cusparseDnVecDescr_t dnVecDescr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseDnVecDescr_t); - static auto func_ptr = LoadSymbol("cusparseDestroyDnVec"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnVecDescr); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDnVecGet(const cusparseDnVecDescr_t dnVecDescr, int64_t *size, - void **values, cudaDataType *valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - const cusparseDnVecDescr_t, int64_t *, void **, cudaDataType *); - static auto func_ptr = LoadSymbol("cusparseDnVecGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnVecDescr, size, values, valueType); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDnVecGetValues(const cusparseDnVecDescr_t dnVecDescr, void **values) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(const cusparseDnVecDescr_t, void **); - static auto func_ptr = LoadSymbol("cusparseDnVecGetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnVecDescr, values); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDnVecSetValues(cusparseDnVecDescr_t dnVecDescr, void *values) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseDnVecDescr_t, void *); - static auto func_ptr = LoadSymbol("cusparseDnVecSetValues"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(dnVecDescr, values); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCoo(cusparseSpMatDescr_t *spMatDescr, - int64_t rows, int64_t cols, - int64_t nnz, void *cooRowInd, - void *cooColInd, void *cooValues, - cusparseIndexType_t cooIdxType, - cusparseIndexBase_t idxBase, - cudaDataType valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseSpMatDescr_t *, int64_t, int64_t, int64_t, void *, void *, void *, - cusparseIndexType_t, cusparseIndexBase_t, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCreateCoo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, nnz, cooRowInd, cooColInd, cooValues, - cooIdxType, idxBase, valueType); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsr( - cusparseSpMatDescr_t *spMatDescr, int64_t rows, int64_t cols, int64_t nnz, - void *csrRowOffsets, void *csrColInd, void *csrValues, - cusparseIndexType_t csrRowOffsetsType, cusparseIndexType_t csrColIndType, - cusparseIndexBase_t idxBase, cudaDataType valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseSpMatDescr_t *, int64_t, int64_t, int64_t, void *, void *, void *, - cusparseIndexType_t, cusparseIndexType_t, cusparseIndexBase_t, - cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCreateCsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, nnz, csrRowOffsets, csrColInd, - csrValues, csrRowOffsetsType, csrColIndType, idxBase, - valueType); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCooAoS( - cusparseSpMatDescr_t *spMatDescr, int64_t rows, int64_t cols, int64_t nnz, - void *cooInd, void *cooValues, cusparseIndexType_t cooIdxType, - cusparseIndexBase_t idxBase, cudaDataType valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseSpMatDescr_t *, int64_t, int64_t, int64_t, void *, void *, - cusparseIndexType_t, cusparseIndexBase_t, cudaDataType); - static auto func_ptr = 
LoadSymbol("cusparseCreateCooAoS"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, nnz, cooInd, cooValues, cooIdxType, - idxBase, valueType); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDestroySpMat(cusparseSpMatDescr_t spMatDescr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseDestroySpMat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCooGet(const cusparseSpMatDescr_t spMatDescr, int64_t *rows, - int64_t *cols, int64_t *nnz, - void **cooRowInd, // COO row indices - void **cooColInd, // COO column indices - void **cooValues, // COO values - cusparseIndexType_t *idxType, cusparseIndexBase_t *idxBase, - cudaDataType *valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - const cusparseSpMatDescr_t, int64_t *, int64_t *, int64_t *, void **, - void **, void **, cusparseIndexType_t *, cusparseIndexBase_t *, - cudaDataType *); - static auto func_ptr = LoadSymbol("cusparseCooGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, nnz, cooRowInd, cooColInd, cooValues, - idxType, idxBase, valueType); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCooAoSGet(const cusparseSpMatDescr_t spMatDescr, int64_t *rows, - int64_t *cols, int64_t *nnz, - void **cooInd, // COO indices - void **cooValues, // COO values - cusparseIndexType_t *idxType, cusparseIndexBase_t *idxBase, - cudaDataType *valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - const cusparseSpMatDescr_t, int64_t *, int64_t *, int64_t *, void **, - void **, cusparseIndexType_t *, cusparseIndexBase_t *, cudaDataType *); - static auto func_ptr = LoadSymbol("cusparseCooAoSGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, nnz, cooInd, cooValues, idxType, - idxBase, valueType); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseCsrGet( - const cusparseSpMatDescr_t spMatDescr, int64_t *rows, int64_t *cols, - int64_t *nnz, void **csrRowOffsets, void **csrColInd, void **csrValues, - cusparseIndexType_t *csrRowOffsetsType, cusparseIndexType_t *csrColIndType, - cusparseIndexBase_t *idxBase, cudaDataType *valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - const cusparseSpMatDescr_t, int64_t *, int64_t *, int64_t *, void **, - void **, void **, cusparseIndexType_t *, cusparseIndexType_t *, - cusparseIndexBase_t *, cudaDataType *); - static auto func_ptr = LoadSymbol("cusparseCsrGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, nnz, csrRowOffsets, csrColInd, - csrValues, csrRowOffsetsType, csrColIndType, idxBase, - valueType); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpMatGetFormat( - const cusparseSpMatDescr_t spMatDescr, cusparseFormat_t *format) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(const cusparseSpMatDescr_t, - cusparseFormat_t *); - static auto func_ptr = LoadSymbol("cusparseSpMatGetFormat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, format); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpMatGetIndexBase( - const cusparseSpMatDescr_t spMatDescr, cusparseIndexBase_t *idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(const cusparseSpMatDescr_t, - cusparseIndexBase_t *); - static auto func_ptr = LoadSymbol("cusparseSpMatGetIndexBase"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpMatGetValues(const cusparseSpMatDescr_t spMatDescr, void **values) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(const cusparseSpMatDescr_t, void **); - static auto func_ptr = LoadSymbol("cusparseSpMatGetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, values); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpMatSetValues(cusparseSpMatDescr_t 
spMatDescr, void *values) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMatDescr_t, void *); - static auto func_ptr = LoadSymbol("cusparseSpMatSetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, values); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpMatSetStridedBatch(cusparseSpMatDescr_t spMatDescr, int batchCount) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMatDescr_t, int); - static auto func_ptr = LoadSymbol("cusparseSpMatSetStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, batchCount); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpMatGetStridedBatch( - const cusparseSpMatDescr_t spMatDescr, int *batchCount) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(const cusparseSpMatDescr_t, int *); - static auto func_ptr = LoadSymbol("cusparseSpMatGetStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, batchCount); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateDnMat( - cusparseDnMatDescr_t *dnMatDescr, int64_t rows, int64_t cols, int64_t ld, - void *values, cudaDataType valueType, cusparseOrder_t order) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseDnMatDescr_t *, int64_t, int64_t, int64_t, void *, cudaDataType, - cusparseOrder_t); - static auto func_ptr = LoadSymbol("cusparseCreateDnMat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnMatDescr, rows, cols, ld, values, valueType, order); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDestroyDnMat(cusparseDnMatDescr_t dnMatDescr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseDnMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseDestroyDnMat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnMatDescr); -} - -cusparseStatus_t CUSPARSEAPI cusparseDnMatGet( - const cusparseDnMatDescr_t dnMatDescr, int64_t *rows, int64_t *cols, - int64_t *ld, void **values, cudaDataType *type, 
cusparseOrder_t *order) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - const cusparseDnMatDescr_t, int64_t *, int64_t *, int64_t *, void **, - cudaDataType *, cusparseOrder_t *); - static auto func_ptr = LoadSymbol("cusparseDnMatGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnMatDescr, rows, cols, ld, values, type, order); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDnMatGetValues(const cusparseDnMatDescr_t dnMatDescr, void **values) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(const cusparseDnMatDescr_t, void **); - static auto func_ptr = LoadSymbol("cusparseDnMatGetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnMatDescr, values); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDnMatSetValues(cusparseDnMatDescr_t dnMatDescr, void *values) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseDnMatDescr_t, void *); - static auto func_ptr = LoadSymbol("cusparseDnMatSetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnMatDescr, values); -} - -cusparseStatus_t CUSPARSEAPI cusparseDnMatSetStridedBatch( - cusparseDnMatDescr_t dnMatDescr, int batchCount, int64_t batchStride) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseDnMatDescr_t, int, int64_t); - static auto func_ptr = LoadSymbol("cusparseDnMatSetStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnMatDescr, batchCount, batchStride); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDnMatGetStridedBatch(const cusparseDnMatDescr_t dnMatDescr, - int *batchCount, int64_t *batchStride) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(const cusparseDnMatDescr_t, - int *, int64_t *); - static auto func_ptr = LoadSymbol("cusparseDnMatGetStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnMatDescr, batchCount, batchStride); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpVV(cusparseHandle_t handle, cusparseOperation_t opX, - const 
cusparseSpVecDescr_t vecX, const cusparseDnVecDescr_t vecY, - void *result, cudaDataType computeType, void *externalBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cusparseSpVecDescr_t, - const cusparseDnVecDescr_t, void *, cudaDataType, void *); - static auto func_ptr = LoadSymbol("cusparseSpVV"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opX, vecX, vecY, result, computeType, externalBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpVV_bufferSize( - cusparseHandle_t handle, cusparseOperation_t opX, - const cusparseSpVecDescr_t vecX, const cusparseDnVecDescr_t vecY, - const void *result, cudaDataType computeType, size_t *bufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cusparseSpVecDescr_t, - const cusparseDnVecDescr_t, const void *, cudaDataType, size_t *); - static auto func_ptr = LoadSymbol("cusparseSpVV_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opX, vecX, vecY, result, computeType, bufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpMV( - cusparseHandle_t handle, cusparseOperation_t opA, const void *alpha, - const cusparseSpMatDescr_t matA, const cusparseDnVecDescr_t vecX, - const void *beta, const cusparseDnVecDescr_t vecY, cudaDataType computeType, - cusparseSpMVAlg_t alg, void *externalBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const void *, - const cusparseSpMatDescr_t, const cusparseDnVecDescr_t, const void *, - const cusparseDnVecDescr_t, cudaDataType, cusparseSpMVAlg_t, void *); - static auto func_ptr = LoadSymbol("cusparseSpMV"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, alpha, matA, vecX, beta, vecY, computeType, alg, - externalBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpMV_bufferSize( - cusparseHandle_t handle, cusparseOperation_t 
opA, const void *alpha, - const cusparseSpMatDescr_t matA, const cusparseDnVecDescr_t vecX, - const void *beta, const cusparseDnVecDescr_t vecY, cudaDataType computeType, - cusparseSpMVAlg_t alg, size_t *bufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const void *, - const cusparseSpMatDescr_t, const cusparseDnVecDescr_t, const void *, - const cusparseDnVecDescr_t, cudaDataType, cusparseSpMVAlg_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSpMV_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, alpha, matA, vecX, beta, vecY, computeType, alg, - bufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpMM( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - const void *alpha, const cusparseSpMatDescr_t matA, - const cusparseDnMatDescr_t matB, const void *beta, - cusparseDnMatDescr_t matC, cudaDataType computeType, cusparseSpMMAlg_t alg, - void *externalBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, - const cusparseSpMatDescr_t, const cusparseDnMatDescr_t, const void *, - cusparseDnMatDescr_t, cudaDataType, cusparseSpMMAlg_t, void *); - static auto func_ptr = LoadSymbol("cusparseSpMM"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, - alg, externalBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpMM_bufferSize( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - const void *alpha, const cusparseSpMatDescr_t matA, - const cusparseDnMatDescr_t matB, const void *beta, - cusparseDnMatDescr_t matC, cudaDataType computeType, cusparseSpMMAlg_t alg, - size_t *bufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, - const cusparseSpMatDescr_t, const 
cusparseDnMatDescr_t, const void *, - cusparseDnMatDescr_t, cudaDataType, cusparseSpMMAlg_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSpMM_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, - alg, bufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseConstrainedGeMM( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - const void *alpha, const cusparseDnMatDescr_t matA, - const cusparseDnMatDescr_t matB, const void *beta, - cusparseSpMatDescr_t matC, cudaDataType computeType, void *externalBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, - const cusparseDnMatDescr_t, const cusparseDnMatDescr_t, const void *, - cusparseSpMatDescr_t, cudaDataType, void *); - static auto func_ptr = LoadSymbol("cusparseConstrainedGeMM"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, - externalBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseConstrainedGeMM_bufferSize( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - const void *alpha, const cusparseDnMatDescr_t matA, - const cusparseDnMatDescr_t matB, const void *beta, - cusparseSpMatDescr_t matC, cudaDataType computeType, size_t *bufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, - const cusparseDnMatDescr_t, const cusparseDnMatDescr_t, const void *, - cusparseSpMatDescr_t, cudaDataType, size_t *); - static auto func_ptr = - LoadSymbol("cusparseConstrainedGeMM_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, - bufferSize); -} - -#endif // _WIN32 - -} // extern "C" diff --git 
a/third_party/xla/third_party/tsl/tsl/cuda/cusparse_10_2.inc b/third_party/xla/third_party/tsl/tsl/cuda/cusparse_10_2.inc deleted file mode 100644 index 03d6d0c20d5223..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cusparse_10_2.inc +++ /dev/null @@ -1,8262 +0,0 @@ -// Auto-generated, do not edit. - -extern "C" { - -cusparseStatus_t CUSPARSEAPI cusparseCreate(cusparseHandle_t *handle) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t *); - static auto func_ptr = LoadSymbol("cusparseCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroy(cusparseHandle_t handle) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t); - static auto func_ptr = LoadSymbol("cusparseDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cusparseStatus_t CUSPARSEAPI cusparseGetVersion(cusparseHandle_t handle, - int *version) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int *); - static auto func_ptr = LoadSymbol("cusparseGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, version); -} - -cusparseStatus_t CUSPARSEAPI cusparseGetProperty(libraryPropertyType type, - int *value) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(libraryPropertyType, int *); - static auto func_ptr = LoadSymbol("cusparseGetProperty"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type, value); -} - -const char *CUSPARSEAPI cusparseGetErrorName(cusparseStatus_t status) { - using FuncPtr = const char *(CUSPARSEAPI *)(cusparseStatus_t); - static auto func_ptr = LoadSymbol("cusparseGetErrorName"); - if (!func_ptr) return "cusparseGetErrorName symbol not found."; - return func_ptr(status); -} - -const char *CUSPARSEAPI cusparseGetErrorString(cusparseStatus_t status) { - using FuncPtr = const char *(CUSPARSEAPI *)(cusparseStatus_t); - static auto func_ptr = 
LoadSymbol("cusparseGetErrorString"); - if (!func_ptr) return "cusparseGetErrorString symbol not found."; - return func_ptr(status); -} - -cusparseStatus_t CUSPARSEAPI cusparseSetStream(cusparseHandle_t handle, - cudaStream_t streamId) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cusparseSetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cusparseStatus_t CUSPARSEAPI cusparseGetStream(cusparseHandle_t handle, - cudaStream_t *streamId) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, cudaStream_t *); - static auto func_ptr = LoadSymbol("cusparseGetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cusparseStatus_t CUSPARSEAPI -cusparseGetPointerMode(cusparseHandle_t handle, cusparsePointerMode_t *mode) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, - cusparsePointerMode_t *); - static auto func_ptr = LoadSymbol("cusparseGetPointerMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSetPointerMode(cusparseHandle_t handle, cusparsePointerMode_t mode) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, cusparsePointerMode_t); - static auto func_ptr = LoadSymbol("cusparseSetPointerMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCreateMatDescr(cusparseMatDescr_t *descrA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t *); - static auto func_ptr = LoadSymbol("cusparseCreateMatDescr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDestroyMatDescr(cusparseMatDescr_t descrA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t); - static auto 
func_ptr = LoadSymbol("cusparseDestroyMatDescr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCopyMatDescr(cusparseMatDescr_t dest, const cusparseMatDescr_t src) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t, - const cusparseMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseCopyMatDescr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dest, src); -} - -cusparseStatus_t CUSPARSEAPI cusparseSetMatType(cusparseMatDescr_t descrA, - cusparseMatrixType_t type) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t, cusparseMatrixType_t); - static auto func_ptr = LoadSymbol("cusparseSetMatType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA, type); -} - -cusparseMatrixType_t CUSPARSEAPI -cusparseGetMatType(const cusparseMatDescr_t descrA) { - using FuncPtr = cusparseMatrixType_t(CUSPARSEAPI *)(const cusparseMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseGetMatType"); - if (!func_ptr) return cusparseMatrixType_t(-1); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSetMatFillMode(cusparseMatDescr_t descrA, cusparseFillMode_t fillMode) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t, cusparseFillMode_t); - static auto func_ptr = LoadSymbol("cusparseSetMatFillMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA, fillMode); -} - -cusparseFillMode_t CUSPARSEAPI -cusparseGetMatFillMode(const cusparseMatDescr_t descrA) { - using FuncPtr = cusparseFillMode_t(CUSPARSEAPI *)(const cusparseMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseGetMatFillMode"); - if (!func_ptr) return cusparseFillMode_t(-1); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSetMatDiagType(cusparseMatDescr_t descrA, cusparseDiagType_t diagType) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI 
*)(cusparseMatDescr_t, cusparseDiagType_t); - static auto func_ptr = LoadSymbol("cusparseSetMatDiagType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA, diagType); -} - -cusparseDiagType_t CUSPARSEAPI -cusparseGetMatDiagType(const cusparseMatDescr_t descrA) { - using FuncPtr = cusparseDiagType_t(CUSPARSEAPI *)(const cusparseMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseGetMatDiagType"); - if (!func_ptr) return cusparseDiagType_t(-1); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI cusparseSetMatIndexBase(cusparseMatDescr_t descrA, - cusparseIndexBase_t base) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSetMatIndexBase"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA, base); -} - -cusparseIndexBase_t CUSPARSEAPI -cusparseGetMatIndexBase(const cusparseMatDescr_t descrA) { - using FuncPtr = cusparseIndexBase_t(CUSPARSEAPI *)(const cusparseMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseGetMatIndexBase"); - if (!func_ptr) return cusparseIndexBase_t(-1); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCreateSolveAnalysisInfo(cusparseSolveAnalysisInfo_t *info) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseSolveAnalysisInfo_t *); - static auto func_ptr = LoadSymbol("cusparseCreateSolveAnalysisInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDestroySolveAnalysisInfo(cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSolveAnalysisInfo_t); - static auto func_ptr = - LoadSymbol("cusparseDestroySolveAnalysisInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseGetLevelInfo(cusparseHandle_t handle, cusparseSolveAnalysisInfo_t info, - int *nlevels, int 
**levelPtr, int **levelInd) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseSolveAnalysisInfo_t, int *, int **, int **); - static auto func_ptr = LoadSymbol("cusparseGetLevelInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, nlevels, levelPtr, levelInd); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsrsv2Info(csrsv2Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrsv2Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateCsrsv2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyCsrsv2Info(csrsv2Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrsv2Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyCsrsv2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsric02Info(csric02Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csric02Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateCsric02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyCsric02Info(csric02Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csric02Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyCsric02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateBsric02Info(bsric02Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsric02Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateBsric02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyBsric02Info(bsric02Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsric02Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyBsric02Info"); 
- if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsrilu02Info(csrilu02Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrilu02Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateCsrilu02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyCsrilu02Info(csrilu02Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrilu02Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyCsrilu02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateBsrilu02Info(bsrilu02Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrilu02Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateBsrilu02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyBsrilu02Info(bsrilu02Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrilu02Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyBsrilu02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateBsrsv2Info(bsrsv2Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrsv2Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateBsrsv2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyBsrsv2Info(bsrsv2Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrsv2Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyBsrsv2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateBsrsm2Info(bsrsm2Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrsm2Info_t *); - static 
auto func_ptr = LoadSymbol("cusparseCreateBsrsm2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyBsrsm2Info(bsrsm2Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrsm2Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyBsrsm2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateHybMat(cusparseHybMat_t *hybA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHybMat_t *); - static auto func_ptr = LoadSymbol("cusparseCreateHybMat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hybA); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyHybMat(cusparseHybMat_t hybA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHybMat_t); - static auto func_ptr = LoadSymbol("cusparseDestroyHybMat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hybA); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsru2csrInfo(csru2csrInfo_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csru2csrInfo_t *); - static auto func_ptr = LoadSymbol("cusparseCreateCsru2csrInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyCsru2csrInfo(csru2csrInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csru2csrInfo_t); - static auto func_ptr = LoadSymbol("cusparseDestroyCsru2csrInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCreateColorInfo(cusparseColorInfo_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseColorInfo_t *); - static auto func_ptr = LoadSymbol("cusparseCreateColorInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDestroyColorInfo(cusparseColorInfo_t info) { - using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)(cusparseColorInfo_t); - static auto func_ptr = LoadSymbol("cusparseDestroyColorInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseSetColorAlgs(cusparseColorInfo_t info, - cusparseColorAlg_t alg) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseColorInfo_t, cusparseColorAlg_t); - static auto func_ptr = LoadSymbol("cusparseSetColorAlgs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, alg); -} - -cusparseStatus_t CUSPARSEAPI cusparseGetColorAlgs(cusparseColorInfo_t info, - cusparseColorAlg_t *alg) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseColorInfo_t, - cusparseColorAlg_t *); - static auto func_ptr = LoadSymbol("cusparseGetColorAlgs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, alg); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreatePruneInfo(pruneInfo_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(pruneInfo_t *); - static auto func_ptr = LoadSymbol("cusparseCreatePruneInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyPruneInfo(pruneInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(pruneInfo_t); - static auto func_ptr = LoadSymbol("cusparseDestroyPruneInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseSaxpyi(cusparseHandle_t handle, int nnz, - const float *alpha, - const float *xVal, const int *xInd, - float *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const float *, const float *, const int *, float *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSaxpyi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, alpha, xVal, xInd, y, idxBase); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseDaxpyi(cusparseHandle_t handle, int nnz, - const double *alpha, - const double *xVal, const int *xInd, - double *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, const double *, const int *, - double *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseDaxpyi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, alpha, xVal, xInd, y, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseCaxpyi(cusparseHandle_t handle, int nnz, - const cuComplex *alpha, - const cuComplex *xVal, - const int *xInd, cuComplex *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const cuComplex *, const int *, - cuComplex *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseCaxpyi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, alpha, xVal, xInd, y, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseZaxpyi(cusparseHandle_t handle, int nnz, - const cuDoubleComplex *alpha, - const cuDoubleComplex *xVal, - const int *xInd, cuDoubleComplex *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, const cuDoubleComplex *, - const int *, cuDoubleComplex *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseZaxpyi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, alpha, xVal, xInd, y, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseSdoti(cusparseHandle_t handle, int nnz, - const float *xVal, const int *xInd, - const float *y, - float *resultDevHostPtr, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const float *, const int *, const float *, float *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSdoti"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, resultDevHostPtr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseDdoti(cusparseHandle_t handle, int nnz, - const double *xVal, const int *xInd, - const double *y, - double *resultDevHostPtr, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, const int *, const double *, - double *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseDdoti"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, resultDevHostPtr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseCdoti(cusparseHandle_t handle, int nnz, - const cuComplex *xVal, - const int *xInd, const cuComplex *y, - cuComplex *resultDevHostPtr, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const int *, const cuComplex *, - cuComplex *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseCdoti"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, resultDevHostPtr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseZdoti(cusparseHandle_t handle, int nnz, - const cuDoubleComplex *xVal, - const int *xInd, - const cuDoubleComplex *y, - cuDoubleComplex *resultDevHostPtr, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, const int *, - const cuDoubleComplex *, cuDoubleComplex *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseZdoti"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, resultDevHostPtr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseCdotci(cusparseHandle_t handle, int nnz, - const cuComplex *xVal, - const int *xInd, const cuComplex *y, - cuComplex *resultDevHostPtr, - cusparseIndexBase_t 
idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const int *, const cuComplex *, - cuComplex *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseCdotci"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, resultDevHostPtr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseZdotci(cusparseHandle_t handle, int nnz, - const cuDoubleComplex *xVal, - const int *xInd, - const cuDoubleComplex *y, - cuDoubleComplex *resultDevHostPtr, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, const int *, - const cuDoubleComplex *, cuDoubleComplex *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseZdotci"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, resultDevHostPtr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgthr(cusparseHandle_t handle, int nnz, - const float *y, float *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const float *, float *, const int *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSgthr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgthr(cusparseHandle_t handle, int nnz, - const double *y, double *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, double *, const int *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseDgthr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgthr(cusparseHandle_t handle, int nnz, - const cuComplex *y, cuComplex *xVal, 
- const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, cuComplex *, const int *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseCgthr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgthr(cusparseHandle_t handle, int nnz, - const cuDoubleComplex *y, - cuDoubleComplex *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, cuDoubleComplex *, - const int *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseZgthr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgthrz(cusparseHandle_t handle, int nnz, - float *y, float *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, float *, float *, - const int *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSgthrz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgthrz(cusparseHandle_t handle, int nnz, - double *y, double *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, double *, double *, - const int *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseDgthrz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgthrz(cusparseHandle_t handle, int nnz, - cuComplex *y, cuComplex *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - 
cusparseHandle_t, int, cuComplex *, cuComplex *, const int *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseCgthrz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgthrz(cusparseHandle_t handle, int nnz, - cuDoubleComplex *y, - cuDoubleComplex *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cuDoubleComplex *, cuDoubleComplex *, const int *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseZgthrz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseSsctr(cusparseHandle_t handle, int nnz, - const float *xVal, const int *xInd, - float *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, - const float *, const int *, - float *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSsctr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseDsctr(cusparseHandle_t handle, int nnz, - const double *xVal, const int *xInd, - double *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, const int *, double *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseDsctr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsctr(cusparseHandle_t handle, int nnz, - const cuComplex *xVal, - const int *xInd, cuComplex *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const int *, cuComplex *, - cusparseIndexBase_t); - static 
auto func_ptr = LoadSymbol("cusparseCsctr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseZsctr(cusparseHandle_t handle, int nnz, - const cuDoubleComplex *xVal, - const int *xInd, cuDoubleComplex *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, const int *, - cuDoubleComplex *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseZsctr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseSroti(cusparseHandle_t handle, int nnz, - float *xVal, const int *xInd, - float *y, const float *c, - const float *s, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, float *, const int *, float *, const float *, - const float *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSroti"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, c, s, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseDroti(cusparseHandle_t handle, int nnz, - double *xVal, const int *xInd, - double *y, const double *c, - const double *s, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, double *, const int *, double *, const double *, - const double *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseDroti"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, c, s, idxBase); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSgemvi(cusparseHandle_t handle, cusparseOperation_t transA, int m, - int n, const float *alpha, const float *A, int lda, int nnz, - const float *xVal, const int *xInd, const float *beta, float *y, - cusparseIndexBase_t idxBase, void *pBuffer) { - 
using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const float *, - const float *, int, int, const float *, const int *, const float *, - float *, cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseSgemvi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, A, lda, nnz, xVal, xInd, beta, y, - idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, - int m, int n, int nnz, int *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseSgemvi_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDgemvi(cusparseHandle_t handle, cusparseOperation_t transA, int m, - int n, const double *alpha, const double *A, int lda, int nnz, - const double *xVal, const int *xInd, const double *beta, - double *y, cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const double *, - const double *, int, int, const double *, const int *, const double *, - double *, cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseDgemvi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, A, lda, nnz, xVal, xInd, beta, y, - idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, - int m, int n, int nnz, int *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseDgemvi_bufferSize"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgemvi( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, - const cuComplex *alpha, const cuComplex *A, int lda, int nnz, - const cuComplex *xVal, const int *xInd, const cuComplex *beta, cuComplex *y, - cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cuComplex *, - const cuComplex *, int, int, const cuComplex *, const int *, - const cuComplex *, cuComplex *, cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseCgemvi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, A, lda, nnz, xVal, xInd, beta, y, - idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, - int m, int n, int nnz, int *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseCgemvi_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgemvi( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, - const cuDoubleComplex *alpha, const cuDoubleComplex *A, int lda, int nnz, - const cuDoubleComplex *xVal, const int *xInd, const cuDoubleComplex *beta, - cuDoubleComplex *y, cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, int, const cuDoubleComplex *, const int *, - const cuDoubleComplex *, cuDoubleComplex *, cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseZgemvi"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, A, lda, nnz, xVal, xInd, beta, y, - idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseZgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, - int m, int n, int nnz, int *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseZgemvi_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrmv( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int nnz, - const float *alpha, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const float *x, const float *beta, float *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - const float *, const float *, float *); - static auto func_ptr = LoadSymbol("cusparseScsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDcsrmv(cusparseHandle_t handle, cusparseOperation_t transA, int m, - int n, int nnz, const double *alpha, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *x, const double *beta, double *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - const double *, const double *, double *); - static auto func_ptr = LoadSymbol("cusparseDcsrmv"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCcsrmv(cusparseHandle_t handle, cusparseOperation_t transA, int m, - int n, int nnz, const cuComplex *alpha, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cuComplex *x, const cuComplex *beta, cuComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, const cuComplex *, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - const cuComplex *, const cuComplex *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCcsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrmv( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int nnz, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *x, - const cuDoubleComplex *beta, cuDoubleComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, - const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZcsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsrmvEx_bufferSize( - cusparseHandle_t handle, cusparseAlgMode_t alg, 
cusparseOperation_t transA, - int m, int n, int nnz, const void *alpha, cudaDataType alphatype, - const cusparseMatDescr_t descrA, const void *csrValA, - cudaDataType csrValAtype, const int *csrRowPtrA, const int *csrColIndA, - const void *x, cudaDataType xtype, const void *beta, cudaDataType betatype, - void *y, cudaDataType ytype, cudaDataType executiontype, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseAlgMode_t, cusparseOperation_t, int, int, int, - const void *, cudaDataType, const cusparseMatDescr_t, const void *, - cudaDataType, const int *, const int *, const void *, cudaDataType, - const void *, cudaDataType, void *, cudaDataType, cudaDataType, size_t *); - static auto func_ptr = LoadSymbol("cusparseCsrmvEx_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alg, transA, m, n, nnz, alpha, alphatype, descrA, - csrValA, csrValAtype, csrRowPtrA, csrColIndA, x, xtype, beta, - betatype, y, ytype, executiontype, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsrmvEx( - cusparseHandle_t handle, cusparseAlgMode_t alg, cusparseOperation_t transA, - int m, int n, int nnz, const void *alpha, cudaDataType alphatype, - const cusparseMatDescr_t descrA, const void *csrValA, - cudaDataType csrValAtype, const int *csrRowPtrA, const int *csrColIndA, - const void *x, cudaDataType xtype, const void *beta, cudaDataType betatype, - void *y, cudaDataType ytype, cudaDataType executiontype, void *buffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseAlgMode_t, cusparseOperation_t, int, int, int, - const void *, cudaDataType, const cusparseMatDescr_t, const void *, - cudaDataType, const int *, const int *, const void *, cudaDataType, - const void *, cudaDataType, void *, cudaDataType, cudaDataType, void *); - static auto func_ptr = LoadSymbol("cusparseCsrmvEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, alg, transA, m, n, nnz, alpha, alphatype, descrA, - csrValA, csrValAtype, csrRowPtrA, csrColIndA, x, xtype, beta, - betatype, y, ytype, executiontype, buffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrmv_mp( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int nnz, - const float *alpha, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const float *x, const float *beta, float *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - const float *, const float *, float *); - static auto func_ptr = LoadSymbol("cusparseScsrmv_mp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDcsrmv_mp(cusparseHandle_t handle, cusparseOperation_t transA, int m, - int n, int nnz, const double *alpha, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *x, const double *beta, double *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - const double *, const double *, double *); - static auto func_ptr = LoadSymbol("cusparseDcsrmv_mp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrmv_mp( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int nnz, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const 
int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuComplex *x, const cuComplex *beta, - cuComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, const cuComplex *, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - const cuComplex *, const cuComplex *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCcsrmv_mp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrmv_mp( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int nnz, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *x, - const cuDoubleComplex *beta, cuDoubleComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, - const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZcsrmv_mp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseShybmv( - cusparseHandle_t handle, cusparseOperation_t transA, const float *alpha, - const cusparseMatDescr_t descrA, const cusparseHybMat_t hybA, - const float *x, const float *beta, float *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const float *, - const cusparseMatDescr_t, const cusparseHybMat_t, const float *, - const float *, float *); - static auto func_ptr = LoadSymbol("cusparseShybmv"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, alpha, descrA, hybA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseDhybmv( - cusparseHandle_t handle, cusparseOperation_t transA, const double *alpha, - const cusparseMatDescr_t descrA, const cusparseHybMat_t hybA, - const double *x, const double *beta, double *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const double *, - const cusparseMatDescr_t, const cusparseHybMat_t, const double *, - const double *, double *); - static auto func_ptr = LoadSymbol("cusparseDhybmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, alpha, descrA, hybA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseChybmv( - cusparseHandle_t handle, cusparseOperation_t transA, const cuComplex *alpha, - const cusparseMatDescr_t descrA, const cusparseHybMat_t hybA, - const cuComplex *x, const cuComplex *beta, cuComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cuComplex *, - const cusparseMatDescr_t, const cusparseHybMat_t, const cuComplex *, - const cuComplex *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseChybmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, alpha, descrA, hybA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI -cusparseZhybmv(cusparseHandle_t handle, cusparseOperation_t transA, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, const cuDoubleComplex *x, - const cuDoubleComplex *beta, cuDoubleComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cuDoubleComplex *, - const cusparseMatDescr_t, const cusparseHybMat_t, const cuDoubleComplex *, - const cuDoubleComplex *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZhybmv"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, transA, alpha, descrA, hybA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrmv( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nb, int nnzb, const float *alpha, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const float *x, const float *beta, float *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - const float *, const cusparseMatDescr_t, const float *, const int *, - const int *, int, const float *, const float *, float *); - static auto func_ptr = LoadSymbol("cusparseSbsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim, - x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrmv( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nb, int nnzb, const double *alpha, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const double *x, const double *beta, double *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - const double *, const cusparseMatDescr_t, const double *, const int *, - const int *, int, const double *, const double *, double *); - static auto func_ptr = LoadSymbol("cusparseDbsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim, - x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCbsrmv(cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, 
int nb, int nnzb, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *bsrSortedValA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, int blockDim, const cuComplex *x, - const cuComplex *beta, cuComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, int, const cuComplex *, const cuComplex *, - cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCbsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim, - x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrmv( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nb, int nnzb, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *bsrSortedValA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, int blockDim, const cuDoubleComplex *x, - const cuDoubleComplex *beta, cuDoubleComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZbsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim, - x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSbsrxmv(cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int sizeOfMask, int mb, int nb, - int nnzb, const float *alpha, const cusparseMatDescr_t descrA, - const 
float *bsrSortedValA, const int *bsrSortedMaskPtrA, - const int *bsrSortedRowPtrA, const int *bsrSortedEndPtrA, - const int *bsrSortedColIndA, int blockDim, const float *x, - const float *beta, float *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - int, const float *, const cusparseMatDescr_t, const float *, const int *, - const int *, const int *, const int *, int, const float *, const float *, - float *); - static auto func_ptr = LoadSymbol("cusparseSbsrxmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, sizeOfMask, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedMaskPtrA, bsrSortedRowPtrA, - bsrSortedEndPtrA, bsrSortedColIndA, blockDim, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDbsrxmv(cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int sizeOfMask, int mb, int nb, - int nnzb, const double *alpha, const cusparseMatDescr_t descrA, - const double *bsrSortedValA, const int *bsrSortedMaskPtrA, - const int *bsrSortedRowPtrA, const int *bsrSortedEndPtrA, - const int *bsrSortedColIndA, int blockDim, const double *x, - const double *beta, double *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - int, const double *, const cusparseMatDescr_t, const double *, - const int *, const int *, const int *, const int *, int, const double *, - const double *, double *); - static auto func_ptr = LoadSymbol("cusparseDbsrxmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, sizeOfMask, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedMaskPtrA, bsrSortedRowPtrA, - bsrSortedEndPtrA, bsrSortedColIndA, blockDim, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrxmv( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int sizeOfMask, 
int mb, int nb, int nnzb, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *bsrSortedValA, const int *bsrSortedMaskPtrA, - const int *bsrSortedRowPtrA, const int *bsrSortedEndPtrA, - const int *bsrSortedColIndA, int blockDim, const cuComplex *x, - const cuComplex *beta, cuComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - int, const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, const int *, const int *, int, - const cuComplex *, const cuComplex *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCbsrxmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, sizeOfMask, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedMaskPtrA, bsrSortedRowPtrA, - bsrSortedEndPtrA, bsrSortedColIndA, blockDim, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrxmv( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int sizeOfMask, int mb, int nb, int nnzb, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *bsrSortedValA, const int *bsrSortedMaskPtrA, - const int *bsrSortedRowPtrA, const int *bsrSortedEndPtrA, - const int *bsrSortedColIndA, int blockDim, const cuDoubleComplex *x, - const cuDoubleComplex *beta, cuDoubleComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - int, const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, const int *, - const int *, int, const cuDoubleComplex *, const cuDoubleComplex *, - cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZbsrxmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, sizeOfMask, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedMaskPtrA, 
bsrSortedRowPtrA, - bsrSortedEndPtrA, bsrSortedColIndA, blockDim, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsrsv_analysisEx( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const void *csrSortedValA, - cudaDataType csrSortedValAtype, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - cudaDataType executiontype) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const void *, cudaDataType, const int *, const int *, - cusparseSolveAnalysisInfo_t, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCsrsv_analysisEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedValAtype, csrSortedRowPtrA, csrSortedColIndA, info, - executiontype); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsv_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseScsrsv_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsv_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - 
cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseDcsrsv_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsv_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseCcsrsv_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsv_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseZcsrsv_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsrsv_solveEx( - cusparseHandle_t handle, cusparseOperation_t transA, int m, - const void *alpha, cudaDataType 
alphatype, const cusparseMatDescr_t descrA, - const void *csrSortedValA, cudaDataType csrSortedValAtype, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info, const void *f, cudaDataType ftype, - void *x, cudaDataType xtype, cudaDataType executiontype) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const void *, cudaDataType, - const cusparseMatDescr_t, const void *, cudaDataType, const int *, - const int *, cusparseSolveAnalysisInfo_t, const void *, cudaDataType, - void *, cudaDataType, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCsrsv_solveEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, alpha, alphatype, descrA, csrSortedValA, - csrSortedValAtype, csrSortedRowPtrA, csrSortedColIndA, info, - f, ftype, x, xtype, executiontype); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsv_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, - const float *alpha, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - const float *f, float *x) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - cusparseSolveAnalysisInfo_t, const float *, float *); - static auto func_ptr = LoadSymbol("cusparseScsrsv_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsv_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, - const double *alpha, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - const 
double *f, double *x) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - cusparseSolveAnalysisInfo_t, const double *, double *); - static auto func_ptr = LoadSymbol("cusparseDcsrsv_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsv_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - const cuComplex *f, cuComplex *x) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cuComplex *, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - cusparseSolveAnalysisInfo_t, const cuComplex *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCcsrsv_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsv_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - const cuDoubleComplex *f, cuDoubleComplex *x) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cuDoubleComplex *, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, cusparseSolveAnalysisInfo_t, const cuDoubleComplex *, - cuDoubleComplex *); - static auto func_ptr = 
LoadSymbol("cusparseZcsrsv_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrsv2_zeroPivot(cusparseHandle_t handle, - csrsv2Info_t info, - int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, csrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXcsrsv2_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsv2_bufferSize( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, csrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseScsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsv2_bufferSize( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, csrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDcsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - 
csrSortedRowPtrA, csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsv2_bufferSize( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, csrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCcsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsv2_bufferSize( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, csrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZcsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - float *, const 
int *, const int *, csrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseScsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, csrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, csrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, 
const int *csrSortedColIndA, csrsv2Info_t info, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, csrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsv2_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, csrsv2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsv2_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, csrsv2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, 
nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsv2_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, csrsv2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsv2_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, csrsv2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsv2_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const float *alpha, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrsv2Info_t info, const float *f, 
float *x, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - csrsv2Info_t, const float *, float *, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsv2_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const double *alpha, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrsv2Info_t info, const double *f, double *x, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - csrsv2Info_t, const double *, double *, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsv2_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrsv2Info_t info, const cuComplex *f, - cuComplex *x, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cuComplex *, - const cusparseMatDescr_t, const 
cuComplex *, const int *, const int *, - csrsv2Info_t, const cuComplex *, cuComplex *, cusparseSolvePolicy_t, - void *); - static auto func_ptr = LoadSymbol("cusparseCcsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsv2_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrsv2Info_t info, const cuDoubleComplex *f, - cuDoubleComplex *x, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, csrsv2Info_t, const cuDoubleComplex *, cuDoubleComplex *, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXbsrsv2_zeroPivot(cusparseHandle_t handle, - bsrsv2Info_t info, - int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, bsrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXbsrsv2_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsv2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int 
blockDim, - bsrsv2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, float *, const int *, const int *, int, - bsrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseSbsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsv2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, double *, const int *, const int *, int, - bsrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDbsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsv2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, cuComplex *, const int *, const int *, int, - bsrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCbsrsv2_bufferSize"); 
- if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsv2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, cuDoubleComplex *, const int *, const int *, - int, bsrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZbsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockSize, - bsrsv2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, float *, const int *, const int *, int, - bsrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSbsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockSize, info, - pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - 
cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockSize, - bsrsv2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, double *, const int *, const int *, int, - bsrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDbsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockSize, info, - pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockSize, - bsrsv2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, cuComplex *, const int *, const int *, int, - bsrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCbsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockSize, info, - pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockSize, - bsrsv2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, 
cusparseOperation_t, int, int, - const cusparseMatDescr_t, cuDoubleComplex *, const int *, const int *, - int, bsrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZbsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockSize, info, - pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsv2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, const float *, const int *, const int *, int, - bsrsv2Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsv2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, const double *, const int *, const int *, int, - bsrsv2Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsrsv2_analysis"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsv2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - int, bsrsv2Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsv2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, int, bsrsv2Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseSbsrsv2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, const float *alpha, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, const float *f, float *x, cusparseSolvePolicy_t policy, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const float *, const cusparseMatDescr_t, const float *, const int *, - const int *, int, bsrsv2Info_t, const float *, float *, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, alpha, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, f, x, - policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsv2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, const double *alpha, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, const double *f, double *x, cusparseSolvePolicy_t policy, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const double *, const cusparseMatDescr_t, const double *, const int *, - const int *, int, bsrsv2Info_t, const double *, double *, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, alpha, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, f, x, - policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsv2_solve( - cusparseHandle_t handle, 
cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, const cuComplex *alpha, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, const cuComplex *f, cuComplex *x, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, int, bsrsv2Info_t, const cuComplex *, - cuComplex *, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, alpha, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, f, x, - policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsv2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, const cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, const cuDoubleComplex *f, cuDoubleComplex *x, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, bsrsv2Info_t, - const cuDoubleComplex *, cuDoubleComplex *, cusparseSolvePolicy_t, - void *); - static auto func_ptr = LoadSymbol("cusparseZbsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, alpha, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, f, x, - policy, pBuffer); -} - -cusparseStatus_t 
CUSPARSEAPI -cusparseShybsv_analysis(cusparseHandle_t handle, cusparseOperation_t transA, - const cusparseMatDescr_t descrA, cusparseHybMat_t hybA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cusparseMatDescr_t, - cusparseHybMat_t, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseShybsv_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, descrA, hybA, info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDhybsv_analysis(cusparseHandle_t handle, cusparseOperation_t transA, - const cusparseMatDescr_t descrA, cusparseHybMat_t hybA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cusparseMatDescr_t, - cusparseHybMat_t, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseDhybsv_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, descrA, hybA, info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseChybsv_analysis(cusparseHandle_t handle, cusparseOperation_t transA, - const cusparseMatDescr_t descrA, cusparseHybMat_t hybA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cusparseMatDescr_t, - cusparseHybMat_t, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseChybsv_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, descrA, hybA, info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseZhybsv_analysis(cusparseHandle_t handle, cusparseOperation_t transA, - const cusparseMatDescr_t descrA, cusparseHybMat_t hybA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cusparseMatDescr_t, - cusparseHybMat_t, cusparseSolveAnalysisInfo_t); - 
static auto func_ptr = LoadSymbol("cusparseZhybsv_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, descrA, hybA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseShybsv_solve( - cusparseHandle_t handle, cusparseOperation_t trans, const float *alpha, - const cusparseMatDescr_t descrA, const cusparseHybMat_t hybA, - cusparseSolveAnalysisInfo_t info, const float *f, float *x) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const float *, - const cusparseMatDescr_t, const cusparseHybMat_t, - cusparseSolveAnalysisInfo_t, const float *, float *); - static auto func_ptr = LoadSymbol("cusparseShybsv_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, alpha, descrA, hybA, info, f, x); -} - -cusparseStatus_t CUSPARSEAPI cusparseChybsv_solve( - cusparseHandle_t handle, cusparseOperation_t trans, const cuComplex *alpha, - const cusparseMatDescr_t descrA, const cusparseHybMat_t hybA, - cusparseSolveAnalysisInfo_t info, const cuComplex *f, cuComplex *x) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cuComplex *, - const cusparseMatDescr_t, const cusparseHybMat_t, - cusparseSolveAnalysisInfo_t, const cuComplex *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseChybsv_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, alpha, descrA, hybA, info, f, x); -} - -cusparseStatus_t CUSPARSEAPI cusparseDhybsv_solve( - cusparseHandle_t handle, cusparseOperation_t trans, const double *alpha, - const cusparseMatDescr_t descrA, const cusparseHybMat_t hybA, - cusparseSolveAnalysisInfo_t info, const double *f, double *x) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const double *, - const cusparseMatDescr_t, const cusparseHybMat_t, - cusparseSolveAnalysisInfo_t, const double *, double *); - static auto func_ptr = 
LoadSymbol("cusparseDhybsv_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, alpha, descrA, hybA, info, f, x); -} - -cusparseStatus_t CUSPARSEAPI cusparseZhybsv_solve( - cusparseHandle_t handle, cusparseOperation_t trans, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, cusparseSolveAnalysisInfo_t info, - const cuDoubleComplex *f, cuDoubleComplex *x) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cuDoubleComplex *, - const cusparseMatDescr_t, const cusparseHybMat_t, - cusparseSolveAnalysisInfo_t, const cuDoubleComplex *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZhybsv_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, alpha, descrA, hybA, info, f, x); -} - -cusparseStatus_t CUSPARSEAPI -cusparseScsrmm(cusparseHandle_t handle, cusparseOperation_t transA, int m, - int n, int k, int nnz, const float *alpha, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const float *B, int ldb, const float *beta, float *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - const float *, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cusparseScsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, k, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrmm( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int k, - int nnz, const double *alpha, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const 
double *B, int ldb, const double *beta, - double *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - const double *, int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cusparseDcsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, k, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrmm( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int k, - int nnz, const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuComplex *B, int ldb, - const cuComplex *beta, cuComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int, - const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, const cuComplex *, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCcsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, k, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrmm( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int k, - int nnz, const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *B, int ldb, - const cuDoubleComplex *beta, cuDoubleComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int, - const cuDoubleComplex *, const 
cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cusparseZcsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, k, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI -cusparseScsrmm2(cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, int nnz, - const float *alpha, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const float *B, int ldb, - const float *beta, float *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - int, const float *, const cusparseMatDescr_t, const float *, const int *, - const int *, const float *, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cusparseScsrmm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDcsrmm2(cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, int nnz, - const double *alpha, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const double *B, int ldb, - const double *beta, double *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - int, const double *, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, int, const double *, double *, - int); - static auto func_ptr = 
LoadSymbol("cusparseDcsrmm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCcsrmm2(cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, int nnz, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuComplex *B, int ldb, - const cuComplex *beta, cuComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - int, const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, const cuComplex *, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCcsrmm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrmm2( - cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, int nnz, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *B, int ldb, - const cuDoubleComplex *beta, cuDoubleComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - int, const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cusparseZcsrmm2"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrmm( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int kb, int nnzb, const float *alpha, const cusparseMatDescr_t descrA, - const float *bsrSortedValA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, const int blockSize, const float *B, - const int ldb, const float *beta, float *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - const int, const float *, const int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cusparseSbsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, kb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockSize, - B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrmm( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int kb, int nnzb, const double *alpha, const cusparseMatDescr_t descrA, - const double *bsrSortedValA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, const int blockSize, const double *B, - const int ldb, const double *beta, double *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - const int, const double *, const int, const double *, double *, int); - static auto func_ptr = 
LoadSymbol("cusparseDbsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, kb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockSize, - B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrmm( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int kb, int nnzb, const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *bsrSortedValA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, const int blockSize, const cuComplex *B, - const int ldb, const cuComplex *beta, cuComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, int, const cuComplex *, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - const int, const cuComplex *, const int, const cuComplex *, cuComplex *, - int); - static auto func_ptr = LoadSymbol("cusparseCbsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, kb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockSize, - B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrmm( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int kb, int nnzb, const cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, - const int blockSize, const cuDoubleComplex *B, const int ldb, - const cuDoubleComplex *beta, cuDoubleComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, int, const cuDoubleComplex *, - const 
cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, const int, const cuDoubleComplex *, const int, - const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cusparseZbsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, kb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockSize, - B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgemmi( - cusparseHandle_t handle, int m, int n, int k, int nnz, const float *alpha, - const float *A, int lda, const float *cscValB, const int *cscColPtrB, - const int *cscRowIndB, const float *beta, float *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, int, const float *, const float *, int, - const float *, const int *, const int *, const float *, float *, int); - static auto func_ptr = LoadSymbol("cusparseSgemmi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, - cscRowIndB, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgemmi( - cusparseHandle_t handle, int m, int n, int k, int nnz, const double *alpha, - const double *A, int lda, const double *cscValB, const int *cscColPtrB, - const int *cscRowIndB, const double *beta, double *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, int, const double *, const double *, int, - const double *, const int *, const int *, const double *, double *, int); - static auto func_ptr = LoadSymbol("cusparseDgemmi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, - cscRowIndB, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgemmi( - cusparseHandle_t handle, int m, int n, int k, int nnz, - const cuComplex *alpha, const cuComplex *A, int lda, - const cuComplex *cscValB, 
const int *cscColPtrB, const int *cscRowIndB, - const cuComplex *beta, cuComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, const int *, const int *, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCgemmi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, - cscRowIndB, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI -cusparseZgemmi(cusparseHandle_t handle, int m, int n, int k, int nnz, - const cuDoubleComplex *alpha, const cuDoubleComplex *A, int lda, - const cuDoubleComplex *cscValB, const int *cscColPtrB, - const int *cscRowIndB, const cuDoubleComplex *beta, - cuDoubleComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, const int *, - const int *, const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cusparseZgemmi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, - cscRowIndB, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsm_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseScsrsm_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, 
csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsm_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseDcsrsm_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsm_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseCcsrsm_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsm_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - cusparseSolveAnalysisInfo_t); - static auto 
func_ptr = LoadSymbol("cusparseZcsrsm_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsm_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, - const float *alpha, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - const float *B, int ldb, float *X, int ldx) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - cusparseSolveAnalysisInfo_t, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cusparseScsrsm_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, B, ldb, X, ldx); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsm_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, - const double *alpha, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - const double *B, int ldb, double *X, int ldx) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - cusparseSolveAnalysisInfo_t, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cusparseDcsrsm_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, B, ldb, X, ldx); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsm_solve( - cusparseHandle_t handle, 
cusparseOperation_t transA, int m, int n, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - const cuComplex *B, int ldb, cuComplex *X, int ldx) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cuComplex *, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - cusparseSolveAnalysisInfo_t, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCcsrsm_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, B, ldb, X, ldx); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsm_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - const cuDoubleComplex *B, int ldb, cuDoubleComplex *X, int ldx) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, cusparseSolveAnalysisInfo_t, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cusparseZcsrsm_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, B, ldb, X, ldx); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsrsm2Info(csrsm2Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrsm2Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateCsrsm2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyCsrsm2Info(csrsm2Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrsm2Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyCsrsm2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrsm2_zeroPivot(cusparseHandle_t handle, - csrsm2Info_t info, - int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, csrsm2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXcsrsm2_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsm2_bufferSizeExt( - cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, const float *alpha, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *B, - int ldb, csrsm2Info_t info, cusparseSolvePolicy_t policy, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const float *, const cusparseMatDescr_t, const float *, const int *, - const int *, const float *, int, csrsm2Info_t, cusparseSolvePolicy_t, - size_t *); - static auto func_ptr = LoadSymbol("cusparseScsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsm2_bufferSizeExt( - cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, const double *alpha, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const 
int *csrSortedColIndA, const double *B, - int ldb, csrsm2Info_t info, cusparseSolvePolicy_t policy, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const double *, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, int, csrsm2Info_t, - cusparseSolvePolicy_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsm2_bufferSizeExt( - cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuComplex *B, int ldb, csrsm2Info_t info, - cusparseSolvePolicy_t policy, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, const cuComplex *, int, csrsm2Info_t, - cusparseSolvePolicy_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsm2_bufferSizeExt( - cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const 
cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *B, int ldb, - csrsm2Info_t info, cusparseSolvePolicy_t policy, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - const cuDoubleComplex *, int, csrsm2Info_t, cusparseSolvePolicy_t, - size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsm2_analysis( - cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, const float *alpha, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *B, - int ldb, csrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const float *, const cusparseMatDescr_t, const float *, const int *, - const int *, const float *, int, csrsm2Info_t, cusparseSolvePolicy_t, - void *); - static auto func_ptr = LoadSymbol("cusparseScsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsm2_analysis( - cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, const double *alpha, - 
const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, const double *B, - int ldb, csrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const double *, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, int, csrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsm2_analysis( - cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuComplex *B, int ldb, csrsm2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, const cuComplex *, int, csrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsm2_analysis( - cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, - const 
cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *B, int ldb, - csrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - const cuDoubleComplex *, int, csrsm2Info_t, cusparseSolvePolicy_t, - void *); - static auto func_ptr = LoadSymbol("cusparseZcsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsm2_solve( - cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, const float *alpha, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, float *B, int ldb, - csrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const float *, const cusparseMatDescr_t, const float *, const int *, - const int *, float *, int, csrsm2Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsm2_solve( - cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int 
nnz, const double *alpha, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, double *B, - int ldb, csrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const double *, const cusparseMatDescr_t, const double *, - const int *, const int *, double *, int, csrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsm2_solve( - cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cuComplex *B, int ldb, csrsm2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, cuComplex *, int, csrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsm2_solve( - cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, - const cuDoubleComplex 
*alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cuDoubleComplex *B, int ldb, csrsm2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, cuDoubleComplex *, int, - csrsm2Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXbsrsm2_zeroPivot(cusparseHandle_t handle, - bsrsm2Info_t info, - int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, bsrsm2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXbsrsm2_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsm2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, int, bsrsm2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseSbsrsm2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, 
descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsm2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, int, bsrsm2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDbsrsm2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsm2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, int, bsrsm2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCbsrsm2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsm2_bufferSize( - cusparseHandle_t handle, 
cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrsm2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZbsrsm2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsm2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, int, bsrsm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSbsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsm2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int 
*bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, int, bsrsm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDbsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsm2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, int, bsrsm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCbsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsm2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, 
cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrsm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZbsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsm2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, const float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, bsrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsm2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, const double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - const double *, const int *, const int 
*, int, bsrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsm2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, const cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, bsrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsm2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, - const cuDoubleComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, bsrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto 
func_ptr = LoadSymbol("cusparseZbsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsm2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const float *alpha, const cusparseMatDescr_t descrA, - const float *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info, - const float *B, int ldb, float *X, int ldx, cusparseSolvePolicy_t policy, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, int, - bsrsm2Info_t, const float *, int, float *, int, cusparseSolvePolicy_t, - void *); - static auto func_ptr = LoadSymbol("cusparseSbsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, alpha, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, B, ldb, X, ldx, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsm2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const double *alpha, const cusparseMatDescr_t descrA, - const double *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info, - const double *B, int ldb, double *X, int ldx, cusparseSolvePolicy_t policy, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const double *, - const 
cusparseMatDescr_t, const double *, const int *, const int *, int, - bsrsm2Info_t, const double *, int, double *, int, cusparseSolvePolicy_t, - void *); - static auto func_ptr = LoadSymbol("cusparseDbsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, alpha, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, B, ldb, X, ldx, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsm2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info, - const cuComplex *B, int ldb, cuComplex *X, int ldx, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cuComplex *, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - int, bsrsm2Info_t, const cuComplex *, int, cuComplex *, int, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, alpha, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, B, ldb, X, ldx, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsm2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info, - const cuDoubleComplex *B, int ldb, 
cuDoubleComplex *X, int ldx, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, int, bsrsm2Info_t, const cuDoubleComplex *, int, - cuDoubleComplex *, int, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, alpha, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, B, ldb, X, ldx, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsrilu0Ex( - cusparseHandle_t handle, cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, void *csrSortedValA_ValM, - cudaDataType csrSortedValA_ValMtype, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - cudaDataType executiontype) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - void *, cudaDataType, const int *, const int *, - cusparseSolveAnalysisInfo_t, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCsrilu0Ex"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedValA_ValMtype, csrSortedRowPtrA, csrSortedColIndA, - info, executiontype); -} - -cusparseStatus_t CUSPARSEAPI -cusparseScsrilu0(cusparseHandle_t handle, cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, float *csrSortedValA_ValM, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - float *, const int *, const int *, 
cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseScsrilu0"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDcsrilu0(cusparseHandle_t handle, cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, double *csrSortedValA_ValM, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - double *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseDcsrilu0"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCcsrilu0(cusparseHandle_t handle, cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, cuComplex *csrSortedValA_ValM, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseCcsrilu0"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu0( - cusparseHandle_t handle, cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, cuDoubleComplex *csrSortedValA_ValM, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const 
cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseZcsrilu0"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrilu02_numericBoost( - cusparseHandle_t handle, csrilu02Info_t info, int enable_boost, double *tol, - float *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, csrilu02Info_t, int, double *, float *); - static auto func_ptr = LoadSymbol("cusparseScsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02_numericBoost( - cusparseHandle_t handle, csrilu02Info_t info, int enable_boost, double *tol, - double *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, csrilu02Info_t, int, double *, double *); - static auto func_ptr = LoadSymbol("cusparseDcsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02_numericBoost( - cusparseHandle_t handle, csrilu02Info_t info, int enable_boost, double *tol, - cuComplex *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, csrilu02Info_t, int, double *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCcsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02_numericBoost( - cusparseHandle_t handle, csrilu02Info_t info, int enable_boost, double *tol, - cuDoubleComplex *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, csrilu02Info_t, int, double *, 
cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZcsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrilu02_zeroPivot( - cusparseHandle_t handle, csrilu02Info_t info, int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, csrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXcsrilu02_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrilu02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseScsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDcsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseCcsrilu02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCcsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZcsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrilu02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedVal, const int *csrSortedRowPtr, const int *csrSortedColInd, - csrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseScsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - 
csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrilu02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, 
- const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, csrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrilu02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedValA_valM, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedValA_valM, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, 
csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedValA_valM, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedValA_valM, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseSbsrilu02_numericBoost( - cusparseHandle_t handle, bsrilu02Info_t info, int enable_boost, double *tol, - float *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, bsrilu02Info_t, int, double *, float *); - static auto func_ptr = LoadSymbol("cusparseSbsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02_numericBoost( - cusparseHandle_t handle, bsrilu02Info_t info, int enable_boost, double *tol, - double *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, bsrilu02Info_t, int, double *, double *); - static auto func_ptr = LoadSymbol("cusparseDbsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02_numericBoost( - cusparseHandle_t handle, bsrilu02Info_t info, int enable_boost, double *tol, - cuComplex *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, bsrilu02Info_t, int, double *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCbsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02_numericBoost( - cusparseHandle_t handle, bsrilu02Info_t info, int enable_boost, double *tol, - cuDoubleComplex *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, bsrilu02Info_t, int, double *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZbsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseXbsrilu02_zeroPivot( - cusparseHandle_t handle, bsrilu02Info_t info, int *position) { - using FuncPtr = - 
cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, bsrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXbsrilu02_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseSbsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDbsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const 
int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCbsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZbsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSbsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, 
descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDbsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCbsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI 
*)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrilu02Info_t, - size_t *); - static auto func_ptr = LoadSymbol("cusparseZbsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, 
info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, 
info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsric0(cusparseHandle_t handle, - cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, - float *csrSortedValA_ValM, - const int *csrSortedRowPtrA, - const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - float *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseScsric0"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsric0(cusparseHandle_t handle, - cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, - double *csrSortedValA_ValM, - const int *csrSortedRowPtrA, - const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - double *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto 
func_ptr = LoadSymbol("cusparseDcsric0"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsric0(cusparseHandle_t handle, - cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, - cuComplex *csrSortedValA_ValM, - const int *csrSortedRowPtrA, - const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseCcsric0"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsric0( - cusparseHandle_t handle, cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, cuDoubleComplex *csrSortedValA_ValM, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseZcsric0"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsric02_zeroPivot(cusparseHandle_t handle, - csric02Info_t info, - int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, csric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXcsric02_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t 
CUSPARSEAPI cusparseScsric02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseScsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsric02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDcsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsric02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCcsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, 
pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsric02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZcsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsric02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedVal, const int *csrSortedRowPtr, const int *csrSortedColInd, - csric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseScsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsric02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, 
csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsric02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsric02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsric02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsric02_analysis"); - 
if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsric02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsric02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsric02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - 
cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, csric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsric02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedValA_valM, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsric02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedValA_valM, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsric02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedValA_valM, const int 
*csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsric02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedValA_valM, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXbsric02_zeroPivot(cusparseHandle_t handle, - bsric02Info_t info, - int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, bsric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXbsric02_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsric02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseSbsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsric02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDbsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsric02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCbsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseZbsric02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZbsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsric02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSbsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsric02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, 
bsric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDbsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsric02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCbsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsric02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsric02Info_t, - size_t *); - static auto func_ptr = LoadSymbol("cusparseZbsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsric02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, 
const float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pInputBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pInputBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsric02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pInputBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pInputBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsric02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pInputBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, 
int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pInputBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsric02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pInputBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pInputBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsric02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsric02( - 
cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsric02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsric02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, 
const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv(cusparseHandle_t handle, int m, - int n, const float *dl, - const float *d, const float *du, - float *B, int ldb) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, int, - const float *, const float *, - const float *, float *, int); - static auto func_ptr = LoadSymbol("cusparseSgtsv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsv(cusparseHandle_t handle, int m, - int n, const double *dl, - const double *d, const double *du, - double *B, int ldb) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, double *, int); - static auto func_ptr = LoadSymbol("cusparseDgtsv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv(cusparseHandle_t handle, int m, - int n, const cuComplex *dl, - const cuComplex *d, - const cuComplex *du, cuComplex *B, - int ldb) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCgtsv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv(cusparseHandle_t handle, int m, - int n, const cuDoubleComplex *dl, - const cuDoubleComplex *d, - const cuDoubleComplex *du, - cuDoubleComplex *B, int ldb) { - using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cusparseZgtsv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const float *dl, const float *d, - const float *du, const float *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const float *, const float *, - const float *, int, size_t *); - static auto func_ptr = LoadSymbol("cusparseSgtsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsv2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const double *dl, const double *d, - const double *du, const double *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, const double *, int, size_t *); - static auto func_ptr = LoadSymbol("cusparseDgtsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const cuComplex *dl, - const cuComplex *d, const cuComplex *du, const cuComplex *B, int ldb, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, const cuComplex *, int, size_t *); - static auto func_ptr = LoadSymbol("cusparseCgtsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, - const cuDoubleComplex *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, const cuDoubleComplex *, - int, size_t *); - static auto func_ptr = LoadSymbol("cusparseZgtsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2(cusparseHandle_t handle, int m, - int n, const float *dl, - const float *d, const float *du, - float *B, int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const float *, const float *, - float *, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgtsv2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsv2(cusparseHandle_t handle, int m, - int n, const double *dl, - const double *d, const double *du, - double *B, int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, double *, int, void *); - static auto func_ptr = LoadSymbol("cusparseDgtsv2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2(cusparseHandle_t handle, int m, - int n, const cuComplex *dl, - const cuComplex *d, - const cuComplex *du, cuComplex *B, - int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const 
cuComplex *, const cuComplex *, - const cuComplex *, cuComplex *, int, void *); - static auto func_ptr = LoadSymbol("cusparseCgtsv2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2(cusparseHandle_t handle, int m, - int n, const cuDoubleComplex *dl, - const cuDoubleComplex *d, - const cuDoubleComplex *du, - cuDoubleComplex *B, int ldb, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *, int, - void *); - static auto func_ptr = LoadSymbol("cusparseZgtsv2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSgtsv_nopivot(cusparseHandle_t handle, int m, int n, const float *dl, - const float *d, const float *du, float *B, int ldb) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, int, - const float *, const float *, - const float *, float *, int); - static auto func_ptr = LoadSymbol("cusparseSgtsv_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDgtsv_nopivot(cusparseHandle_t handle, int m, int n, const double *dl, - const double *d, const double *du, double *B, int ldb) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, double *, int); - static auto func_ptr = LoadSymbol("cusparseDgtsv_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv_nopivot( - cusparseHandle_t handle, int m, int n, const cuComplex *dl, - const cuComplex *d, const cuComplex *du, cuComplex *B, int ldb) { - using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCgtsv_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb); -} - -cusparseStatus_t CUSPARSEAPI -cusparseZgtsv_nopivot(cusparseHandle_t handle, int m, int n, - const cuDoubleComplex *dl, const cuDoubleComplex *d, - const cuDoubleComplex *du, cuDoubleComplex *B, int ldb) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cusparseZgtsv_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2_nopivot_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const float *dl, const float *d, - const float *du, const float *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const float *, const float *, - const float *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSgtsv2_nopivot_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsv2_nopivot_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const double *dl, const double *d, - const double *du, const double *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, const double *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDgtsv2_nopivot_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, 
du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2_nopivot_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const cuComplex *dl, - const cuComplex *d, const cuComplex *du, const cuComplex *B, int ldb, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, const cuComplex *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgtsv2_nopivot_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2_nopivot_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, - const cuDoubleComplex *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, const cuDoubleComplex *, - int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgtsv2_nopivot_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2_nopivot( - cusparseHandle_t handle, int m, int n, const float *dl, const float *d, - const float *du, float *B, int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const float *, const float *, - float *, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgtsv2_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsv2_nopivot( - cusparseHandle_t handle, int m, int n, const double *dl, const double *d, - const double *du, double *B, int ldb, 
void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, double *, int, void *); - static auto func_ptr = LoadSymbol("cusparseDgtsv2_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2_nopivot( - cusparseHandle_t handle, int m, int n, const cuComplex *dl, - const cuComplex *d, const cuComplex *du, cuComplex *B, int ldb, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, cuComplex *, int, void *); - static auto func_ptr = LoadSymbol("cusparseCgtsv2_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2_nopivot( - cusparseHandle_t handle, int m, int n, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, cuDoubleComplex *B, - int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *, int, - void *); - static auto func_ptr = LoadSymbol("cusparseZgtsv2_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsvStridedBatch( - cusparseHandle_t handle, int m, const float *dl, const float *d, - const float *du, float *x, int batchCount, int batchStride) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const float *, const float *, const float *, - float *, int, int); - static auto func_ptr = LoadSymbol("cusparseSgtsvStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, 
batchStride); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsvStridedBatch( - cusparseHandle_t handle, int m, const double *dl, const double *d, - const double *du, double *x, int batchCount, int batchStride) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, const double *, const double *, - double *, int, int); - static auto func_ptr = LoadSymbol("cusparseDgtsvStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsvStridedBatch( - cusparseHandle_t handle, int m, const cuComplex *dl, const cuComplex *d, - const cuComplex *du, cuComplex *x, int batchCount, int batchStride) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const cuComplex *, - const cuComplex *, cuComplex *, int, int); - static auto func_ptr = LoadSymbol("cusparseCgtsvStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsvStridedBatch( - cusparseHandle_t handle, int m, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, cuDoubleComplex *x, - int batchCount, int batchStride) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, const cuDoubleComplex *, - const cuDoubleComplex *, cuDoubleComplex *, int, int); - static auto func_ptr = LoadSymbol("cusparseZgtsvStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2StridedBatch_bufferSizeExt( - cusparseHandle_t handle, int m, const float *dl, const float *d, - const float *du, const float *x, int batchCount, int batchStride, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - 
cusparseHandle_t, int, const float *, const float *, const float *, - const float *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSgtsv2StridedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, - bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsv2StridedBatch_bufferSizeExt( - cusparseHandle_t handle, int m, const double *dl, const double *d, - const double *du, const double *x, int batchCount, int batchStride, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, const double *, const double *, - const double *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDgtsv2StridedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, - bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2StridedBatch_bufferSizeExt( - cusparseHandle_t handle, int m, const cuComplex *dl, const cuComplex *d, - const cuComplex *du, const cuComplex *x, int batchCount, int batchStride, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const cuComplex *, - const cuComplex *, const cuComplex *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgtsv2StridedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, - bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2StridedBatch_bufferSizeExt( - cusparseHandle_t handle, int m, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, - const cuDoubleComplex *x, int batchCount, int batchStride, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const 
cuDoubleComplex *, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgtsv2StridedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, - bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2StridedBatch( - cusparseHandle_t handle, int m, const float *dl, const float *d, - const float *du, float *x, int batchCount, int batchStride, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const float *, const float *, const float *, - float *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgtsv2StridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDgtsv2StridedBatch(cusparseHandle_t handle, int m, const double *dl, - const double *d, const double *du, double *x, - int batchCount, int batchStride, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, const double *, const double *, - double *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseDgtsv2StridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2StridedBatch( - cusparseHandle_t handle, int m, const cuComplex *dl, const cuComplex *d, - const cuComplex *du, cuComplex *x, int batchCount, int batchStride, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const cuComplex *, - const cuComplex *, cuComplex *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseCgtsv2StridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, 
dl, d, du, x, batchCount, batchStride, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2StridedBatch( - cusparseHandle_t handle, int m, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, cuDoubleComplex *x, - int batchCount, int batchStride, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, const cuDoubleComplex *, - const cuDoubleComplex *, cuDoubleComplex *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseZgtsv2StridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const float *dl, const float *d, - const float *du, const float *x, int batchCount, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const float *, const float *, - const float *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSgtsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const double *dl, const double *d, - const double *du, const double *x, int batchCount, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, const double *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDgtsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseCgtsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const cuComplex *dl, - const cuComplex *d, const cuComplex *du, const cuComplex *x, int batchCount, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, const cuComplex *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgtsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, - const cuDoubleComplex *x, int batchCount, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, const cuDoubleComplex *, - int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgtsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, float *dl, float *d, float *du, - float *x, int batchCount, void *pBuffer) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, int, float *, - float *, float *, float *, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgtsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, double *dl, double *d, double *du, - double *x, int 
batchCount, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, int, - double *, double *, double *, - double *, int, void *); - static auto func_ptr = LoadSymbol("cusparseDgtsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, cuComplex *dl, cuComplex *d, - cuComplex *du, cuComplex *x, int batchCount, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, cuComplex *, cuComplex *, cuComplex *, - cuComplex *, int, void *); - static auto func_ptr = LoadSymbol("cusparseCgtsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, cuDoubleComplex *dl, - cuDoubleComplex *d, cuDoubleComplex *du, cuDoubleComplex *x, int batchCount, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, cuDoubleComplex *, cuDoubleComplex *, - cuDoubleComplex *, cuDoubleComplex *, int, void *); - static auto func_ptr = LoadSymbol("cusparseZgtsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgpsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const float *ds, const float *dl, - const float *d, const float *du, const float *dw, const float *x, - int batchCount, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const float *, const float *, - const float *, const float *, const float *, int, size_t *); - static auto func_ptr = - 
LoadSymbol("cusparseSgpsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgpsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const double *ds, - const double *dl, const double *d, const double *du, const double *dw, - const double *x, int batchCount, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, const double *, const double *, const double *, int, - size_t *); - static auto func_ptr = - LoadSymbol("cusparseDgpsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgpsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const cuComplex *ds, - const cuComplex *dl, const cuComplex *d, const cuComplex *du, - const cuComplex *dw, const cuComplex *x, int batchCount, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, const cuComplex *, const cuComplex *, - const cuComplex *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgpsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgpsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const cuDoubleComplex *ds, - const cuDoubleComplex *dl, const cuDoubleComplex *d, - const cuDoubleComplex *du, const cuDoubleComplex *dw, - const cuDoubleComplex *x, int batchCount, size_t *pBufferSizeInBytes) { - using 
FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgpsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgpsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, float *ds, float *dl, float *d, - float *du, float *dw, float *x, int batchCount, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, float *, float *, float *, float *, float *, - float *, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgpsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgpsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, double *ds, double *dl, double *d, - double *du, double *dw, double *x, int batchCount, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, double *, double *, double *, double *, - double *, double *, int, void *); - static auto func_ptr = LoadSymbol("cusparseDgpsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgpsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, cuComplex *ds, cuComplex *dl, - cuComplex *d, cuComplex *du, cuComplex *dw, cuComplex *x, int batchCount, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, cuComplex *, cuComplex *, cuComplex *, - cuComplex *, cuComplex *, cuComplex *, 
int, void *); - static auto func_ptr = LoadSymbol("cusparseCgpsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgpsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, cuDoubleComplex *ds, - cuDoubleComplex *dl, cuDoubleComplex *d, cuDoubleComplex *du, - cuDoubleComplex *dw, cuDoubleComplex *x, int batchCount, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, cuDoubleComplex *, cuDoubleComplex *, - cuDoubleComplex *, cuDoubleComplex *, cuDoubleComplex *, - cuDoubleComplex *, int, void *); - static auto func_ptr = LoadSymbol("cusparseZgpsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseXcsrgemmNnz(cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, - const cusparseMatDescr_t descrA, const int nnzA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, const int nnzB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrC, int *csrSortedRowPtrC, - int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - const cusparseMatDescr_t, const int, const int *, const int *, - const cusparseMatDescr_t, const int, const int *, const int *, - const cusparseMatDescr_t, int *, int *); - static auto func_ptr = LoadSymbol("cusparseXcsrgemmNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, descrA, nnzA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedRowPtrB, csrSortedColIndB, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr); -} - 
-cusparseStatus_t CUSPARSEAPI cusparseScsrgemm( - cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, - const cusparseMatDescr_t descrA, const int nnzA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, const int nnzB, const float *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrC, float *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - const cusparseMatDescr_t, const int, const float *, const int *, - const int *, const cusparseMatDescr_t, const int, const float *, - const int *, const int *, const cusparseMatDescr_t, float *, const int *, - int *); - static auto func_ptr = LoadSymbol("cusparseScsrgemm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrgemm( - cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, - const cusparseMatDescr_t descrA, int nnzA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, int nnzB, const double *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrC, double *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - const cusparseMatDescr_t, int, const double *, const int *, const int *, - const 
cusparseMatDescr_t, int, const double *, const int *, const int *, - const cusparseMatDescr_t, double *, const int *, int *); - static auto func_ptr = LoadSymbol("cusparseDcsrgemm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrgemm( - cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, - const cusparseMatDescr_t descrA, int nnzA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, int nnzB, const cuComplex *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrC, cuComplex *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - const cusparseMatDescr_t, int, const cuComplex *, const int *, - const int *, const cusparseMatDescr_t, int, const cuComplex *, - const int *, const int *, const cusparseMatDescr_t, cuComplex *, - const int *, int *); - static auto func_ptr = LoadSymbol("cusparseCcsrgemm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrgemm( - cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, - const cusparseMatDescr_t descrA, int nnzA, - const cuDoubleComplex *csrSortedValA, const int 
*csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, - const cuDoubleComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - cuDoubleComplex *csrSortedValC, const int *csrSortedRowPtrC, - int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - const cusparseMatDescr_t, int, const cuDoubleComplex *, const int *, - const int *, const cusparseMatDescr_t, int, const cuDoubleComplex *, - const int *, const int *, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, int *); - static auto func_ptr = LoadSymbol("cusparseZcsrgemm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsrgemm2Info(csrgemm2Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrgemm2Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateCsrgemm2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyCsrgemm2Info(csrgemm2Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrgemm2Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyCsrgemm2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrgemm2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int k, const float *alpha, - const cusparseMatDescr_t descrA, int nnzA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, const float *beta, - const 
cusparseMatDescr_t descrD, int nnzD, const int *csrSortedRowPtrD, - const int *csrSortedColIndD, csrgemm2Info_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const float *, const cusparseMatDescr_t, - int, const int *, const int *, const cusparseMatDescr_t, int, const int *, - const int *, const float *, const cusparseMatDescr_t, int, const int *, - const int *, csrgemm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseScsrgemm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, - csrSortedColIndB, beta, descrD, nnzD, csrSortedRowPtrD, - csrSortedColIndD, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrgemm2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int k, const double *alpha, - const cusparseMatDescr_t descrA, int nnzA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const double *beta, const cusparseMatDescr_t descrD, int nnzD, - const int *csrSortedRowPtrD, const int *csrSortedColIndD, - csrgemm2Info_t info, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const double *, const cusparseMatDescr_t, - int, const int *, const int *, const cusparseMatDescr_t, int, const int *, - const int *, const double *, const cusparseMatDescr_t, int, const int *, - const int *, csrgemm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsrgemm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, - csrSortedColIndB, beta, descrD, nnzD, csrSortedRowPtrD, - csrSortedColIndD, info, 
pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrgemm2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int k, const cuComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cuComplex *beta, const cusparseMatDescr_t descrD, int nnzD, - const int *csrSortedRowPtrD, const int *csrSortedColIndD, - csrgemm2Info_t info, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuComplex *, - const cusparseMatDescr_t, int, const int *, const int *, - const cusparseMatDescr_t, int, const int *, const int *, - const cuComplex *, const cusparseMatDescr_t, int, const int *, - const int *, csrgemm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsrgemm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, - csrSortedColIndB, beta, descrD, nnzD, csrSortedRowPtrD, - csrSortedColIndD, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrgemm2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int k, const cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cuDoubleComplex *beta, const cusparseMatDescr_t descrD, int nnzD, - const int *csrSortedRowPtrD, const int *csrSortedColIndD, - csrgemm2Info_t info, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, int, const int *, const int *, - const cusparseMatDescr_t, int, const int *, const int 
*, - const cuDoubleComplex *, const cusparseMatDescr_t, int, const int *, - const int *, csrgemm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsrgemm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, - csrSortedColIndB, beta, descrD, nnzD, csrSortedRowPtrD, - csrSortedColIndD, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrgemm2Nnz( - cusparseHandle_t handle, int m, int n, int k, - const cusparseMatDescr_t descrA, int nnzA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrD, int nnzD, const int *csrSortedRowPtrD, - const int *csrSortedColIndD, const cusparseMatDescr_t descrC, - int *csrSortedRowPtrC, int *nnzTotalDevHostPtr, const csrgemm2Info_t info, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, int, - const int *, const int *, const cusparseMatDescr_t, int, const int *, - const int *, const cusparseMatDescr_t, int, const int *, const int *, - const cusparseMatDescr_t, int *, int *, const csrgemm2Info_t, void *); - static auto func_ptr = LoadSymbol("cusparseXcsrgemm2Nnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, descrA, nnzA, csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, - csrSortedColIndB, descrD, nnzD, csrSortedRowPtrD, - csrSortedColIndD, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrgemm2( - cusparseHandle_t handle, int m, int n, int k, const float *alpha, - const cusparseMatDescr_t descrA, int nnzA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t 
descrB, int nnzB, const float *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, const float *beta, - const cusparseMatDescr_t descrD, int nnzD, const float *csrSortedValD, - const int *csrSortedRowPtrD, const int *csrSortedColIndD, - const cusparseMatDescr_t descrC, float *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC, - const csrgemm2Info_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const float *, const cusparseMatDescr_t, - int, const float *, const int *, const int *, const cusparseMatDescr_t, - int, const float *, const int *, const int *, const float *, - const cusparseMatDescr_t, int, const float *, const int *, const int *, - const cusparseMatDescr_t, float *, const int *, int *, - const csrgemm2Info_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsrgemm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, beta, - descrD, nnzD, csrSortedValD, csrSortedRowPtrD, - csrSortedColIndD, descrC, csrSortedValC, csrSortedRowPtrC, - csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrgemm2( - cusparseHandle_t handle, int m, int n, int k, const double *alpha, - const cusparseMatDescr_t descrA, int nnzA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, int nnzB, const double *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const double *beta, const cusparseMatDescr_t descrD, int nnzD, - const double *csrSortedValD, const int *csrSortedRowPtrD, - const int *csrSortedColIndD, const cusparseMatDescr_t descrC, - double *csrSortedValC, const int *csrSortedRowPtrC, int *csrSortedColIndC, - const csrgemm2Info_t info, void *pBuffer) { - using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const double *, const cusparseMatDescr_t, - int, const double *, const int *, const int *, const cusparseMatDescr_t, - int, const double *, const int *, const int *, const double *, - const cusparseMatDescr_t, int, const double *, const int *, const int *, - const cusparseMatDescr_t, double *, const int *, int *, - const csrgemm2Info_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrgemm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, beta, - descrD, nnzD, csrSortedValD, csrSortedRowPtrD, - csrSortedColIndD, descrC, csrSortedValC, csrSortedRowPtrC, - csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrgemm2( - cusparseHandle_t handle, int m, int n, int k, const cuComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, int nnzB, const cuComplex *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cuComplex *beta, const cusparseMatDescr_t descrD, int nnzD, - const cuComplex *csrSortedValD, const int *csrSortedRowPtrD, - const int *csrSortedColIndD, const cusparseMatDescr_t descrC, - cuComplex *csrSortedValC, const int *csrSortedRowPtrC, - int *csrSortedColIndC, const csrgemm2Info_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuComplex *, - const cusparseMatDescr_t, int, const cuComplex *, const int *, - const int *, const cusparseMatDescr_t, int, const cuComplex *, - const int *, const int *, const cuComplex *, const cusparseMatDescr_t, - int, const cuComplex *, const int *, const int *, - const cusparseMatDescr_t, cuComplex *, const int *, int *, - 
const csrgemm2Info_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsrgemm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, beta, - descrD, nnzD, csrSortedValD, csrSortedRowPtrD, - csrSortedColIndD, descrC, csrSortedValC, csrSortedRowPtrC, - csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrgemm2( - cusparseHandle_t handle, int m, int n, int k, const cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, - const cuDoubleComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cuDoubleComplex *beta, - const cusparseMatDescr_t descrD, int nnzD, - const cuDoubleComplex *csrSortedValD, const int *csrSortedRowPtrD, - const int *csrSortedColIndD, const cusparseMatDescr_t descrC, - cuDoubleComplex *csrSortedValC, const int *csrSortedRowPtrC, - int *csrSortedColIndC, const csrgemm2Info_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, int, const cuDoubleComplex *, const int *, - const int *, const cusparseMatDescr_t, int, const cuDoubleComplex *, - const int *, const int *, const cuDoubleComplex *, - const cusparseMatDescr_t, int, const cuDoubleComplex *, const int *, - const int *, const cusparseMatDescr_t, cuDoubleComplex *, const int *, - int *, const csrgemm2Info_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrgemm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, 
beta, - descrD, nnzD, csrSortedValD, csrSortedRowPtrD, - csrSortedColIndD, descrC, csrSortedValC, csrSortedRowPtrC, - csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrgeamNnz( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - int nnzA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, int nnzB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - int *csrSortedRowPtrC, int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, int, const int *, - const int *, const cusparseMatDescr_t, int, const int *, const int *, - const cusparseMatDescr_t, int *, int *); - static auto func_ptr = LoadSymbol("cusparseXcsrgeamNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, nnzA, csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, - csrSortedColIndB, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrgeam( - cusparseHandle_t handle, int m, int n, const float *alpha, - const cusparseMatDescr_t descrA, int nnzA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *beta, - const cusparseMatDescr_t descrB, int nnzB, const float *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrC, float *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const cusparseMatDescr_t, int, - const float *, const int *, const int *, const float *, - const cusparseMatDescr_t, int, const float *, const int *, const int *, - const cusparseMatDescr_t, float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseScsrgeam"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrgeam( - cusparseHandle_t handle, int m, int n, const double *alpha, - const cusparseMatDescr_t descrA, int nnzA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *beta, const cusparseMatDescr_t descrB, int nnzB, - const double *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - double *csrSortedValC, int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const cusparseMatDescr_t, int, - const double *, const int *, const int *, const double *, - const cusparseMatDescr_t, int, const double *, const int *, const int *, - const cusparseMatDescr_t, double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDcsrgeam"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrgeam( - cusparseHandle_t handle, int m, int n, const cuComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cuComplex *beta, const cusparseMatDescr_t descrB, int nnzB, - const cuComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - cuComplex *csrSortedValC, int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cusparseMatDescr_t, - int, const cuComplex *, const int *, const int *, const cuComplex *, - const cusparseMatDescr_t, int, const cuComplex *, const int *, - const int *, const cusparseMatDescr_t, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCcsrgeam"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrgeam( - cusparseHandle_t handle, int m, int n, const cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *beta, - const cusparseMatDescr_t descrB, int nnzB, - const cuDoubleComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - cuDoubleComplex *csrSortedValC, int *csrSortedRowPtrC, - int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, int, const cuDoubleComplex *, const int *, - const int *, const cuDoubleComplex *, const cusparseMatDescr_t, int, - const cuDoubleComplex *, const int *, const int *, - const cusparseMatDescr_t, cuDoubleComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZcsrgeam"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrgeam2_bufferSizeExt( 
- cusparseHandle_t handle, int m, int n, const float *alpha, - const cusparseMatDescr_t descrA, int nnzA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *beta, - const cusparseMatDescr_t descrB, int nnzB, const float *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrC, const float *csrSortedValC, - const int *csrSortedRowPtrC, const int *csrSortedColIndC, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const cusparseMatDescr_t, int, - const float *, const int *, const int *, const float *, - const cusparseMatDescr_t, int, const float *, const int *, const int *, - const cusparseMatDescr_t, const float *, const int *, const int *, - size_t *); - static auto func_ptr = LoadSymbol("cusparseScsrgeam2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrgeam2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const double *alpha, - const cusparseMatDescr_t descrA, int nnzA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *beta, const cusparseMatDescr_t descrB, int nnzB, - const double *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - const double *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const cusparseMatDescr_t, int, - const double *, const int *, const int *, const double *, - const 
cusparseMatDescr_t, int, const double *, const int *, const int *, - const cusparseMatDescr_t, const double *, const int *, const int *, - size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsrgeam2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrgeam2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const cuComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cuComplex *beta, const cusparseMatDescr_t descrB, int nnzB, - const cuComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - const cuComplex *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cusparseMatDescr_t, - int, const cuComplex *, const int *, const int *, const cuComplex *, - const cusparseMatDescr_t, int, const cuComplex *, const int *, - const int *, const cusparseMatDescr_t, const cuComplex *, const int *, - const int *, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsrgeam2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrgeam2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const 
cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *beta, - const cusparseMatDescr_t descrB, int nnzB, - const cuDoubleComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - const cuDoubleComplex *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, int, const cuDoubleComplex *, const int *, - const int *, const cuDoubleComplex *, const cusparseMatDescr_t, int, - const cuDoubleComplex *, const int *, const int *, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsrgeam2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrgeam2Nnz( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - int nnzA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, int nnzB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - int *csrSortedRowPtrC, int *nnzTotalDevHostPtr, void *workspace) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, int, const int *, - const int *, const cusparseMatDescr_t, int, const int *, const int *, - const cusparseMatDescr_t, int *, int *, void *); - static auto func_ptr = 
LoadSymbol("cusparseXcsrgeam2Nnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, nnzA, csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, - csrSortedColIndB, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, workspace); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrgeam2( - cusparseHandle_t handle, int m, int n, const float *alpha, - const cusparseMatDescr_t descrA, int nnzA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *beta, - const cusparseMatDescr_t descrB, int nnzB, const float *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrC, float *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const cusparseMatDescr_t, int, - const float *, const int *, const int *, const float *, - const cusparseMatDescr_t, int, const float *, const int *, const int *, - const cusparseMatDescr_t, float *, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseScsrgeam2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrgeam2( - cusparseHandle_t handle, int m, int n, const double *alpha, - const cusparseMatDescr_t descrA, int nnzA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *beta, const cusparseMatDescr_t descrB, int nnzB, - const double *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - double *csrSortedValC, int *csrSortedRowPtrC, int *csrSortedColIndC, - void 
*pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const cusparseMatDescr_t, int, - const double *, const int *, const int *, const double *, - const cusparseMatDescr_t, int, const double *, const int *, const int *, - const cusparseMatDescr_t, double *, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrgeam2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrgeam2( - cusparseHandle_t handle, int m, int n, const cuComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cuComplex *beta, const cusparseMatDescr_t descrB, int nnzB, - const cuComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - cuComplex *csrSortedValC, int *csrSortedRowPtrC, int *csrSortedColIndC, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cusparseMatDescr_t, - int, const cuComplex *, const int *, const int *, const cuComplex *, - const cusparseMatDescr_t, int, const cuComplex *, const int *, - const int *, const cusparseMatDescr_t, cuComplex *, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseCcsrgeam2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrgeam2( - 
cusparseHandle_t handle, int m, int n, const cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *beta, - const cusparseMatDescr_t descrB, int nnzB, - const cuDoubleComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - cuDoubleComplex *csrSortedValC, int *csrSortedRowPtrC, - int *csrSortedColIndC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, int, const cuDoubleComplex *, const int *, - const int *, const cuDoubleComplex *, const cusparseMatDescr_t, int, - const cuDoubleComplex *, const int *, const int *, - const cusparseMatDescr_t, cuDoubleComplex *, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrgeam2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrcolor( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const float *fractionToColor, int *ncolors, - int *coloring, int *reordering, const cusparseColorInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, const float *, int *, int *, int *, - const cusparseColorInfo_t); - static auto func_ptr = LoadSymbol("cusparseScsrcolor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, 
fractionToColor, ncolors, coloring, - reordering, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrcolor( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const double *fractionToColor, int *ncolors, - int *coloring, int *reordering, const cusparseColorInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, int *, int *, int *, - const cusparseColorInfo_t); - static auto func_ptr = LoadSymbol("cusparseDcsrcolor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, fractionToColor, ncolors, coloring, - reordering, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrcolor( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const float *fractionToColor, int *ncolors, - int *coloring, int *reordering, const cusparseColorInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, const float *, int *, int *, int *, - const cusparseColorInfo_t); - static auto func_ptr = LoadSymbol("cusparseCcsrcolor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, fractionToColor, ncolors, coloring, - reordering, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrcolor( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const double *fractionToColor, int *ncolors, - int *coloring, int *reordering, const 
cusparseColorInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, const double *, int *, - int *, int *, const cusparseColorInfo_t); - static auto func_ptr = LoadSymbol("cusparseZcsrcolor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, fractionToColor, ncolors, coloring, - reordering, info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSnnz(cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const float *A, int lda, - int *nnzPerRowCol, int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, int, int *, int *); - static auto func_ptr = LoadSymbol("cusparseSnnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDnnz(cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const double *A, int lda, - int *nnzPerRowCol, int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, int, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDnnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCnnz(cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuComplex *A, int lda, - int *nnzPerRowCol, int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, 
int, int, const cusparseMatDescr_t, - const cuComplex *, int, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCnnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI -cusparseZnnz(cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *A, int lda, - int *nnzPerRowCol, int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, int, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZnnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI cusparseSnnz_compress( - cusparseHandle_t handle, int m, const cusparseMatDescr_t descr, - const float *csrSortedValA, const int *csrSortedRowPtrA, int *nnzPerRow, - int *nnzC, float tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cusparseMatDescr_t, const float *, - const int *, int *, int *, float); - static auto func_ptr = LoadSymbol("cusparseSnnz_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, descr, csrSortedValA, csrSortedRowPtrA, nnzPerRow, - nnzC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseDnnz_compress( - cusparseHandle_t handle, int m, const cusparseMatDescr_t descr, - const double *csrSortedValA, const int *csrSortedRowPtrA, int *nnzPerRow, - int *nnzC, double tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cusparseMatDescr_t, const double *, - const int *, int *, int *, double); - static auto func_ptr = LoadSymbol("cusparseDnnz_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, descr, 
csrSortedValA, csrSortedRowPtrA, nnzPerRow, - nnzC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseCnnz_compress( - cusparseHandle_t handle, int m, const cusparseMatDescr_t descr, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, int *nnzPerRow, - int *nnzC, cuComplex tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cusparseMatDescr_t, const cuComplex *, - const int *, int *, int *, cuComplex); - static auto func_ptr = LoadSymbol("cusparseCnnz_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, descr, csrSortedValA, csrSortedRowPtrA, nnzPerRow, - nnzC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseZnnz_compress( - cusparseHandle_t handle, int m, const cusparseMatDescr_t descr, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - int *nnzPerRow, int *nnzC, cuDoubleComplex tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cusparseMatDescr_t, const cuDoubleComplex *, - const int *, int *, int *, cuDoubleComplex); - static auto func_ptr = LoadSymbol("cusparseZnnz_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, descr, csrSortedValA, csrSortedRowPtrA, nnzPerRow, - nnzC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2csr_compress( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedColIndA, - const int *csrSortedRowPtrA, int nnzA, const int *nnzPerRow, - float *csrSortedValC, int *csrSortedColIndC, int *csrSortedRowPtrC, - float tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, int, const int *, float *, int *, int *, float); - static auto func_ptr = LoadSymbol("cusparseScsr2csr_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, 
csrSortedColIndA, - csrSortedRowPtrA, nnzA, nnzPerRow, csrSortedValC, - csrSortedColIndC, csrSortedRowPtrC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2csr_compress( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedColIndA, - const int *csrSortedRowPtrA, int nnzA, const int *nnzPerRow, - double *csrSortedValC, int *csrSortedColIndC, int *csrSortedRowPtrC, - double tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, int, const int *, double *, int *, int *, - double); - static auto func_ptr = LoadSymbol("cusparseDcsr2csr_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedColIndA, - csrSortedRowPtrA, nnzA, nnzPerRow, csrSortedValC, - csrSortedColIndC, csrSortedRowPtrC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2csr_compress( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedColIndA, - const int *csrSortedRowPtrA, int nnzA, const int *nnzPerRow, - cuComplex *csrSortedValC, int *csrSortedColIndC, int *csrSortedRowPtrC, - cuComplex tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, int, const int *, cuComplex *, int *, int *, - cuComplex); - static auto func_ptr = LoadSymbol("cusparseCcsr2csr_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedColIndA, - csrSortedRowPtrA, nnzA, nnzPerRow, csrSortedValC, - csrSortedColIndC, csrSortedRowPtrC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2csr_compress( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int 
*csrSortedColIndA, - const int *csrSortedRowPtrA, int nnzA, const int *nnzPerRow, - cuDoubleComplex *csrSortedValC, int *csrSortedColIndC, - int *csrSortedRowPtrC, cuDoubleComplex tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, const int *, - cuDoubleComplex *, int *, int *, cuDoubleComplex); - static auto func_ptr = LoadSymbol("cusparseZcsr2csr_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedColIndA, - csrSortedRowPtrA, nnzA, nnzPerRow, csrSortedValC, - csrSortedColIndC, csrSortedRowPtrC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseSdense2csr( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *A, int lda, const int *nnzPerRow, float *csrSortedValA, - int *csrSortedRowPtrA, int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, int, - const int *, float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseSdense2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA); -} - -cusparseStatus_t CUSPARSEAPI cusparseDdense2csr( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *A, int lda, const int *nnzPerRow, double *csrSortedValA, - int *csrSortedRowPtrA, int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, int, - const int *, double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDdense2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA); -} - 
-cusparseStatus_t CUSPARSEAPI cusparseCdense2csr( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *A, int lda, const int *nnzPerRow, cuComplex *csrSortedValA, - int *csrSortedRowPtrA, int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - int, const int *, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCdense2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA); -} - -cusparseStatus_t CUSPARSEAPI cusparseZdense2csr( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuDoubleComplex *A, int lda, const int *nnzPerRow, - cuDoubleComplex *csrSortedValA, int *csrSortedRowPtrA, - int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, int, const int *, cuDoubleComplex *, int *, - int *); - static auto func_ptr = LoadSymbol("cusparseZdense2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, float *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, float *, int); - static auto func_ptr = LoadSymbol("cusparseScsr2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseDcsr2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, double *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, double *, int); - static auto func_ptr = LoadSymbol("cusparseDcsr2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cuComplex *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCcsr2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cuDoubleComplex *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cusparseZcsr2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseSdense2csc( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t 
descrA, - const float *A, int lda, const int *nnzPerCol, float *cscSortedValA, - int *cscSortedRowIndA, int *cscSortedColPtrA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, int, - const int *, float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseSdense2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerCol, cscSortedValA, - cscSortedRowIndA, cscSortedColPtrA); -} - -cusparseStatus_t CUSPARSEAPI cusparseDdense2csc( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *A, int lda, const int *nnzPerCol, double *cscSortedValA, - int *cscSortedRowIndA, int *cscSortedColPtrA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, int, - const int *, double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDdense2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerCol, cscSortedValA, - cscSortedRowIndA, cscSortedColPtrA); -} - -cusparseStatus_t CUSPARSEAPI cusparseCdense2csc( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *A, int lda, const int *nnzPerCol, cuComplex *cscSortedValA, - int *cscSortedRowIndA, int *cscSortedColPtrA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - int, const int *, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCdense2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerCol, cscSortedValA, - cscSortedRowIndA, cscSortedColPtrA); -} - -cusparseStatus_t CUSPARSEAPI cusparseZdense2csc( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuDoubleComplex *A, int lda, const int 
*nnzPerCol, - cuDoubleComplex *cscSortedValA, int *cscSortedRowIndA, - int *cscSortedColPtrA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, int, const int *, cuDoubleComplex *, int *, - int *); - static auto func_ptr = LoadSymbol("cusparseZdense2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerCol, cscSortedValA, - cscSortedRowIndA, cscSortedColPtrA); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsc2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, float *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, float *, int); - static auto func_ptr = LoadSymbol("cusparseScsc2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsc2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, double *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, double *, int); - static auto func_ptr = LoadSymbol("cusparseDcsc2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsc2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, cuComplex *A, int lda) { - using 
FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCcsc2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsc2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuDoubleComplex *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, cuDoubleComplex *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cusparseZcsc2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcoo2csr(cusparseHandle_t handle, - const int *cooRowInd, int nnz, - int m, int *csrSortedRowPtr, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const int *, int, int, int *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseXcoo2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, cooRowInd, nnz, m, csrSortedRowPtr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsr2coo(cusparseHandle_t handle, - const int *csrSortedRowPtr, - int nnz, int m, int *cooRowInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const int *, int, int, int *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseXcsr2coo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, csrSortedRowPtr, nnz, m, cooRowInd, idxBase); -} - 
-cusparseStatus_t CUSPARSEAPI cusparseCsr2cscEx( - cusparseHandle_t handle, int m, int n, int nnz, const void *csrSortedVal, - cudaDataType csrSortedValtype, const int *csrSortedRowPtr, - const int *csrSortedColInd, void *cscSortedVal, - cudaDataType cscSortedValtype, int *cscSortedRowInd, int *cscSortedColPtr, - cusparseAction_t copyValues, cusparseIndexBase_t idxBase, - cudaDataType executiontype) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const void *, cudaDataType, const int *, - const int *, void *, cudaDataType, int *, int *, cusparseAction_t, - cusparseIndexBase_t, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCsr2cscEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrSortedVal, csrSortedValtype, - csrSortedRowPtr, csrSortedColInd, cscSortedVal, - cscSortedValtype, cscSortedRowInd, cscSortedColPtr, - copyValues, idxBase, executiontype); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2csc( - cusparseHandle_t handle, int m, int n, int nnz, const float *csrSortedVal, - const int *csrSortedRowPtr, const int *csrSortedColInd, float *cscSortedVal, - int *cscSortedRowInd, int *cscSortedColPtr, cusparseAction_t copyValues, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const float *, const int *, const int *, - float *, int *, int *, cusparseAction_t, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseScsr2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, cscSortedVal, cscSortedRowInd, - cscSortedColPtr, copyValues, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2csc( - cusparseHandle_t handle, int m, int n, int nnz, const double *csrSortedVal, - const int *csrSortedRowPtr, const int *csrSortedColInd, - double *cscSortedVal, int *cscSortedRowInd, int *cscSortedColPtr, - 
cusparseAction_t copyValues, cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const double *, const int *, const int *, - double *, int *, int *, cusparseAction_t, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseDcsr2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, cscSortedVal, cscSortedRowInd, - cscSortedColPtr, copyValues, idxBase); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCcsr2csc(cusparseHandle_t handle, int m, int n, int nnz, - const cuComplex *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, cuComplex *cscSortedVal, - int *cscSortedRowInd, int *cscSortedColPtr, - cusparseAction_t copyValues, cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuComplex *, const int *, - const int *, cuComplex *, int *, int *, cusparseAction_t, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseCcsr2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, cscSortedVal, cscSortedRowInd, - cscSortedColPtr, copyValues, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2csc( - cusparseHandle_t handle, int m, int n, int nnz, - const cuDoubleComplex *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, cuDoubleComplex *cscSortedVal, - int *cscSortedRowInd, int *cscSortedColPtr, cusparseAction_t copyValues, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuDoubleComplex *, const int *, - const int *, cuDoubleComplex *, int *, int *, cusparseAction_t, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseZcsr2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, 
m, n, nnz, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, cscSortedVal, cscSortedRowInd, - cscSortedColPtr, copyValues, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseSdense2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *A, int lda, const int *nnzPerRow, cusparseHybMat_t hybA, - int userEllWidth, cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, int, - const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseSdense2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, hybA, userEllWidth, - partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseDdense2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *A, int lda, const int *nnzPerRow, cusparseHybMat_t hybA, - int userEllWidth, cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, int, - const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseDdense2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, hybA, userEllWidth, - partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseCdense2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *A, int lda, const int *nnzPerRow, cusparseHybMat_t hybA, - int userEllWidth, cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - int, const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseCdense2hyb"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, hybA, userEllWidth, - partitionType); -} - -cusparseStatus_t CUSPARSEAPI -cusparseZdense2hyb(cusparseHandle_t handle, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *A, - int lda, const int *nnzPerRow, cusparseHybMat_t hybA, - int userEllWidth, cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, int, const int *, cusparseHybMat_t, int, - cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseZdense2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, hybA, userEllWidth, - partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseShyb2dense(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - float *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - float *, int); - static auto func_ptr = LoadSymbol("cusparseShyb2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseDhyb2dense(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - double *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - double *, int); - static auto func_ptr = LoadSymbol("cusparseDhyb2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseChyb2dense(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - cuComplex *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const 
cusparseMatDescr_t, const cusparseHybMat_t, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseChyb2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseZhyb2dense(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - cuDoubleComplex *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cusparseZhyb2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseHybMat_t hybA, int userEllWidth, - cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseScsr2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, hybA, userEllWidth, partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseHybMat_t hybA, int userEllWidth, - cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseDcsr2hyb"); - 
if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, hybA, userEllWidth, partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseHybMat_t hybA, int userEllWidth, - cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseCcsr2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, hybA, userEllWidth, partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseHybMat_t hybA, int userEllWidth, - cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, cusparseHybMat_t, int, - cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseZcsr2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, hybA, userEllWidth, partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseShyb2csr(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - float *csrSortedValA, - int *csrSortedRowPtrA, - int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const 
cusparseHybMat_t, - float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseShyb2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA); -} - -cusparseStatus_t CUSPARSEAPI cusparseDhyb2csr(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - double *csrSortedValA, - int *csrSortedRowPtrA, - int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDhyb2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA); -} - -cusparseStatus_t CUSPARSEAPI cusparseChyb2csr(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - cuComplex *csrSortedValA, - int *csrSortedRowPtrA, - int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseChyb2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA); -} - -cusparseStatus_t CUSPARSEAPI cusparseZhyb2csr(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - cuDoubleComplex *csrSortedValA, - int *csrSortedRowPtrA, - int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - cuDoubleComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZhyb2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA); -} - -cusparseStatus_t 
CUSPARSEAPI cusparseScsc2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, cusparseHybMat_t hybA, int userEllWidth, - cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseScsc2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, hybA, userEllWidth, partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsc2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, cusparseHybMat_t hybA, int userEllWidth, - cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseDcsc2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, hybA, userEllWidth, partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsc2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, cusparseHybMat_t hybA, int userEllWidth, - cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = 
LoadSymbol("cusparseCcsc2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, hybA, userEllWidth, partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsc2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuDoubleComplex *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, cusparseHybMat_t hybA, int userEllWidth, - cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, cusparseHybMat_t, int, - cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseZcsc2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, hybA, userEllWidth, partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseShyb2csc(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - float *cscSortedVal, - int *cscSortedRowInd, - int *cscSortedColPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseShyb2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, cscSortedVal, cscSortedRowInd, - cscSortedColPtr); -} - -cusparseStatus_t CUSPARSEAPI cusparseDhyb2csc(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - double *cscSortedVal, - int *cscSortedRowInd, - int *cscSortedColPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDhyb2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); 
- return func_ptr(handle, descrA, hybA, cscSortedVal, cscSortedRowInd, - cscSortedColPtr); -} - -cusparseStatus_t CUSPARSEAPI cusparseChyb2csc(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - cuComplex *cscSortedVal, - int *cscSortedRowInd, - int *cscSortedColPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseChyb2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, cscSortedVal, cscSortedRowInd, - cscSortedColPtr); -} - -cusparseStatus_t CUSPARSEAPI cusparseZhyb2csc(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - cuDoubleComplex *cscSortedVal, - int *cscSortedRowInd, - int *cscSortedColPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - cuDoubleComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZhyb2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, cscSortedVal, cscSortedRowInd, - cscSortedColPtr); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsr2bsrNnz( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, int blockDim, const cusparseMatDescr_t descrC, - int *bsrSortedRowPtrC, int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const int *, const int *, int, const cusparseMatDescr_t, int *, int *); - static auto func_ptr = LoadSymbol("cusparseXcsr2bsrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedRowPtrA, - csrSortedColIndA, blockDim, descrC, bsrSortedRowPtrC, - 
nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2bsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, float *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, const cusparseMatDescr_t, - float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseScsr2bsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, blockDim, descrC, bsrSortedValC, - bsrSortedRowPtrC, bsrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2bsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, double *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, const cusparseMatDescr_t, - double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDcsr2bsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, blockDim, descrC, bsrSortedValC, - bsrSortedRowPtrC, bsrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2bsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int 
*csrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, cuComplex *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, - const cusparseMatDescr_t, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCcsr2bsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, blockDim, descrC, bsrSortedValC, - bsrSortedRowPtrC, bsrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2bsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, cuDoubleComplex *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, - const cusparseMatDescr_t, cuDoubleComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZcsr2bsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, blockDim, descrC, bsrSortedValC, - bsrSortedRowPtrC, bsrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, float *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - 
cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, const cusparseMatDescr_t, - float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseSbsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, blockDim, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, double *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, const cusparseMatDescr_t, - double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDbsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, blockDim, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, cuComplex *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, - const cusparseMatDescr_t, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCbsr2csr"); 
- if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, blockDim, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, cuDoubleComplex *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, - const cusparseMatDescr_t, cuDoubleComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZbsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, blockDim, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsc_bufferSize( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const float *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const float *, const int *, const int *, - int, int, int *); - static auto func_ptr = LoadSymbol("cusparseSgebsr2gebsc_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsc_bufferSize( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const double *bsrSortedVal, const int *bsrSortedRowPtr, - 
const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const double *, const int *, const int *, - int, int, int *); - static auto func_ptr = LoadSymbol("cusparseDgebsr2gebsc_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsc_bufferSize( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuComplex *, const int *, - const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseCgebsr2gebsc_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsc_bufferSize( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuDoubleComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuDoubleComplex *, const int *, - const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseZgebsr2gebsc_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsc_bufferSizeExt( - cusparseHandle_t handle, int mb, int nb, 
int nnzb, - const float *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const float *, const int *, const int *, - int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSgebsr2gebsc_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsc_bufferSizeExt( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const double *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const double *, const int *, const int *, - int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDgebsr2gebsc_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsc_bufferSizeExt( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuComplex *, const int *, - const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgebsr2gebsc_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsc_bufferSizeExt( - 
cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuDoubleComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuDoubleComplex *, const int *, - const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgebsr2gebsc_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsc( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const float *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, float *bscVal, - int *bscRowInd, int *bscColPtr, cusparseAction_t copyValues, - cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const float *, const int *, const int *, - int, int, float *, int *, int *, cusparseAction_t, cusparseIndexBase_t, - void *); - static auto func_ptr = LoadSymbol("cusparseSgebsr2gebsc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, bscVal, bscRowInd, - bscColPtr, copyValues, idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsc( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const double *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - double *bscVal, int *bscRowInd, int *bscColPtr, cusparseAction_t copyValues, - cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const double *, const int *, const int *, - int, int, double *, int *, 
int *, cusparseAction_t, cusparseIndexBase_t, - void *); - static auto func_ptr = LoadSymbol("cusparseDgebsr2gebsc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, bscVal, bscRowInd, - bscColPtr, copyValues, idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsc( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - cuComplex *bscVal, int *bscRowInd, int *bscColPtr, - cusparseAction_t copyValues, cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuComplex *, const int *, - const int *, int, int, cuComplex *, int *, int *, cusparseAction_t, - cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseCgebsr2gebsc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, bscVal, bscRowInd, - bscColPtr, copyValues, idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsc( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuDoubleComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - cuDoubleComplex *bscVal, int *bscRowInd, int *bscColPtr, - cusparseAction_t copyValues, cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuDoubleComplex *, const int *, - const int *, int, int, cuDoubleComplex *, int *, int *, cusparseAction_t, - cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseZgebsr2gebsc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, 
bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, bscVal, bscRowInd, - bscColPtr, copyValues, idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXgebsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, int rowBlockDim, int colBlockDim, - const cusparseMatDescr_t descrC, int *csrSortedRowPtrC, - int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const int *, const int *, int, int, const cusparseMatDescr_t, int *, - int *); - static auto func_ptr = LoadSymbol("cusparseXgebsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, - csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDim, - int colBlockDim, const cusparseMatDescr_t descrC, float *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, int, - const cusparseMatDescr_t, float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseSgebsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const 
cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDim, - int colBlockDim, const cusparseMatDescr_t descrC, double *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, int, - const cusparseMatDescr_t, double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDgebsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDim, - int colBlockDim, const cusparseMatDescr_t descrC, cuComplex *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, int, - const cusparseMatDescr_t, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCgebsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDim, 
- int colBlockDim, const cusparseMatDescr_t descrC, - cuDoubleComplex *csrSortedValC, int *csrSortedRowPtrC, - int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, int, - const cusparseMatDescr_t, cuDoubleComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZgebsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseScsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, int, int *); - static auto func_ptr = 
LoadSymbol("cusparseDcsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseCcsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseZcsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const 
float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseScsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDcsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCcsr2gebsr_bufferSizeExt"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZcsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsr2gebsrNnz( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrC, - int *bsrSortedRowPtrC, int rowBlockDim, int colBlockDim, - int *nnzTotalDevHostPtr, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const int *, const int *, const cusparseMatDescr_t, int *, int, int, - int *, void *); - static auto func_ptr = LoadSymbol("cusparseXcsr2gebsrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedRowPtrA, - csrSortedColIndA, descrC, bsrSortedRowPtrC, rowBlockDim, - colBlockDim, nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - 
const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrC, float *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDim, - int colBlockDim, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, const cusparseMatDescr_t, - float *, int *, int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseScsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDim, colBlockDim, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrC, double *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDim, - int colBlockDim, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, const cusparseMatDescr_t, - double *, int *, int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseDcsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDim, colBlockDim, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrC, 
cuComplex *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDim, - int colBlockDim, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, const cusparseMatDescr_t, - cuComplex *, int *, int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseCcsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDim, colBlockDim, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrC, cuDoubleComplex *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDim, - int colBlockDim, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - const cusparseMatDescr_t, cuDoubleComplex *, int *, int *, int, int, - void *); - static auto func_ptr = LoadSymbol("cusparseZcsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDim, colBlockDim, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, 
int colBlockDimC, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const float *, const int *, const int *, int, - int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseSgebsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const double *, const int *, const int *, int, - int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseDgebsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuComplex *, const int *, const 
int *, - int, int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseCgebsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, int, int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseZgebsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const float *, const int *, const int *, int, - int, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSgebsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, 
nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const double *, const int *, const int *, int, - int, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDgebsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - int, int, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgebsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsr_bufferSizeExt( - 
cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, int, int, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgebsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseXgebsr2gebsrNnz( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, int rowBlockDimA, int colBlockDimA, - const cusparseMatDescr_t descrC, int *bsrSortedRowPtrC, int rowBlockDimC, - int colBlockDimC, int *nnzTotalDevHostPtr, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const int *, const int *, int, int, - const cusparseMatDescr_t, int *, int, int, int *, void *); - static auto func_ptr = LoadSymbol("cusparseXgebsr2gebsrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDimA, colBlockDimA, descrC, - bsrSortedRowPtrC, rowBlockDimC, colBlockDimC, - nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - 
const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, const cusparseMatDescr_t descrC, float *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDimC, - int colBlockDimC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const float *, const int *, const int *, int, - int, const cusparseMatDescr_t, float *, int *, int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgebsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDimC, colBlockDimC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, const cusparseMatDescr_t descrC, double *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDimC, - int colBlockDimC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const double *, const int *, const int *, int, - int, const cusparseMatDescr_t, double *, int *, int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseDgebsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDimC, colBlockDimC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsr( - cusparseHandle_t 
handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, const cusparseMatDescr_t descrC, cuComplex *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDimC, - int colBlockDimC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - int, int, const cusparseMatDescr_t, cuComplex *, int *, int *, int, int, - void *); - static auto func_ptr = LoadSymbol("cusparseCgebsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDimC, colBlockDimC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, const cusparseMatDescr_t descrC, - cuDoubleComplex *bsrSortedValC, int *bsrSortedRowPtrC, - int *bsrSortedColIndC, int rowBlockDimC, int colBlockDimC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, int, int, const cusparseMatDescr_t, cuDoubleComplex *, int *, - int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseZgebsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - 
colBlockDimA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDimC, colBlockDimC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCreateIdentityPermutation(cusparseHandle_t handle, int n, int *p) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, int *); - static auto func_ptr = - LoadSymbol("cusparseCreateIdentityPermutation"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, p); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcoosort_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, const int *cooRowsA, - const int *cooColsA, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const int *, const int *, size_t *); - static auto func_ptr = LoadSymbol("cusparseXcoosort_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, cooRowsA, cooColsA, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcoosortByRow(cusparseHandle_t handle, - int m, int n, int nnz, - int *cooRowsA, int *cooColsA, - int *P, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, int *, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseXcoosortByRow"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, cooRowsA, cooColsA, P, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcoosortByColumn(cusparseHandle_t handle, - int m, int n, int nnz, - int *cooRowsA, - int *cooColsA, int *P, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, int *, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseXcoosortByColumn"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, cooRowsA, cooColsA, P, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrsort_bufferSizeExt( - cusparseHandle_t 
handle, int m, int n, int nnz, const int *csrRowPtrA, - const int *csrColIndA, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const int *, const int *, size_t *); - static auto func_ptr = LoadSymbol("cusparseXcsrsort_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrRowPtrA, csrColIndA, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrsort(cusparseHandle_t handle, int m, - int n, int nnz, - const cusparseMatDescr_t descrA, - const int *csrRowPtrA, - int *csrColIndA, int *P, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const int *, - int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseXcsrsort"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrRowPtrA, csrColIndA, P, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcscsort_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, const int *cscColPtrA, - const int *cscRowIndA, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const int *, const int *, size_t *); - static auto func_ptr = LoadSymbol("cusparseXcscsort_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, cscColPtrA, cscRowIndA, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcscsort(cusparseHandle_t handle, int m, - int n, int nnz, - const cusparseMatDescr_t descrA, - const int *cscColPtrA, - int *cscRowIndA, int *P, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const int *, - int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseXcscsort"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, 
m, n, nnz, descrA, cscColPtrA, cscRowIndA, P, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsru2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, float *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, float *, const int *, int *, - csru2csrInfo_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseScsru2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsru2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, double *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, double *, const int *, int *, - csru2csrInfo_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsru2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsru2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, cuComplex *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, cuComplex *, const int *, int *, - csru2csrInfo_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsru2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsru2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, cuDoubleComplex *csrVal, - const int *csrRowPtr, 
int *csrColInd, csru2csrInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, cuDoubleComplex *, const int *, int *, - csru2csrInfo_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsru2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsru2csr( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, float *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, float *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsru2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsru2csr( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, double *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, double *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsru2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsru2csr( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, cuComplex *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, 
cuComplex *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsru2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsru2csr( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, cuDoubleComplex *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsru2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2csru( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, float *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, float *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsr2csru"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2csru( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, double *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, double *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsr2csru"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2csru( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, cuComplex *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsr2csru"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2csru( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, cuDoubleComplex *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsr2csru"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - const float *threshold, const cusparseMatDescr_t descrC, - const float *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - size_t *); - static auto func_ptr = - LoadSymbol("cusparseSpruneDense2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, 
A, lda, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - const double *threshold, const cusparseMatDescr_t descrC, - const double *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - size_t *); - static auto func_ptr = - LoadSymbol("cusparseDpruneDense2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csrNnz( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - const float *threshold, const cusparseMatDescr_t descrC, int *csrRowPtrC, - int *nnzTotalDevHostPtr, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, const float *, - const cusparseMatDescr_t, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseSpruneDense2csrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrRowPtrC, - nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csrNnz( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - const double *threshold, const cusparseMatDescr_t descrC, - int *csrSortedRowPtrC, int *nnzTotalDevHostPtr, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, const double *, - const cusparseMatDescr_t, int *, int *, void *); - static auto func_ptr = 
LoadSymbol("cusparseDpruneDense2csrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csr( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - const float *threshold, const cusparseMatDescr_t descrC, - float *csrSortedValC, const int *csrSortedRowPtrC, int *csrSortedColIndC, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, const float *, - const cusparseMatDescr_t, float *, const int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseSpruneDense2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csr( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - const double *threshold, const cusparseMatDescr_t descrC, - double *csrSortedValC, const int *csrSortedRowPtrC, int *csrSortedColIndC, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, const double *, - const cusparseMatDescr_t, double *, const int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseDpruneDense2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const float *threshold, const cusparseMatDescr_t descrC, - const float *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, 
size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, const float *, const cusparseMatDescr_t, - const float *, const int *, const int *, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSpruneCsr2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *threshold, const cusparseMatDescr_t descrC, - const double *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, const cusparseMatDescr_t, - const double *, const int *, const int *, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDpruneCsr2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csrNnz( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const float *threshold, const cusparseMatDescr_t descrC, - int *csrSortedRowPtrC, int *nnzTotalDevHostPtr, void *pBuffer) { - using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, const float *, const cusparseMatDescr_t, int *, - int *, void *); - static auto func_ptr = LoadSymbol("cusparseSpruneCsr2csrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, threshold, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrNnz( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *threshold, const cusparseMatDescr_t descrC, - int *csrSortedRowPtrC, int *nnzTotalDevHostPtr, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, const cusparseMatDescr_t, int *, - int *, void *); - static auto func_ptr = LoadSymbol("cusparseDpruneCsr2csrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, threshold, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csr( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const float *threshold, const cusparseMatDescr_t descrC, - float *csrSortedValC, const int *csrSortedRowPtrC, int *csrSortedColIndC, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, const float *, const cusparseMatDescr_t, - float *, const int *, int *, void *); - static auto func_ptr 
= LoadSymbol("cusparseSpruneCsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csr( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *threshold, const cusparseMatDescr_t descrC, - double *csrSortedValC, const int *csrSortedRowPtrC, int *csrSortedColIndC, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, const cusparseMatDescr_t, - double *, const int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseDpruneCsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csrByPercentage_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - float percentage, const cusparseMatDescr_t descrC, - const float *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, pruneInfo_t info, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, float, - const cusparseMatDescr_t, const float *, const int *, const int *, - pruneInfo_t, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSpruneDense2csrByPercentage_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, 
pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csrByPercentage_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - float percentage, const cusparseMatDescr_t descrC, - const double *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, pruneInfo_t info, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, float, - const cusparseMatDescr_t, const double *, const int *, const int *, - pruneInfo_t, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDpruneDense2csrByPercentage_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csrNnzByPercentage( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - float percentage, const cusparseMatDescr_t descrC, int *csrRowPtrC, - int *nnzTotalDevHostPtr, pruneInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, float, - const cusparseMatDescr_t, int *, int *, pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseSpruneDense2csrNnzByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrRowPtrC, - nnzTotalDevHostPtr, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csrNnzByPercentage( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - float percentage, const cusparseMatDescr_t descrC, int *csrRowPtrC, - int *nnzTotalDevHostPtr, pruneInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, float, - const cusparseMatDescr_t, int *, int *, pruneInfo_t, void *); - static auto 
func_ptr = - LoadSymbol("cusparseDpruneDense2csrNnzByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrRowPtrC, - nnzTotalDevHostPtr, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csrByPercentage( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - float percentage, const cusparseMatDescr_t descrC, float *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC, pruneInfo_t info, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, float, - const cusparseMatDescr_t, float *, const int *, int *, pruneInfo_t, - void *); - static auto func_ptr = - LoadSymbol("cusparseSpruneDense2csrByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csrByPercentage( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - float percentage, const cusparseMatDescr_t descrC, double *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC, pruneInfo_t info, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, float, - const cusparseMatDescr_t, double *, const int *, int *, pruneInfo_t, - void *); - static auto func_ptr = - LoadSymbol("cusparseDpruneDense2csrByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csrByPercentage_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, float 
percentage, - const cusparseMatDescr_t descrC, const float *csrSortedValC, - const int *csrSortedRowPtrC, const int *csrSortedColIndC, pruneInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, float, const cusparseMatDescr_t, const float *, - const int *, const int *, pruneInfo_t, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSpruneCsr2csrByPercentage_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrByPercentage_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, float percentage, - const cusparseMatDescr_t descrC, const double *csrSortedValC, - const int *csrSortedRowPtrC, const int *csrSortedColIndC, pruneInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, float, const cusparseMatDescr_t, const double *, - const int *, const int *, pruneInfo_t, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDpruneCsr2csrByPercentage_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csrNnzByPercentage( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float 
*csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, float percentage, - const cusparseMatDescr_t descrC, int *csrSortedRowPtrC, - int *nnzTotalDevHostPtr, pruneInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, float, const cusparseMatDescr_t, int *, int *, - pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseSpruneCsr2csrNnzByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, percentage, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrNnzByPercentage( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, float percentage, - const cusparseMatDescr_t descrC, int *csrSortedRowPtrC, - int *nnzTotalDevHostPtr, pruneInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, float, const cusparseMatDescr_t, int *, int *, - pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseDpruneCsr2csrNnzByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, percentage, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csrByPercentage( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, float percentage, - const cusparseMatDescr_t descrC, float *csrSortedValC, - const int 
*csrSortedRowPtrC, int *csrSortedColIndC, pruneInfo_t info, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, float, const cusparseMatDescr_t, float *, - const int *, int *, pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseSpruneCsr2csrByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrByPercentage( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, float percentage, - const cusparseMatDescr_t descrC, double *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC, pruneInfo_t info, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, float, const cusparseMatDescr_t, double *, - const int *, int *, pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseDpruneCsr2csrByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsr2cscEx2( - cusparseHandle_t handle, int m, int n, int nnz, const void *csrVal, - const int *csrRowPtr, const int *csrColInd, void *cscVal, int *cscColPtr, - int *cscRowInd, cudaDataType valType, cusparseAction_t copyValues, - cusparseIndexBase_t idxBase, cusparseCsr2CscAlg_t alg, void *buffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - 
cusparseHandle_t, int, int, int, const void *, const int *, const int *, - void *, int *, int *, cudaDataType, cusparseAction_t, cusparseIndexBase_t, - cusparseCsr2CscAlg_t, void *); - static auto func_ptr = LoadSymbol("cusparseCsr2cscEx2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, cscVal, - cscColPtr, cscRowInd, valType, copyValues, idxBase, alg, - buffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsr2cscEx2_bufferSize( - cusparseHandle_t handle, int m, int n, int nnz, const void *csrVal, - const int *csrRowPtr, const int *csrColInd, void *cscVal, int *cscColPtr, - int *cscRowInd, cudaDataType valType, cusparseAction_t copyValues, - cusparseIndexBase_t idxBase, cusparseCsr2CscAlg_t alg, size_t *bufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const void *, const int *, const int *, - void *, int *, int *, cudaDataType, cusparseAction_t, cusparseIndexBase_t, - cusparseCsr2CscAlg_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCsr2cscEx2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, cscVal, - cscColPtr, cscRowInd, valType, copyValues, idxBase, alg, - bufferSize); -} - -#if !defined(_WIN32) - -cusparseStatus_t CUSPARSEAPI -cusparseCreateSpVec(cusparseSpVecDescr_t *spVecDescr, int64_t size, int64_t nnz, - void *indices, void *values, cusparseIndexType_t idxType, - cusparseIndexBase_t idxBase, cudaDataType valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseSpVecDescr_t *, int64_t, int64_t, void *, void *, - cusparseIndexType_t, cusparseIndexBase_t, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCreateSpVec"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spVecDescr, size, nnz, indices, values, idxType, idxBase, - valueType); -} - -cusparseStatus_t CUSPARSEAPI 
-cusparseDestroySpVec(cusparseSpVecDescr_t spVecDescr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpVecDescr_t); - static auto func_ptr = LoadSymbol("cusparseDestroySpVec"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spVecDescr); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpVecGet( - const cusparseSpVecDescr_t spVecDescr, int64_t *size, int64_t *nnz, - void **indices, void **values, cusparseIndexType_t *idxType, - cusparseIndexBase_t *idxBase, cudaDataType *valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - const cusparseSpVecDescr_t, int64_t *, int64_t *, void **, void **, - cusparseIndexType_t *, cusparseIndexBase_t *, cudaDataType *); - static auto func_ptr = LoadSymbol("cusparseSpVecGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spVecDescr, size, nnz, indices, values, idxType, idxBase, - valueType); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpVecGetIndexBase( - const cusparseSpVecDescr_t spVecDescr, cusparseIndexBase_t *idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(const cusparseSpVecDescr_t, - cusparseIndexBase_t *); - static auto func_ptr = LoadSymbol("cusparseSpVecGetIndexBase"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spVecDescr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpVecGetValues(const cusparseSpVecDescr_t spVecDescr, void **values) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(const cusparseSpVecDescr_t, void **); - static auto func_ptr = LoadSymbol("cusparseSpVecGetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spVecDescr, values); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpVecSetValues(cusparseSpVecDescr_t spVecDescr, void *values) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpVecDescr_t, void *); - static auto func_ptr = LoadSymbol("cusparseSpVecSetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spVecDescr, values); -} - 
-cusparseStatus_t CUSPARSEAPI -cusparseCreateDnVec(cusparseDnVecDescr_t *dnVecDescr, int64_t size, - void *values, cudaDataType valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseDnVecDescr_t *, int64_t, void *, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCreateDnVec"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnVecDescr, size, values, valueType); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDestroyDnVec(cusparseDnVecDescr_t dnVecDescr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseDnVecDescr_t); - static auto func_ptr = LoadSymbol("cusparseDestroyDnVec"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnVecDescr); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDnVecGet(const cusparseDnVecDescr_t dnVecDescr, int64_t *size, - void **values, cudaDataType *valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - const cusparseDnVecDescr_t, int64_t *, void **, cudaDataType *); - static auto func_ptr = LoadSymbol("cusparseDnVecGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnVecDescr, size, values, valueType); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDnVecGetValues(const cusparseDnVecDescr_t dnVecDescr, void **values) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(const cusparseDnVecDescr_t, void **); - static auto func_ptr = LoadSymbol("cusparseDnVecGetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnVecDescr, values); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDnVecSetValues(cusparseDnVecDescr_t dnVecDescr, void *values) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseDnVecDescr_t, void *); - static auto func_ptr = LoadSymbol("cusparseDnVecSetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnVecDescr, values); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCoo(cusparseSpMatDescr_t *spMatDescr, - int64_t rows, int64_t cols, - int64_t nnz, void 
*cooRowInd, - void *cooColInd, void *cooValues, - cusparseIndexType_t cooIdxType, - cusparseIndexBase_t idxBase, - cudaDataType valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseSpMatDescr_t *, int64_t, int64_t, int64_t, void *, void *, void *, - cusparseIndexType_t, cusparseIndexBase_t, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCreateCoo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, nnz, cooRowInd, cooColInd, cooValues, - cooIdxType, idxBase, valueType); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsr( - cusparseSpMatDescr_t *spMatDescr, int64_t rows, int64_t cols, int64_t nnz, - void *csrRowOffsets, void *csrColInd, void *csrValues, - cusparseIndexType_t csrRowOffsetsType, cusparseIndexType_t csrColIndType, - cusparseIndexBase_t idxBase, cudaDataType valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseSpMatDescr_t *, int64_t, int64_t, int64_t, void *, void *, void *, - cusparseIndexType_t, cusparseIndexType_t, cusparseIndexBase_t, - cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCreateCsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, nnz, csrRowOffsets, csrColInd, - csrValues, csrRowOffsetsType, csrColIndType, idxBase, - valueType); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCooAoS( - cusparseSpMatDescr_t *spMatDescr, int64_t rows, int64_t cols, int64_t nnz, - void *cooInd, void *cooValues, cusparseIndexType_t cooIdxType, - cusparseIndexBase_t idxBase, cudaDataType valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseSpMatDescr_t *, int64_t, int64_t, int64_t, void *, void *, - cusparseIndexType_t, cusparseIndexBase_t, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCreateCooAoS"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, nnz, cooInd, cooValues, cooIdxType, - idxBase, valueType); -} - 
-cusparseStatus_t CUSPARSEAPI -cusparseDestroySpMat(cusparseSpMatDescr_t spMatDescr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseDestroySpMat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCooGet(const cusparseSpMatDescr_t spMatDescr, int64_t *rows, - int64_t *cols, int64_t *nnz, - void **cooRowInd, // COO row indices - void **cooColInd, // COO column indices - void **cooValues, // COO values - cusparseIndexType_t *idxType, cusparseIndexBase_t *idxBase, - cudaDataType *valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - const cusparseSpMatDescr_t, int64_t *, int64_t *, int64_t *, void **, - void **, void **, cusparseIndexType_t *, cusparseIndexBase_t *, - cudaDataType *); - static auto func_ptr = LoadSymbol("cusparseCooGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, nnz, cooRowInd, cooColInd, cooValues, - idxType, idxBase, valueType); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCooAoSGet(const cusparseSpMatDescr_t spMatDescr, int64_t *rows, - int64_t *cols, int64_t *nnz, - void **cooInd, // COO indices - void **cooValues, // COO values - cusparseIndexType_t *idxType, cusparseIndexBase_t *idxBase, - cudaDataType *valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - const cusparseSpMatDescr_t, int64_t *, int64_t *, int64_t *, void **, - void **, cusparseIndexType_t *, cusparseIndexBase_t *, cudaDataType *); - static auto func_ptr = LoadSymbol("cusparseCooAoSGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, nnz, cooInd, cooValues, idxType, - idxBase, valueType); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsrGet( - const cusparseSpMatDescr_t spMatDescr, int64_t *rows, int64_t *cols, - int64_t *nnz, void **csrRowOffsets, void **csrColInd, void **csrValues, - cusparseIndexType_t 
*csrRowOffsetsType, cusparseIndexType_t *csrColIndType, - cusparseIndexBase_t *idxBase, cudaDataType *valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - const cusparseSpMatDescr_t, int64_t *, int64_t *, int64_t *, void **, - void **, void **, cusparseIndexType_t *, cusparseIndexType_t *, - cusparseIndexBase_t *, cudaDataType *); - static auto func_ptr = LoadSymbol("cusparseCsrGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, nnz, csrRowOffsets, csrColInd, - csrValues, csrRowOffsetsType, csrColIndType, idxBase, - valueType); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpMatGetFormat( - const cusparseSpMatDescr_t spMatDescr, cusparseFormat_t *format) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(const cusparseSpMatDescr_t, - cusparseFormat_t *); - static auto func_ptr = LoadSymbol("cusparseSpMatGetFormat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, format); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpMatGetIndexBase( - const cusparseSpMatDescr_t spMatDescr, cusparseIndexBase_t *idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(const cusparseSpMatDescr_t, - cusparseIndexBase_t *); - static auto func_ptr = LoadSymbol("cusparseSpMatGetIndexBase"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpMatGetValues(const cusparseSpMatDescr_t spMatDescr, void **values) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(const cusparseSpMatDescr_t, void **); - static auto func_ptr = LoadSymbol("cusparseSpMatGetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, values); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpMatSetValues(cusparseSpMatDescr_t spMatDescr, void *values) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMatDescr_t, void *); - static auto func_ptr = LoadSymbol("cusparseSpMatSetValues"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, values); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpMatSetStridedBatch(cusparseSpMatDescr_t spMatDescr, int batchCount) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMatDescr_t, int); - static auto func_ptr = LoadSymbol("cusparseSpMatSetStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, batchCount); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpMatGetStridedBatch( - const cusparseSpMatDescr_t spMatDescr, int *batchCount) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(const cusparseSpMatDescr_t, int *); - static auto func_ptr = LoadSymbol("cusparseSpMatGetStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, batchCount); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateDnMat( - cusparseDnMatDescr_t *dnMatDescr, int64_t rows, int64_t cols, int64_t ld, - void *values, cudaDataType valueType, cusparseOrder_t order) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseDnMatDescr_t *, int64_t, int64_t, int64_t, void *, cudaDataType, - cusparseOrder_t); - static auto func_ptr = LoadSymbol("cusparseCreateDnMat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnMatDescr, rows, cols, ld, values, valueType, order); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDestroyDnMat(cusparseDnMatDescr_t dnMatDescr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseDnMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseDestroyDnMat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnMatDescr); -} - -cusparseStatus_t CUSPARSEAPI cusparseDnMatGet( - const cusparseDnMatDescr_t dnMatDescr, int64_t *rows, int64_t *cols, - int64_t *ld, void **values, cudaDataType *type, cusparseOrder_t *order) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - const cusparseDnMatDescr_t, int64_t *, int64_t *, int64_t *, void **, - cudaDataType *, cusparseOrder_t *); - 
static auto func_ptr = LoadSymbol("cusparseDnMatGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnMatDescr, rows, cols, ld, values, type, order); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDnMatGetValues(const cusparseDnMatDescr_t dnMatDescr, void **values) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(const cusparseDnMatDescr_t, void **); - static auto func_ptr = LoadSymbol("cusparseDnMatGetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnMatDescr, values); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDnMatSetValues(cusparseDnMatDescr_t dnMatDescr, void *values) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseDnMatDescr_t, void *); - static auto func_ptr = LoadSymbol("cusparseDnMatSetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnMatDescr, values); -} - -cusparseStatus_t CUSPARSEAPI cusparseDnMatSetStridedBatch( - cusparseDnMatDescr_t dnMatDescr, int batchCount, int64_t batchStride) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseDnMatDescr_t, int, int64_t); - static auto func_ptr = LoadSymbol("cusparseDnMatSetStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnMatDescr, batchCount, batchStride); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDnMatGetStridedBatch(const cusparseDnMatDescr_t dnMatDescr, - int *batchCount, int64_t *batchStride) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(const cusparseDnMatDescr_t, - int *, int64_t *); - static auto func_ptr = LoadSymbol("cusparseDnMatGetStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnMatDescr, batchCount, batchStride); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpVV(cusparseHandle_t handle, cusparseOperation_t opX, - const cusparseSpVecDescr_t vecX, const cusparseDnVecDescr_t vecY, - void *result, cudaDataType computeType, void *externalBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, 
cusparseOperation_t, const cusparseSpVecDescr_t, - const cusparseDnVecDescr_t, void *, cudaDataType, void *); - static auto func_ptr = LoadSymbol("cusparseSpVV"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opX, vecX, vecY, result, computeType, externalBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpVV_bufferSize( - cusparseHandle_t handle, cusparseOperation_t opX, - const cusparseSpVecDescr_t vecX, const cusparseDnVecDescr_t vecY, - const void *result, cudaDataType computeType, size_t *bufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cusparseSpVecDescr_t, - const cusparseDnVecDescr_t, const void *, cudaDataType, size_t *); - static auto func_ptr = LoadSymbol("cusparseSpVV_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opX, vecX, vecY, result, computeType, bufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpMV( - cusparseHandle_t handle, cusparseOperation_t opA, const void *alpha, - const cusparseSpMatDescr_t matA, const cusparseDnVecDescr_t vecX, - const void *beta, const cusparseDnVecDescr_t vecY, cudaDataType computeType, - cusparseSpMVAlg_t alg, void *externalBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const void *, - const cusparseSpMatDescr_t, const cusparseDnVecDescr_t, const void *, - const cusparseDnVecDescr_t, cudaDataType, cusparseSpMVAlg_t, void *); - static auto func_ptr = LoadSymbol("cusparseSpMV"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, alpha, matA, vecX, beta, vecY, computeType, alg, - externalBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpMV_bufferSize( - cusparseHandle_t handle, cusparseOperation_t opA, const void *alpha, - const cusparseSpMatDescr_t matA, const cusparseDnVecDescr_t vecX, - const void *beta, const cusparseDnVecDescr_t vecY, cudaDataType computeType, - cusparseSpMVAlg_t alg, 
size_t *bufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const void *, - const cusparseSpMatDescr_t, const cusparseDnVecDescr_t, const void *, - const cusparseDnVecDescr_t, cudaDataType, cusparseSpMVAlg_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSpMV_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, alpha, matA, vecX, beta, vecY, computeType, alg, - bufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpMM( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - const void *alpha, const cusparseSpMatDescr_t matA, - const cusparseDnMatDescr_t matB, const void *beta, - cusparseDnMatDescr_t matC, cudaDataType computeType, cusparseSpMMAlg_t alg, - void *externalBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, - const cusparseSpMatDescr_t, const cusparseDnMatDescr_t, const void *, - cusparseDnMatDescr_t, cudaDataType, cusparseSpMMAlg_t, void *); - static auto func_ptr = LoadSymbol("cusparseSpMM"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, - alg, externalBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpMM_bufferSize( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - const void *alpha, const cusparseSpMatDescr_t matA, - const cusparseDnMatDescr_t matB, const void *beta, - cusparseDnMatDescr_t matC, cudaDataType computeType, cusparseSpMMAlg_t alg, - size_t *bufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, - const cusparseSpMatDescr_t, const cusparseDnMatDescr_t, const void *, - cusparseDnMatDescr_t, cudaDataType, cusparseSpMMAlg_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSpMM_bufferSize"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, - alg, bufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseConstrainedGeMM( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - const void *alpha, const cusparseDnMatDescr_t matA, - const cusparseDnMatDescr_t matB, const void *beta, - cusparseSpMatDescr_t matC, cudaDataType computeType, void *externalBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, - const cusparseDnMatDescr_t, const cusparseDnMatDescr_t, const void *, - cusparseSpMatDescr_t, cudaDataType, void *); - static auto func_ptr = LoadSymbol("cusparseConstrainedGeMM"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, - externalBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseConstrainedGeMM_bufferSize( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - const void *alpha, const cusparseDnMatDescr_t matA, - const cusparseDnMatDescr_t matB, const void *beta, - cusparseSpMatDescr_t matC, cudaDataType computeType, size_t *bufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, - const cusparseDnMatDescr_t, const cusparseDnMatDescr_t, const void *, - cusparseSpMatDescr_t, cudaDataType, size_t *); - static auto func_ptr = - LoadSymbol("cusparseConstrainedGeMM_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, - bufferSize); -} - -#endif // _WIN32 - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cusparse_11_0.inc b/third_party/xla/third_party/tsl/tsl/cuda/cusparse_11_0.inc deleted file mode 100644 index f762efb1932edb..00000000000000 --- 
a/third_party/xla/third_party/tsl/tsl/cuda/cusparse_11_0.inc +++ /dev/null @@ -1,7025 +0,0 @@ -// Auto-generated, do not edit. - -#define CUSPARSE_DEPRECATED(new_func) - -extern "C" { - -cusparseStatus_t CUSPARSEAPI cusparseCreate(cusparseHandle_t *handle) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t *); - static auto func_ptr = LoadSymbol("cusparseCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroy(cusparseHandle_t handle) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t); - static auto func_ptr = LoadSymbol("cusparseDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cusparseStatus_t CUSPARSEAPI cusparseGetVersion(cusparseHandle_t handle, - int *version) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int *); - static auto func_ptr = LoadSymbol("cusparseGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, version); -} - -cusparseStatus_t CUSPARSEAPI cusparseGetProperty(libraryPropertyType type, - int *value) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(libraryPropertyType, int *); - static auto func_ptr = LoadSymbol("cusparseGetProperty"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type, value); -} - -const char *CUSPARSEAPI cusparseGetErrorName(cusparseStatus_t status) { - using FuncPtr = const char *(CUSPARSEAPI *)(cusparseStatus_t); - static auto func_ptr = LoadSymbol("cusparseGetErrorName"); - if (!func_ptr) return "cusparseGetErrorName symbol not found."; - return func_ptr(status); -} - -const char *CUSPARSEAPI cusparseGetErrorString(cusparseStatus_t status) { - using FuncPtr = const char *(CUSPARSEAPI *)(cusparseStatus_t); - static auto func_ptr = LoadSymbol("cusparseGetErrorString"); - if (!func_ptr) return "cusparseGetErrorString symbol not found."; - return func_ptr(status); -} - 
-cusparseStatus_t CUSPARSEAPI cusparseSetStream(cusparseHandle_t handle, - cudaStream_t streamId) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cusparseSetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cusparseStatus_t CUSPARSEAPI cusparseGetStream(cusparseHandle_t handle, - cudaStream_t *streamId) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, cudaStream_t *); - static auto func_ptr = LoadSymbol("cusparseGetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cusparseStatus_t CUSPARSEAPI -cusparseGetPointerMode(cusparseHandle_t handle, cusparsePointerMode_t *mode) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, - cusparsePointerMode_t *); - static auto func_ptr = LoadSymbol("cusparseGetPointerMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSetPointerMode(cusparseHandle_t handle, cusparsePointerMode_t mode) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, cusparsePointerMode_t); - static auto func_ptr = LoadSymbol("cusparseSetPointerMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCreateMatDescr(cusparseMatDescr_t *descrA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t *); - static auto func_ptr = LoadSymbol("cusparseCreateMatDescr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDestroyMatDescr(cusparseMatDescr_t descrA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseDestroyMatDescr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA); -} - 
-cusparseStatus_t CUSPARSEAPI -cusparseCopyMatDescr(cusparseMatDescr_t dest, const cusparseMatDescr_t src) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t, - const cusparseMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseCopyMatDescr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dest, src); -} - -cusparseStatus_t CUSPARSEAPI cusparseSetMatType(cusparseMatDescr_t descrA, - cusparseMatrixType_t type) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t, cusparseMatrixType_t); - static auto func_ptr = LoadSymbol("cusparseSetMatType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA, type); -} - -cusparseMatrixType_t CUSPARSEAPI -cusparseGetMatType(const cusparseMatDescr_t descrA) { - using FuncPtr = cusparseMatrixType_t(CUSPARSEAPI *)(const cusparseMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseGetMatType"); - if (!func_ptr) return cusparseMatrixType_t(-1); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSetMatFillMode(cusparseMatDescr_t descrA, cusparseFillMode_t fillMode) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t, cusparseFillMode_t); - static auto func_ptr = LoadSymbol("cusparseSetMatFillMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA, fillMode); -} - -cusparseFillMode_t CUSPARSEAPI -cusparseGetMatFillMode(const cusparseMatDescr_t descrA) { - using FuncPtr = cusparseFillMode_t(CUSPARSEAPI *)(const cusparseMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseGetMatFillMode"); - if (!func_ptr) return cusparseFillMode_t(-1); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSetMatDiagType(cusparseMatDescr_t descrA, cusparseDiagType_t diagType) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t, cusparseDiagType_t); - static auto func_ptr = LoadSymbol("cusparseSetMatDiagType"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(descrA, diagType); -} - -cusparseDiagType_t CUSPARSEAPI -cusparseGetMatDiagType(const cusparseMatDescr_t descrA) { - using FuncPtr = cusparseDiagType_t(CUSPARSEAPI *)(const cusparseMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseGetMatDiagType"); - if (!func_ptr) return cusparseDiagType_t(-1); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI cusparseSetMatIndexBase(cusparseMatDescr_t descrA, - cusparseIndexBase_t base) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSetMatIndexBase"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA, base); -} - -cusparseIndexBase_t CUSPARSEAPI -cusparseGetMatIndexBase(const cusparseMatDescr_t descrA) { - using FuncPtr = cusparseIndexBase_t(CUSPARSEAPI *)(const cusparseMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseGetMatIndexBase"); - if (!func_ptr) return cusparseIndexBase_t(-1); - return func_ptr(descrA); -} - -CUSPARSE_DEPRECATED(cusparseSpSV) -cusparseStatus_t CUSPARSEAPI cusparseCreateCsrsv2Info(csrsv2Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrsv2Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateCsrsv2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -CUSPARSE_DEPRECATED(cusparseSpSV) -cusparseStatus_t CUSPARSEAPI cusparseDestroyCsrsv2Info(csrsv2Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrsv2Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyCsrsv2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsric02Info(csric02Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csric02Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateCsric02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - 
-cusparseStatus_t CUSPARSEAPI cusparseDestroyCsric02Info(csric02Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csric02Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyCsric02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateBsric02Info(bsric02Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsric02Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateBsric02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyBsric02Info(bsric02Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsric02Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyBsric02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsrilu02Info(csrilu02Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrilu02Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateCsrilu02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyCsrilu02Info(csrilu02Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrilu02Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyCsrilu02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateBsrilu02Info(bsrilu02Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrilu02Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateBsrilu02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyBsrilu02Info(bsrilu02Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrilu02Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyBsrilu02Info"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateBsrsv2Info(bsrsv2Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrsv2Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateBsrsv2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyBsrsv2Info(bsrsv2Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrsv2Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyBsrsv2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateBsrsm2Info(bsrsm2Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrsm2Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateBsrsm2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyBsrsm2Info(bsrsm2Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrsm2Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyBsrsm2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsru2csrInfo(csru2csrInfo_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csru2csrInfo_t *); - static auto func_ptr = LoadSymbol("cusparseCreateCsru2csrInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyCsru2csrInfo(csru2csrInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csru2csrInfo_t); - static auto func_ptr = LoadSymbol("cusparseDestroyCsru2csrInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCreateColorInfo(cusparseColorInfo_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseColorInfo_t *); - static auto func_ptr = 
LoadSymbol("cusparseCreateColorInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDestroyColorInfo(cusparseColorInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseColorInfo_t); - static auto func_ptr = LoadSymbol("cusparseDestroyColorInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseSetColorAlgs(cusparseColorInfo_t info, - cusparseColorAlg_t alg) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseColorInfo_t, cusparseColorAlg_t); - static auto func_ptr = LoadSymbol("cusparseSetColorAlgs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, alg); -} - -cusparseStatus_t CUSPARSEAPI cusparseGetColorAlgs(cusparseColorInfo_t info, - cusparseColorAlg_t *alg) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseColorInfo_t, - cusparseColorAlg_t *); - static auto func_ptr = LoadSymbol("cusparseGetColorAlgs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, alg); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreatePruneInfo(pruneInfo_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(pruneInfo_t *); - static auto func_ptr = LoadSymbol("cusparseCreatePruneInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyPruneInfo(pruneInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(pruneInfo_t); - static auto func_ptr = LoadSymbol("cusparseDestroyPruneInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -CUSPARSE_DEPRECATED(cusparseAxpby) -cusparseStatus_t CUSPARSEAPI cusparseSaxpyi(cusparseHandle_t handle, int nnz, - const float *alpha, - const float *xVal, const int *xInd, - float *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const float *, const 
float *, const int *, float *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSaxpyi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, alpha, xVal, xInd, y, idxBase); -} - -CUSPARSE_DEPRECATED(cusparseAxpby) -cusparseStatus_t CUSPARSEAPI cusparseDaxpyi(cusparseHandle_t handle, int nnz, - const double *alpha, - const double *xVal, const int *xInd, - double *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, const double *, const int *, - double *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseDaxpyi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, alpha, xVal, xInd, y, idxBase); -} - -CUSPARSE_DEPRECATED(cusparseAxpby) -cusparseStatus_t CUSPARSEAPI cusparseCaxpyi(cusparseHandle_t handle, int nnz, - const cuComplex *alpha, - const cuComplex *xVal, - const int *xInd, cuComplex *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const cuComplex *, const int *, - cuComplex *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseCaxpyi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, alpha, xVal, xInd, y, idxBase); -} - -CUSPARSE_DEPRECATED(cusparseAxpby) -cusparseStatus_t CUSPARSEAPI cusparseZaxpyi(cusparseHandle_t handle, int nnz, - const cuDoubleComplex *alpha, - const cuDoubleComplex *xVal, - const int *xInd, cuDoubleComplex *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, const cuDoubleComplex *, - const int *, cuDoubleComplex *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseZaxpyi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, alpha, xVal, xInd, y, idxBase); -} - -CUSPARSE_DEPRECATED(cusparseGather) 
-cusparseStatus_t CUSPARSEAPI cusparseSgthr(cusparseHandle_t handle, int nnz, - const float *y, float *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const float *, float *, const int *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSgthr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -CUSPARSE_DEPRECATED(cusparseGather) -cusparseStatus_t CUSPARSEAPI cusparseDgthr(cusparseHandle_t handle, int nnz, - const double *y, double *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, double *, const int *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseDgthr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -CUSPARSE_DEPRECATED(cusparseGather) -cusparseStatus_t CUSPARSEAPI cusparseCgthr(cusparseHandle_t handle, int nnz, - const cuComplex *y, cuComplex *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, cuComplex *, const int *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseCgthr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -CUSPARSE_DEPRECATED(cusparseGather) -cusparseStatus_t CUSPARSEAPI cusparseZgthr(cusparseHandle_t handle, int nnz, - const cuDoubleComplex *y, - cuDoubleComplex *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, cuDoubleComplex *, - const int *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseZgthr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, 
xVal, xInd, idxBase); -} - -CUSPARSE_DEPRECATED(cusparseGather) -cusparseStatus_t CUSPARSEAPI cusparseSgthrz(cusparseHandle_t handle, int nnz, - float *y, float *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, float *, float *, - const int *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSgthrz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -CUSPARSE_DEPRECATED(cusparseGather) -cusparseStatus_t CUSPARSEAPI cusparseDgthrz(cusparseHandle_t handle, int nnz, - double *y, double *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, double *, double *, - const int *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseDgthrz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -CUSPARSE_DEPRECATED(cusparseGather) -cusparseStatus_t CUSPARSEAPI cusparseCgthrz(cusparseHandle_t handle, int nnz, - cuComplex *y, cuComplex *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cuComplex *, cuComplex *, const int *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseCgthrz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -CUSPARSE_DEPRECATED(cusparseGather) -cusparseStatus_t CUSPARSEAPI cusparseZgthrz(cusparseHandle_t handle, int nnz, - cuDoubleComplex *y, - cuDoubleComplex *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cuDoubleComplex *, cuDoubleComplex *, const int *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseZgthrz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -CUSPARSE_DEPRECATED(cusparseScatter) -cusparseStatus_t CUSPARSEAPI cusparseSsctr(cusparseHandle_t handle, int nnz, - const float *xVal, const int *xInd, - float *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, - const float *, const int *, - float *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSsctr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, idxBase); -} - -CUSPARSE_DEPRECATED(cusparseScatter) -cusparseStatus_t CUSPARSEAPI cusparseDsctr(cusparseHandle_t handle, int nnz, - const double *xVal, const int *xInd, - double *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, const int *, double *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseDsctr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, idxBase); -} - -CUSPARSE_DEPRECATED(cusparseScatter) -cusparseStatus_t CUSPARSEAPI cusparseCsctr(cusparseHandle_t handle, int nnz, - const cuComplex *xVal, - const int *xInd, cuComplex *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const int *, cuComplex *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseCsctr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, idxBase); -} - -CUSPARSE_DEPRECATED(cusparseScatter) -cusparseStatus_t CUSPARSEAPI cusparseZsctr(cusparseHandle_t handle, int nnz, - const cuDoubleComplex *xVal, - const int *xInd, cuDoubleComplex *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, const int *, - cuDoubleComplex *, cusparseIndexBase_t); - static auto func_ptr = 
LoadSymbol("cusparseZsctr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, idxBase); -} - -CUSPARSE_DEPRECATED(cusparseRot) -cusparseStatus_t CUSPARSEAPI cusparseSroti(cusparseHandle_t handle, int nnz, - float *xVal, const int *xInd, - float *y, const float *c, - const float *s, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, float *, const int *, float *, const float *, - const float *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSroti"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, c, s, idxBase); -} - -CUSPARSE_DEPRECATED(cusparseRot) -cusparseStatus_t CUSPARSEAPI cusparseDroti(cusparseHandle_t handle, int nnz, - double *xVal, const int *xInd, - double *y, const double *c, - const double *s, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, double *, const int *, double *, const double *, - const double *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseDroti"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, c, s, idxBase); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSgemvi(cusparseHandle_t handle, cusparseOperation_t transA, int m, - int n, const float *alpha, const float *A, int lda, int nnz, - const float *xVal, const int *xInd, const float *beta, float *y, - cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const float *, - const float *, int, int, const float *, const int *, const float *, - float *, cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseSgemvi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, A, lda, nnz, xVal, xInd, beta, y, - idxBase, pBuffer); -} - -cusparseStatus_t 
CUSPARSEAPI -cusparseSgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, - int m, int n, int nnz, int *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseSgemvi_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDgemvi(cusparseHandle_t handle, cusparseOperation_t transA, int m, - int n, const double *alpha, const double *A, int lda, int nnz, - const double *xVal, const int *xInd, const double *beta, - double *y, cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const double *, - const double *, int, int, const double *, const int *, const double *, - double *, cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseDgemvi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, A, lda, nnz, xVal, xInd, beta, y, - idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, - int m, int n, int nnz, int *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseDgemvi_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgemvi( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, - const cuComplex *alpha, const cuComplex *A, int lda, int nnz, - const cuComplex *xVal, const int *xInd, const cuComplex *beta, cuComplex *y, - cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, 
cusparseOperation_t, int, int, const cuComplex *, - const cuComplex *, int, int, const cuComplex *, const int *, - const cuComplex *, cuComplex *, cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseCgemvi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, A, lda, nnz, xVal, xInd, beta, y, - idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, - int m, int n, int nnz, int *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseCgemvi_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgemvi( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, - const cuDoubleComplex *alpha, const cuDoubleComplex *A, int lda, int nnz, - const cuDoubleComplex *xVal, const int *xInd, const cuDoubleComplex *beta, - cuDoubleComplex *y, cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, int, const cuDoubleComplex *, const int *, - const cuDoubleComplex *, cuDoubleComplex *, cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseZgemvi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, A, lda, nnz, xVal, xInd, beta, y, - idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseZgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, - int m, int n, int nnz, int *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int *); - static auto func_ptr = 
LoadSymbol("cusparseZgemvi_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, pBufferSize); -} - -CUSPARSE_DEPRECATED(cusparseSpMV) -cusparseStatus_t CUSPARSEAPI cusparseCsrmvEx_bufferSize( - cusparseHandle_t handle, cusparseAlgMode_t alg, cusparseOperation_t transA, - int m, int n, int nnz, const void *alpha, cudaDataType alphatype, - const cusparseMatDescr_t descrA, const void *csrValA, - cudaDataType csrValAtype, const int *csrRowPtrA, const int *csrColIndA, - const void *x, cudaDataType xtype, const void *beta, cudaDataType betatype, - void *y, cudaDataType ytype, cudaDataType executiontype, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseAlgMode_t, cusparseOperation_t, int, int, int, - const void *, cudaDataType, const cusparseMatDescr_t, const void *, - cudaDataType, const int *, const int *, const void *, cudaDataType, - const void *, cudaDataType, void *, cudaDataType, cudaDataType, size_t *); - static auto func_ptr = LoadSymbol("cusparseCsrmvEx_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alg, transA, m, n, nnz, alpha, alphatype, descrA, - csrValA, csrValAtype, csrRowPtrA, csrColIndA, x, xtype, beta, - betatype, y, ytype, executiontype, bufferSizeInBytes); -} - -CUSPARSE_DEPRECATED(cusparseSpMV) -cusparseStatus_t CUSPARSEAPI cusparseCsrmvEx( - cusparseHandle_t handle, cusparseAlgMode_t alg, cusparseOperation_t transA, - int m, int n, int nnz, const void *alpha, cudaDataType alphatype, - const cusparseMatDescr_t descrA, const void *csrValA, - cudaDataType csrValAtype, const int *csrRowPtrA, const int *csrColIndA, - const void *x, cudaDataType xtype, const void *beta, cudaDataType betatype, - void *y, cudaDataType ytype, cudaDataType executiontype, void *buffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseAlgMode_t, cusparseOperation_t, int, int, int, - const 
void *, cudaDataType, const cusparseMatDescr_t, const void *, - cudaDataType, const int *, const int *, const void *, cudaDataType, - const void *, cudaDataType, void *, cudaDataType, cudaDataType, void *); - static auto func_ptr = LoadSymbol("cusparseCsrmvEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alg, transA, m, n, nnz, alpha, alphatype, descrA, - csrValA, csrValAtype, csrRowPtrA, csrColIndA, x, xtype, beta, - betatype, y, ytype, executiontype, buffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrmv( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nb, int nnzb, const float *alpha, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const float *x, const float *beta, float *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - const float *, const cusparseMatDescr_t, const float *, const int *, - const int *, int, const float *, const float *, float *); - static auto func_ptr = LoadSymbol("cusparseSbsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim, - x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrmv( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nb, int nnzb, const double *alpha, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const double *x, const double *beta, double *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - const double *, const cusparseMatDescr_t, const double *, const int *, - const int *, int, const double *, const double *, 
double *); - static auto func_ptr = LoadSymbol("cusparseDbsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim, - x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCbsrmv(cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nb, int nnzb, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *bsrSortedValA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, int blockDim, const cuComplex *x, - const cuComplex *beta, cuComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, int, const cuComplex *, const cuComplex *, - cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCbsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim, - x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrmv( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nb, int nnzb, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *bsrSortedValA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, int blockDim, const cuDoubleComplex *x, - const cuDoubleComplex *beta, cuDoubleComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZbsrmv"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim, - x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSbsrxmv(cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int sizeOfMask, int mb, int nb, - int nnzb, const float *alpha, const cusparseMatDescr_t descrA, - const float *bsrSortedValA, const int *bsrSortedMaskPtrA, - const int *bsrSortedRowPtrA, const int *bsrSortedEndPtrA, - const int *bsrSortedColIndA, int blockDim, const float *x, - const float *beta, float *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - int, const float *, const cusparseMatDescr_t, const float *, const int *, - const int *, const int *, const int *, int, const float *, const float *, - float *); - static auto func_ptr = LoadSymbol("cusparseSbsrxmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, sizeOfMask, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedMaskPtrA, bsrSortedRowPtrA, - bsrSortedEndPtrA, bsrSortedColIndA, blockDim, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDbsrxmv(cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int sizeOfMask, int mb, int nb, - int nnzb, const double *alpha, const cusparseMatDescr_t descrA, - const double *bsrSortedValA, const int *bsrSortedMaskPtrA, - const int *bsrSortedRowPtrA, const int *bsrSortedEndPtrA, - const int *bsrSortedColIndA, int blockDim, const double *x, - const double *beta, double *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - int, const double *, const cusparseMatDescr_t, const double *, - const int *, const int *, const int *, const int *, int, const double *, - const double *, double *); - static auto func_ptr = 
LoadSymbol("cusparseDbsrxmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, sizeOfMask, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedMaskPtrA, bsrSortedRowPtrA, - bsrSortedEndPtrA, bsrSortedColIndA, blockDim, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrxmv( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int sizeOfMask, int mb, int nb, int nnzb, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *bsrSortedValA, const int *bsrSortedMaskPtrA, - const int *bsrSortedRowPtrA, const int *bsrSortedEndPtrA, - const int *bsrSortedColIndA, int blockDim, const cuComplex *x, - const cuComplex *beta, cuComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - int, const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, const int *, const int *, int, - const cuComplex *, const cuComplex *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCbsrxmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, sizeOfMask, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedMaskPtrA, bsrSortedRowPtrA, - bsrSortedEndPtrA, bsrSortedColIndA, blockDim, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrxmv( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int sizeOfMask, int mb, int nb, int nnzb, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *bsrSortedValA, const int *bsrSortedMaskPtrA, - const int *bsrSortedRowPtrA, const int *bsrSortedEndPtrA, - const int *bsrSortedColIndA, int blockDim, const cuDoubleComplex *x, - const cuDoubleComplex *beta, cuDoubleComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - int, const 
cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, const int *, - const int *, int, const cuDoubleComplex *, const cuDoubleComplex *, - cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZbsrxmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, sizeOfMask, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedMaskPtrA, bsrSortedRowPtrA, - bsrSortedEndPtrA, bsrSortedColIndA, blockDim, x, beta, y); -} - -CUSPARSE_DEPRECATED(cusparseSpSV) -cusparseStatus_t CUSPARSEAPI cusparseXcsrsv2_zeroPivot(cusparseHandle_t handle, - csrsv2Info_t info, - int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, csrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXcsrsv2_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -CUSPARSE_DEPRECATED(cusparseSpSV) -cusparseStatus_t CUSPARSEAPI cusparseScsrsv2_bufferSize( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, csrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseScsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSizeInBytes); -} - -CUSPARSE_DEPRECATED(cusparseSpSV) -cusparseStatus_t CUSPARSEAPI cusparseDcsrsv2_bufferSize( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - int 
*pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, csrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDcsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSizeInBytes); -} - -CUSPARSE_DEPRECATED(cusparseSpSV) -cusparseStatus_t CUSPARSEAPI cusparseCcsrsv2_bufferSize( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, csrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCcsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSizeInBytes); -} - -CUSPARSE_DEPRECATED(cusparseSpSV) -cusparseStatus_t CUSPARSEAPI cusparseZcsrsv2_bufferSize( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, csrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZcsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, 
info, pBufferSizeInBytes); -} - -CUSPARSE_DEPRECATED(cusparseSpSV) -cusparseStatus_t CUSPARSEAPI cusparseScsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, csrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseScsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSize); -} - -CUSPARSE_DEPRECATED(cusparseSpSV) -cusparseStatus_t CUSPARSEAPI cusparseDcsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, csrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSize); -} - -CUSPARSE_DEPRECATED(cusparseSpSV) -cusparseStatus_t CUSPARSEAPI cusparseCcsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, 
const cusparseMatDescr_t, - cuComplex *, const int *, const int *, csrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSize); -} - -CUSPARSE_DEPRECATED(cusparseSpSV) -cusparseStatus_t CUSPARSEAPI cusparseZcsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, csrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSize); -} - -CUSPARSE_DEPRECATED(cusparseSpSV) -cusparseStatus_t CUSPARSEAPI cusparseScsrsv2_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, csrsv2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, policy, pBuffer); -} - -CUSPARSE_DEPRECATED(cusparseSpSV) -cusparseStatus_t 
CUSPARSEAPI cusparseDcsrsv2_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, csrsv2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, policy, pBuffer); -} - -CUSPARSE_DEPRECATED(cusparseSpSV) -cusparseStatus_t CUSPARSEAPI cusparseCcsrsv2_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, csrsv2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, policy, pBuffer); -} - -CUSPARSE_DEPRECATED(cusparseSpSV) -cusparseStatus_t CUSPARSEAPI cusparseZcsrsv2_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, csrsv2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, policy, pBuffer); -} - -CUSPARSE_DEPRECATED(cusparseSpSV) -cusparseStatus_t CUSPARSEAPI cusparseScsrsv2_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const float *alpha, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrsv2Info_t info, const float *f, float *x, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - csrsv2Info_t, const float *, float *, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x, policy, - pBuffer); -} - -CUSPARSE_DEPRECATED(cusparseSpSV) -cusparseStatus_t CUSPARSEAPI cusparseDcsrsv2_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const double *alpha, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrsv2Info_t info, const double *f, double *x, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - csrsv2Info_t, const double *, 
double *, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x, policy, - pBuffer); -} - -CUSPARSE_DEPRECATED(cusparseSpSV) -cusparseStatus_t CUSPARSEAPI cusparseCcsrsv2_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrsv2Info_t info, const cuComplex *f, - cuComplex *x, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cuComplex *, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - csrsv2Info_t, const cuComplex *, cuComplex *, cusparseSolvePolicy_t, - void *); - static auto func_ptr = LoadSymbol("cusparseCcsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x, policy, - pBuffer); -} - -CUSPARSE_DEPRECATED(cusparseSpSV) -cusparseStatus_t CUSPARSEAPI cusparseZcsrsv2_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrsv2Info_t info, const cuDoubleComplex *f, - cuDoubleComplex *x, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, csrsv2Info_t, const cuDoubleComplex *, cuDoubleComplex *, - cusparseSolvePolicy_t, void *); - static 
auto func_ptr = LoadSymbol("cusparseZcsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXbsrsv2_zeroPivot(cusparseHandle_t handle, - bsrsv2Info_t info, - int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, bsrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXbsrsv2_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsv2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, float *, const int *, const int *, int, - bsrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseSbsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsv2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, double *, const int *, const int *, int, - bsrsv2Info_t, 
int *); - static auto func_ptr = LoadSymbol("cusparseDbsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsv2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, cuComplex *, const int *, const int *, int, - bsrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCbsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsv2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, cuDoubleComplex *, const int *, const int *, - int, bsrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZbsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseSbsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockSize, - bsrsv2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, float *, const int *, const int *, int, - bsrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSbsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockSize, info, - pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockSize, - bsrsv2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, double *, const int *, const int *, int, - bsrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDbsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockSize, info, - pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockSize, - bsrsv2Info_t info, size_t *pBufferSize) { - using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, cuComplex *, const int *, const int *, int, - bsrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCbsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockSize, info, - pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockSize, - bsrsv2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, cuDoubleComplex *, const int *, const int *, - int, bsrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZbsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockSize, info, - pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsv2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, const float *, const int *, const int *, int, - bsrsv2Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsrsv2_analysis"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsv2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, const double *, const int *, const int *, int, - bsrsv2Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsv2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - int, bsrsv2Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseZbsrsv2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, int, bsrsv2Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsv2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, const float *alpha, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, const float *f, float *x, cusparseSolvePolicy_t policy, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const float *, const cusparseMatDescr_t, const float *, const int *, - const int *, int, bsrsv2Info_t, const float *, float *, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, alpha, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, f, x, - policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsv2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, const 
double *alpha, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, const double *f, double *x, cusparseSolvePolicy_t policy, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const double *, const cusparseMatDescr_t, const double *, const int *, - const int *, int, bsrsv2Info_t, const double *, double *, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, alpha, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, f, x, - policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsv2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, const cuComplex *alpha, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, const cuComplex *f, cuComplex *x, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, int, bsrsv2Info_t, const cuComplex *, - cuComplex *, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, alpha, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, f, x, - policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsv2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, const cuDoubleComplex 
*alpha, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, const cuDoubleComplex *f, cuDoubleComplex *x, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, bsrsv2Info_t, - const cuDoubleComplex *, cuDoubleComplex *, cusparseSolvePolicy_t, - void *); - static auto func_ptr = LoadSymbol("cusparseZbsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, alpha, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, f, x, - policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrmm( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int kb, int nnzb, const float *alpha, const cusparseMatDescr_t descrA, - const float *bsrSortedValA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, const int blockSize, const float *B, - const int ldb, const float *beta, float *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - const int, const float *, const int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cusparseSbsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, kb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockSize, - B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrmm( - cusparseHandle_t handle, cusparseDirection_t dirA, - 
cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int kb, int nnzb, const double *alpha, const cusparseMatDescr_t descrA, - const double *bsrSortedValA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, const int blockSize, const double *B, - const int ldb, const double *beta, double *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - const int, const double *, const int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cusparseDbsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, kb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockSize, - B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrmm( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int kb, int nnzb, const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *bsrSortedValA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, const int blockSize, const cuComplex *B, - const int ldb, const cuComplex *beta, cuComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, int, const cuComplex *, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - const int, const cuComplex *, const int, const cuComplex *, cuComplex *, - int); - static auto func_ptr = LoadSymbol("cusparseCbsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, kb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockSize, - B, ldb, beta, C, ldc); -} - 
-cusparseStatus_t CUSPARSEAPI cusparseZbsrmm( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int kb, int nnzb, const cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, - const int blockSize, const cuDoubleComplex *B, const int ldb, - const cuDoubleComplex *beta, cuDoubleComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, const int, const cuDoubleComplex *, const int, - const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cusparseZbsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, kb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockSize, - B, ldb, beta, C, ldc); -} - -CUSPARSE_DEPRECATED(cusparseSpMM) -cusparseStatus_t CUSPARSEAPI cusparseSgemmi( - cusparseHandle_t handle, int m, int n, int k, int nnz, const float *alpha, - const float *A, int lda, const float *cscValB, const int *cscColPtrB, - const int *cscRowIndB, const float *beta, float *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, int, const float *, const float *, int, - const float *, const int *, const int *, const float *, float *, int); - static auto func_ptr = LoadSymbol("cusparseSgemmi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, - cscRowIndB, beta, C, ldc); -} - -CUSPARSE_DEPRECATED(cusparseSpMM) -cusparseStatus_t CUSPARSEAPI cusparseDgemmi( - cusparseHandle_t handle, int m, int n, int k, int nnz, const double *alpha, - const 
double *A, int lda, const double *cscValB, const int *cscColPtrB, - const int *cscRowIndB, const double *beta, double *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, int, const double *, const double *, int, - const double *, const int *, const int *, const double *, double *, int); - static auto func_ptr = LoadSymbol("cusparseDgemmi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, - cscRowIndB, beta, C, ldc); -} - -CUSPARSE_DEPRECATED(cusparseSpMM) -cusparseStatus_t CUSPARSEAPI cusparseCgemmi( - cusparseHandle_t handle, int m, int n, int k, int nnz, - const cuComplex *alpha, const cuComplex *A, int lda, - const cuComplex *cscValB, const int *cscColPtrB, const int *cscRowIndB, - const cuComplex *beta, cuComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, const int *, const int *, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCgemmi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, - cscRowIndB, beta, C, ldc); -} - -CUSPARSE_DEPRECATED(cusparseSpMM) -cusparseStatus_t CUSPARSEAPI -cusparseZgemmi(cusparseHandle_t handle, int m, int n, int k, int nnz, - const cuDoubleComplex *alpha, const cuDoubleComplex *A, int lda, - const cuDoubleComplex *cscValB, const int *cscColPtrB, - const int *cscRowIndB, const cuDoubleComplex *beta, - cuDoubleComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, const int *, - const int *, const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cusparseZgemmi"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, - cscRowIndB, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsrsm2Info(csrsm2Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrsm2Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateCsrsm2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyCsrsm2Info(csrsm2Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrsm2Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyCsrsm2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrsm2_zeroPivot(cusparseHandle_t handle, - csrsm2Info_t info, - int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, csrsm2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXcsrsm2_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsm2_bufferSizeExt( - cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, const float *alpha, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *B, - int ldb, csrsm2Info_t info, cusparseSolvePolicy_t policy, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const float *, const cusparseMatDescr_t, const float *, const int *, - const int *, const float *, int, csrsm2Info_t, cusparseSolvePolicy_t, - size_t *); - static auto func_ptr = LoadSymbol("cusparseScsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, 
descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsm2_bufferSizeExt( - cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, const double *alpha, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, const double *B, - int ldb, csrsm2Info_t info, cusparseSolvePolicy_t policy, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const double *, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, int, csrsm2Info_t, - cusparseSolvePolicy_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsm2_bufferSizeExt( - cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuComplex *B, int ldb, csrsm2Info_t info, - cusparseSolvePolicy_t policy, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, const cuComplex *, int, csrsm2Info_t, - cusparseSolvePolicy_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsm2_bufferSizeExt( - cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *B, int ldb, - csrsm2Info_t info, cusparseSolvePolicy_t policy, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - const cuDoubleComplex *, int, csrsm2Info_t, cusparseSolvePolicy_t, - size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsm2_analysis( - cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, const float *alpha, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *B, - int ldb, csrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const float *, const cusparseMatDescr_t, const float *, const int *, - const int *, const float *, int, csrsm2Info_t, cusparseSolvePolicy_t, - void *); - static auto func_ptr = LoadSymbol("cusparseScsrsm2_analysis"); 
- if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsm2_analysis( - cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, const double *alpha, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, const double *B, - int ldb, csrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const double *, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, int, csrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsm2_analysis( - cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuComplex *B, int ldb, csrsm2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, const cuComplex *, int, csrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsrsm2_analysis"); 
- if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsm2_analysis( - cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *B, int ldb, - csrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - const cuDoubleComplex *, int, csrsm2Info_t, cusparseSolvePolicy_t, - void *); - static auto func_ptr = LoadSymbol("cusparseZcsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsm2_solve( - cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, const float *alpha, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, float *B, int ldb, - csrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const float *, const cusparseMatDescr_t, const float *, const int *, - const int *, float *, int, csrsm2Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = 
LoadSymbol("cusparseScsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsm2_solve( - cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, const double *alpha, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, double *B, - int ldb, csrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const double *, const cusparseMatDescr_t, const double *, - const int *, const int *, double *, int, csrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsm2_solve( - cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cuComplex *B, int ldb, csrsm2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, cuComplex *, int, csrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = 
LoadSymbol("cusparseCcsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsm2_solve( - cusparseHandle_t handle, int algo, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int nrhs, int nnz, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cuDoubleComplex *B, int ldb, csrsm2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cusparseOperation_t, cusparseOperation_t, int, int, - int, const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, cuDoubleComplex *, int, - csrsm2Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, transA, transB, m, nrhs, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXbsrsm2_zeroPivot(cusparseHandle_t handle, - bsrsm2Info_t info, - int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, bsrsm2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXbsrsm2_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsm2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t 
info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, int, bsrsm2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseSbsrsm2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsm2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, int, bsrsm2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDbsrsm2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsm2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const 
cusparseMatDescr_t, cuComplex *, - const int *, const int *, int, bsrsm2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCbsrsm2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsm2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrsm2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZbsrsm2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsm2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, int, bsrsm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSbsrsm2_bufferSizeExt"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsm2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, int, bsrsm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDbsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsm2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, int, bsrsm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCbsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSize); -} - -cusparseStatus_t 
CUSPARSEAPI cusparseZbsrsm2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrsm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZbsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsm2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, const float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, bsrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsm2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t 
transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, const double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, bsrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsm2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, const cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, bsrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsm2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, - const cuDoubleComplex 
*bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, bsrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsm2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const float *alpha, const cusparseMatDescr_t descrA, - const float *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info, - const float *B, int ldb, float *X, int ldx, cusparseSolvePolicy_t policy, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, int, - bsrsm2Info_t, const float *, int, float *, int, cusparseSolvePolicy_t, - void *); - static auto func_ptr = LoadSymbol("cusparseSbsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, alpha, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, B, ldb, X, ldx, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsm2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const 
double *alpha, const cusparseMatDescr_t descrA, - const double *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info, - const double *B, int ldb, double *X, int ldx, cusparseSolvePolicy_t policy, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, int, - bsrsm2Info_t, const double *, int, double *, int, cusparseSolvePolicy_t, - void *); - static auto func_ptr = LoadSymbol("cusparseDbsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, alpha, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, B, ldb, X, ldx, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsm2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info, - const cuComplex *B, int ldb, cuComplex *X, int ldx, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cuComplex *, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - int, bsrsm2Info_t, const cuComplex *, int, cuComplex *, int, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, alpha, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, B, ldb, X, ldx, policy, 
pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsm2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info, - const cuDoubleComplex *B, int ldb, cuDoubleComplex *X, int ldx, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, int, bsrsm2Info_t, const cuDoubleComplex *, int, - cuDoubleComplex *, int, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, alpha, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, B, ldb, X, ldx, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrilu02_numericBoost( - cusparseHandle_t handle, csrilu02Info_t info, int enable_boost, double *tol, - float *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, csrilu02Info_t, int, double *, float *); - static auto func_ptr = LoadSymbol("cusparseScsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02_numericBoost( - cusparseHandle_t handle, csrilu02Info_t info, int enable_boost, double *tol, - double *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, csrilu02Info_t, int, double *, double *); - static auto func_ptr = LoadSymbol("cusparseDcsrilu02_numericBoost"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02_numericBoost( - cusparseHandle_t handle, csrilu02Info_t info, int enable_boost, double *tol, - cuComplex *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, csrilu02Info_t, int, double *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCcsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02_numericBoost( - cusparseHandle_t handle, csrilu02Info_t info, int enable_boost, double *tol, - cuDoubleComplex *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, csrilu02Info_t, int, double *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZcsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrilu02_zeroPivot( - cusparseHandle_t handle, csrilu02Info_t info, int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, csrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXcsrilu02_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrilu02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseScsrilu02_bufferSize"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDcsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCcsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csrilu02Info_t, int *); - static auto 
func_ptr = LoadSymbol("cusparseZcsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrilu02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedVal, const int *csrSortedRowPtr, const int *csrSortedColInd, - csrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseScsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, 
csrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrilu02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using 
FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, csrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrilu02( - cusparseHandle_t 
handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedValA_valM, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedValA_valM, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedValA_valM, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, 
csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedValA_valM, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02_numericBoost( - cusparseHandle_t handle, bsrilu02Info_t info, int enable_boost, double *tol, - float *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, bsrilu02Info_t, int, double *, float *); - static auto func_ptr = LoadSymbol("cusparseSbsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02_numericBoost( - cusparseHandle_t handle, bsrilu02Info_t info, int enable_boost, double *tol, - double *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, bsrilu02Info_t, int, double *, double *); - static auto func_ptr = LoadSymbol("cusparseDbsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02_numericBoost( - cusparseHandle_t handle, bsrilu02Info_t info, int enable_boost, double *tol, - cuComplex *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, 
bsrilu02Info_t, int, double *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCbsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02_numericBoost( - cusparseHandle_t handle, bsrilu02Info_t info, int enable_boost, double *tol, - cuDoubleComplex *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, bsrilu02Info_t, int, double *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZbsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseXbsrilu02_zeroPivot( - cusparseHandle_t handle, bsrilu02Info_t info, int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, bsrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXbsrilu02_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseSbsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - 
const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDbsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCbsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZbsrilu02_bufferSize"); - 
if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSbsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDbsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - 
bsrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCbsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrilu02Info_t, - size_t *); - static auto func_ptr = LoadSymbol("cusparseZbsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, 
descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t 
info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, 
dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsric02_zeroPivot(cusparseHandle_t handle, - csric02Info_t info, - int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, csric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXcsric02_zeroPivot"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsric02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseScsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsric02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDcsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsric02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCcsric02_bufferSize"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsric02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZcsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsric02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedVal, const int *csrSortedRowPtr, const int *csrSortedColInd, - csric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseScsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsric02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csric02Info_t, size_t *); - static auto func_ptr = 
LoadSymbol("cusparseDcsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsric02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsric02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsric02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - 
const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsric02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsric02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsric02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int 
*csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, csric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsric02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedValA_valM, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsric02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedValA_valM, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseCcsric02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedValA_valM, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsric02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedValA_valM, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXbsric02_zeroPivot(cusparseHandle_t handle, - bsric02Info_t info, - int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, bsric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXbsric02_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsric02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int 
*bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseSbsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsric02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDbsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsric02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCbsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, 
bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsric02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZbsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsric02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSbsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsric02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - 
cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDbsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsric02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCbsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsric02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsric02Info_t, - size_t *); - static auto func_ptr = LoadSymbol("cusparseZbsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseSbsric02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pInputBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pInputBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsric02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pInputBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pInputBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsric02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pInputBuffer) { - using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pInputBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsric02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pInputBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pInputBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsric02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, 
descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsric02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsric02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsric02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - 
using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const float *dl, const float *d, - const float *du, const float *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const float *, const float *, - const float *, int, size_t *); - static auto func_ptr = LoadSymbol("cusparseSgtsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsv2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const double *dl, const double *d, - const double *du, const double *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, const double *, int, size_t *); - static auto func_ptr = LoadSymbol("cusparseDgtsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const cuComplex *dl, - const cuComplex *d, const cuComplex *du, const cuComplex *B, int ldb, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, const cuComplex *, int, size_t 
*); - static auto func_ptr = LoadSymbol("cusparseCgtsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, - const cuDoubleComplex *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, const cuDoubleComplex *, - int, size_t *); - static auto func_ptr = LoadSymbol("cusparseZgtsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2(cusparseHandle_t handle, int m, - int n, const float *dl, - const float *d, const float *du, - float *B, int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const float *, const float *, - float *, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgtsv2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsv2(cusparseHandle_t handle, int m, - int n, const double *dl, - const double *d, const double *du, - double *B, int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, double *, int, void *); - static auto func_ptr = LoadSymbol("cusparseDgtsv2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2(cusparseHandle_t handle, int m, - int n, const cuComplex *dl, - const cuComplex *d, - const cuComplex *du, 
cuComplex *B, - int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, cuComplex *, int, void *); - static auto func_ptr = LoadSymbol("cusparseCgtsv2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2(cusparseHandle_t handle, int m, - int n, const cuDoubleComplex *dl, - const cuDoubleComplex *d, - const cuDoubleComplex *du, - cuDoubleComplex *B, int ldb, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *, int, - void *); - static auto func_ptr = LoadSymbol("cusparseZgtsv2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2_nopivot_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const float *dl, const float *d, - const float *du, const float *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const float *, const float *, - const float *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSgtsv2_nopivot_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsv2_nopivot_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const double *dl, const double *d, - const double *du, const double *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, const double *, int, size_t *); - static auto func_ptr = - 
LoadSymbol("cusparseDgtsv2_nopivot_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2_nopivot_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const cuComplex *dl, - const cuComplex *d, const cuComplex *du, const cuComplex *B, int ldb, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, const cuComplex *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgtsv2_nopivot_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2_nopivot_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, - const cuDoubleComplex *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, const cuDoubleComplex *, - int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgtsv2_nopivot_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2_nopivot( - cusparseHandle_t handle, int m, int n, const float *dl, const float *d, - const float *du, float *B, int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const float *, const float *, - float *, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgtsv2_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseDgtsv2_nopivot( - cusparseHandle_t handle, int m, int n, const double *dl, const double *d, - const double *du, double *B, int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, double *, int, void *); - static auto func_ptr = LoadSymbol("cusparseDgtsv2_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2_nopivot( - cusparseHandle_t handle, int m, int n, const cuComplex *dl, - const cuComplex *d, const cuComplex *du, cuComplex *B, int ldb, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, cuComplex *, int, void *); - static auto func_ptr = LoadSymbol("cusparseCgtsv2_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2_nopivot( - cusparseHandle_t handle, int m, int n, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, cuDoubleComplex *B, - int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *, int, - void *); - static auto func_ptr = LoadSymbol("cusparseZgtsv2_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2StridedBatch_bufferSizeExt( - cusparseHandle_t handle, int m, const float *dl, const float *d, - const float *du, const float *x, int batchCount, int batchStride, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const float *, const float *, const float *, - 
const float *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSgtsv2StridedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, - bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsv2StridedBatch_bufferSizeExt( - cusparseHandle_t handle, int m, const double *dl, const double *d, - const double *du, const double *x, int batchCount, int batchStride, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, const double *, const double *, - const double *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDgtsv2StridedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, - bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2StridedBatch_bufferSizeExt( - cusparseHandle_t handle, int m, const cuComplex *dl, const cuComplex *d, - const cuComplex *du, const cuComplex *x, int batchCount, int batchStride, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const cuComplex *, - const cuComplex *, const cuComplex *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgtsv2StridedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, - bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2StridedBatch_bufferSizeExt( - cusparseHandle_t handle, int m, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, - const cuDoubleComplex *x, int batchCount, int batchStride, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, const cuDoubleComplex *, - const cuDoubleComplex *, const 
cuDoubleComplex *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgtsv2StridedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, - bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2StridedBatch( - cusparseHandle_t handle, int m, const float *dl, const float *d, - const float *du, float *x, int batchCount, int batchStride, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const float *, const float *, const float *, - float *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgtsv2StridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDgtsv2StridedBatch(cusparseHandle_t handle, int m, const double *dl, - const double *d, const double *du, double *x, - int batchCount, int batchStride, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, const double *, const double *, - double *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseDgtsv2StridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2StridedBatch( - cusparseHandle_t handle, int m, const cuComplex *dl, const cuComplex *d, - const cuComplex *du, cuComplex *x, int batchCount, int batchStride, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const cuComplex *, - const cuComplex *, cuComplex *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseCgtsv2StridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, pBuffer); -} - -cusparseStatus_t 
CUSPARSEAPI cusparseZgtsv2StridedBatch( - cusparseHandle_t handle, int m, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, cuDoubleComplex *x, - int batchCount, int batchStride, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, const cuDoubleComplex *, - const cuDoubleComplex *, cuDoubleComplex *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseZgtsv2StridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const float *dl, const float *d, - const float *du, const float *x, int batchCount, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const float *, const float *, - const float *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSgtsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const double *dl, const double *d, - const double *du, const double *x, int batchCount, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, const double *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDgtsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const cuComplex 
*dl, - const cuComplex *d, const cuComplex *du, const cuComplex *x, int batchCount, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, const cuComplex *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgtsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, - const cuDoubleComplex *x, int batchCount, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, const cuDoubleComplex *, - int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgtsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, float *dl, float *d, float *du, - float *x, int batchCount, void *pBuffer) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, int, float *, - float *, float *, float *, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgtsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, double *dl, double *d, double *du, - double *x, int batchCount, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, int, - double 
*, double *, double *, - double *, int, void *); - static auto func_ptr = LoadSymbol("cusparseDgtsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, cuComplex *dl, cuComplex *d, - cuComplex *du, cuComplex *x, int batchCount, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, cuComplex *, cuComplex *, cuComplex *, - cuComplex *, int, void *); - static auto func_ptr = LoadSymbol("cusparseCgtsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, cuDoubleComplex *dl, - cuDoubleComplex *d, cuDoubleComplex *du, cuDoubleComplex *x, int batchCount, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, cuDoubleComplex *, cuDoubleComplex *, - cuDoubleComplex *, cuDoubleComplex *, int, void *); - static auto func_ptr = LoadSymbol("cusparseZgtsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgpsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const float *ds, const float *dl, - const float *d, const float *du, const float *dw, const float *x, - int batchCount, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const float *, const float *, - const float *, const float *, const float *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSgpsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgpsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const double *ds, - const double *dl, const double *d, const double *du, const double *dw, - const double *x, int batchCount, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, const double *, const double *, const double *, int, - size_t *); - static auto func_ptr = - LoadSymbol("cusparseDgpsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgpsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const cuComplex *ds, - const cuComplex *dl, const cuComplex *d, const cuComplex *du, - const cuComplex *dw, const cuComplex *x, int batchCount, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, const cuComplex *, const cuComplex *, - const cuComplex *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgpsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgpsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const cuDoubleComplex *ds, - const cuDoubleComplex *dl, const cuDoubleComplex *d, - const cuDoubleComplex *du, const cuDoubleComplex *dw, - const cuDoubleComplex *x, int batchCount, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, 
const cuDoubleComplex *, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgpsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgpsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, float *ds, float *dl, float *d, - float *du, float *dw, float *x, int batchCount, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, float *, float *, float *, float *, float *, - float *, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgpsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgpsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, double *ds, double *dl, double *d, - double *du, double *dw, double *x, int batchCount, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, double *, double *, double *, double *, - double *, double *, int, void *); - static auto func_ptr = LoadSymbol("cusparseDgpsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgpsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, cuComplex *ds, cuComplex *dl, - cuComplex *d, cuComplex *du, cuComplex *dw, cuComplex *x, int batchCount, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, cuComplex *, cuComplex *, cuComplex *, - cuComplex *, cuComplex *, cuComplex *, int, void *); - static auto func_ptr = LoadSymbol("cusparseCgpsvInterleavedBatch"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgpsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, cuDoubleComplex *ds, - cuDoubleComplex *dl, cuDoubleComplex *d, cuDoubleComplex *du, - cuDoubleComplex *dw, cuDoubleComplex *x, int batchCount, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, cuDoubleComplex *, cuDoubleComplex *, - cuDoubleComplex *, cuDoubleComplex *, cuDoubleComplex *, - cuDoubleComplex *, int, void *); - static auto func_ptr = LoadSymbol("cusparseZgpsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, pBuffer); -} - -CUSPARSE_DEPRECATED(cusparseSpGEMM) -cusparseStatus_t CUSPARSEAPI cusparseCreateCsrgemm2Info(csrgemm2Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrgemm2Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateCsrgemm2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -CUSPARSE_DEPRECATED(cusparseSpGEMM) -cusparseStatus_t CUSPARSEAPI cusparseDestroyCsrgemm2Info(csrgemm2Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrgemm2Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyCsrgemm2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -CUSPARSE_DEPRECATED(cusparseSpGEMM) -cusparseStatus_t CUSPARSEAPI cusparseScsrgemm2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int k, const float *alpha, - const cusparseMatDescr_t descrA, int nnzA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, const float *beta, - const cusparseMatDescr_t descrD, int nnzD, const int *csrSortedRowPtrD, - const int *csrSortedColIndD, csrgemm2Info_t info, - size_t 
*pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const float *, const cusparseMatDescr_t, - int, const int *, const int *, const cusparseMatDescr_t, int, const int *, - const int *, const float *, const cusparseMatDescr_t, int, const int *, - const int *, csrgemm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseScsrgemm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, - csrSortedColIndB, beta, descrD, nnzD, csrSortedRowPtrD, - csrSortedColIndD, info, pBufferSizeInBytes); -} - -CUSPARSE_DEPRECATED(cusparseSpGEMM) -cusparseStatus_t CUSPARSEAPI cusparseDcsrgemm2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int k, const double *alpha, - const cusparseMatDescr_t descrA, int nnzA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const double *beta, const cusparseMatDescr_t descrD, int nnzD, - const int *csrSortedRowPtrD, const int *csrSortedColIndD, - csrgemm2Info_t info, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const double *, const cusparseMatDescr_t, - int, const int *, const int *, const cusparseMatDescr_t, int, const int *, - const int *, const double *, const cusparseMatDescr_t, int, const int *, - const int *, csrgemm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsrgemm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, - csrSortedColIndB, beta, descrD, nnzD, csrSortedRowPtrD, - csrSortedColIndD, info, pBufferSizeInBytes); -} - -CUSPARSE_DEPRECATED(cusparseSpGEMM) -cusparseStatus_t 
CUSPARSEAPI cusparseCcsrgemm2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int k, const cuComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cuComplex *beta, const cusparseMatDescr_t descrD, int nnzD, - const int *csrSortedRowPtrD, const int *csrSortedColIndD, - csrgemm2Info_t info, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuComplex *, - const cusparseMatDescr_t, int, const int *, const int *, - const cusparseMatDescr_t, int, const int *, const int *, - const cuComplex *, const cusparseMatDescr_t, int, const int *, - const int *, csrgemm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsrgemm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, - csrSortedColIndB, beta, descrD, nnzD, csrSortedRowPtrD, - csrSortedColIndD, info, pBufferSizeInBytes); -} - -CUSPARSE_DEPRECATED(cusparseSpGEMM) -cusparseStatus_t CUSPARSEAPI cusparseZcsrgemm2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int k, const cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cuDoubleComplex *beta, const cusparseMatDescr_t descrD, int nnzD, - const int *csrSortedRowPtrD, const int *csrSortedColIndD, - csrgemm2Info_t info, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, int, const int *, const int *, - const cusparseMatDescr_t, int, const int *, const int *, - 
const cuDoubleComplex *, const cusparseMatDescr_t, int, const int *, - const int *, csrgemm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsrgemm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, - csrSortedColIndB, beta, descrD, nnzD, csrSortedRowPtrD, - csrSortedColIndD, info, pBufferSizeInBytes); -} - -CUSPARSE_DEPRECATED(cusparseSpGEMM) -cusparseStatus_t CUSPARSEAPI cusparseXcsrgemm2Nnz( - cusparseHandle_t handle, int m, int n, int k, - const cusparseMatDescr_t descrA, int nnzA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrD, int nnzD, const int *csrSortedRowPtrD, - const int *csrSortedColIndD, const cusparseMatDescr_t descrC, - int *csrSortedRowPtrC, int *nnzTotalDevHostPtr, const csrgemm2Info_t info, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, int, - const int *, const int *, const cusparseMatDescr_t, int, const int *, - const int *, const cusparseMatDescr_t, int, const int *, const int *, - const cusparseMatDescr_t, int *, int *, const csrgemm2Info_t, void *); - static auto func_ptr = LoadSymbol("cusparseXcsrgemm2Nnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, descrA, nnzA, csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, - csrSortedColIndB, descrD, nnzD, csrSortedRowPtrD, - csrSortedColIndD, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, info, pBuffer); -} - -CUSPARSE_DEPRECATED(cusparseSpGEMM) -cusparseStatus_t CUSPARSEAPI cusparseScsrgemm2( - cusparseHandle_t handle, int m, int n, int k, const float *alpha, - const cusparseMatDescr_t descrA, int nnzA, const float *csrSortedValA, - const int 
*csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, int nnzB, const float *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, const float *beta, - const cusparseMatDescr_t descrD, int nnzD, const float *csrSortedValD, - const int *csrSortedRowPtrD, const int *csrSortedColIndD, - const cusparseMatDescr_t descrC, float *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC, - const csrgemm2Info_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const float *, const cusparseMatDescr_t, - int, const float *, const int *, const int *, const cusparseMatDescr_t, - int, const float *, const int *, const int *, const float *, - const cusparseMatDescr_t, int, const float *, const int *, const int *, - const cusparseMatDescr_t, float *, const int *, int *, - const csrgemm2Info_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsrgemm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, beta, - descrD, nnzD, csrSortedValD, csrSortedRowPtrD, - csrSortedColIndD, descrC, csrSortedValC, csrSortedRowPtrC, - csrSortedColIndC, info, pBuffer); -} - -CUSPARSE_DEPRECATED(cusparseSpGEMM) -cusparseStatus_t CUSPARSEAPI cusparseDcsrgemm2( - cusparseHandle_t handle, int m, int n, int k, const double *alpha, - const cusparseMatDescr_t descrA, int nnzA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, int nnzB, const double *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const double *beta, const cusparseMatDescr_t descrD, int nnzD, - const double *csrSortedValD, const int *csrSortedRowPtrD, - const int *csrSortedColIndD, const cusparseMatDescr_t descrC, - double *csrSortedValC, 
const int *csrSortedRowPtrC, int *csrSortedColIndC, - const csrgemm2Info_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const double *, const cusparseMatDescr_t, - int, const double *, const int *, const int *, const cusparseMatDescr_t, - int, const double *, const int *, const int *, const double *, - const cusparseMatDescr_t, int, const double *, const int *, const int *, - const cusparseMatDescr_t, double *, const int *, int *, - const csrgemm2Info_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrgemm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, beta, - descrD, nnzD, csrSortedValD, csrSortedRowPtrD, - csrSortedColIndD, descrC, csrSortedValC, csrSortedRowPtrC, - csrSortedColIndC, info, pBuffer); -} - -CUSPARSE_DEPRECATED(cusparseSpGEMM) -cusparseStatus_t CUSPARSEAPI cusparseCcsrgemm2( - cusparseHandle_t handle, int m, int n, int k, const cuComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, int nnzB, const cuComplex *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cuComplex *beta, const cusparseMatDescr_t descrD, int nnzD, - const cuComplex *csrSortedValD, const int *csrSortedRowPtrD, - const int *csrSortedColIndD, const cusparseMatDescr_t descrC, - cuComplex *csrSortedValC, const int *csrSortedRowPtrC, - int *csrSortedColIndC, const csrgemm2Info_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuComplex *, - const cusparseMatDescr_t, int, const cuComplex *, const int *, - const int *, const cusparseMatDescr_t, int, const cuComplex *, - const int *, const int *, const 
cuComplex *, const cusparseMatDescr_t, - int, const cuComplex *, const int *, const int *, - const cusparseMatDescr_t, cuComplex *, const int *, int *, - const csrgemm2Info_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsrgemm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, beta, - descrD, nnzD, csrSortedValD, csrSortedRowPtrD, - csrSortedColIndD, descrC, csrSortedValC, csrSortedRowPtrC, - csrSortedColIndC, info, pBuffer); -} - -CUSPARSE_DEPRECATED(cusparseSpGEMM) -cusparseStatus_t CUSPARSEAPI cusparseZcsrgemm2( - cusparseHandle_t handle, int m, int n, int k, const cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, - const cuDoubleComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cuDoubleComplex *beta, - const cusparseMatDescr_t descrD, int nnzD, - const cuDoubleComplex *csrSortedValD, const int *csrSortedRowPtrD, - const int *csrSortedColIndD, const cusparseMatDescr_t descrC, - cuDoubleComplex *csrSortedValC, const int *csrSortedRowPtrC, - int *csrSortedColIndC, const csrgemm2Info_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, int, const cuDoubleComplex *, const int *, - const int *, const cusparseMatDescr_t, int, const cuDoubleComplex *, - const int *, const int *, const cuDoubleComplex *, - const cusparseMatDescr_t, int, const cuDoubleComplex *, const int *, - const int *, const cusparseMatDescr_t, cuDoubleComplex *, const int *, - int *, const csrgemm2Info_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrgemm2"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, beta, - descrD, nnzD, csrSortedValD, csrSortedRowPtrD, - csrSortedColIndD, descrC, csrSortedValC, csrSortedRowPtrC, - csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrgeam2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const float *alpha, - const cusparseMatDescr_t descrA, int nnzA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *beta, - const cusparseMatDescr_t descrB, int nnzB, const float *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrC, const float *csrSortedValC, - const int *csrSortedRowPtrC, const int *csrSortedColIndC, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const cusparseMatDescr_t, int, - const float *, const int *, const int *, const float *, - const cusparseMatDescr_t, int, const float *, const int *, const int *, - const cusparseMatDescr_t, const float *, const int *, const int *, - size_t *); - static auto func_ptr = LoadSymbol("cusparseScsrgeam2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrgeam2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const double *alpha, - const cusparseMatDescr_t descrA, int nnzA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *beta, const cusparseMatDescr_t descrB, int nnzB, - const double 
*csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - const double *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const cusparseMatDescr_t, int, - const double *, const int *, const int *, const double *, - const cusparseMatDescr_t, int, const double *, const int *, const int *, - const cusparseMatDescr_t, const double *, const int *, const int *, - size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsrgeam2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrgeam2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const cuComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cuComplex *beta, const cusparseMatDescr_t descrB, int nnzB, - const cuComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - const cuComplex *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cusparseMatDescr_t, - int, const cuComplex *, const int *, const int *, const cuComplex *, - const cusparseMatDescr_t, int, const cuComplex *, const int *, - const int *, const cusparseMatDescr_t, const cuComplex *, const int *, - const int *, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsrgeam2_bufferSizeExt"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrgeam2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *beta, - const cusparseMatDescr_t descrB, int nnzB, - const cuDoubleComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - const cuDoubleComplex *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, int, const cuDoubleComplex *, const int *, - const int *, const cuDoubleComplex *, const cusparseMatDescr_t, int, - const cuDoubleComplex *, const int *, const int *, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsrgeam2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrgeam2Nnz( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - int nnzA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, int nnzB, const int *csrSortedRowPtrB, - const int 
*csrSortedColIndB, const cusparseMatDescr_t descrC, - int *csrSortedRowPtrC, int *nnzTotalDevHostPtr, void *workspace) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, int, const int *, - const int *, const cusparseMatDescr_t, int, const int *, const int *, - const cusparseMatDescr_t, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseXcsrgeam2Nnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, nnzA, csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, - csrSortedColIndB, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, workspace); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrgeam2( - cusparseHandle_t handle, int m, int n, const float *alpha, - const cusparseMatDescr_t descrA, int nnzA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *beta, - const cusparseMatDescr_t descrB, int nnzB, const float *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrC, float *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const cusparseMatDescr_t, int, - const float *, const int *, const int *, const float *, - const cusparseMatDescr_t, int, const float *, const int *, const int *, - const cusparseMatDescr_t, float *, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseScsrgeam2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrgeam2( - cusparseHandle_t handle, int m, int n, const double *alpha, - const 
cusparseMatDescr_t descrA, int nnzA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *beta, const cusparseMatDescr_t descrB, int nnzB, - const double *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - double *csrSortedValC, int *csrSortedRowPtrC, int *csrSortedColIndC, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const cusparseMatDescr_t, int, - const double *, const int *, const int *, const double *, - const cusparseMatDescr_t, int, const double *, const int *, const int *, - const cusparseMatDescr_t, double *, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrgeam2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrgeam2( - cusparseHandle_t handle, int m, int n, const cuComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cuComplex *beta, const cusparseMatDescr_t descrB, int nnzB, - const cuComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - cuComplex *csrSortedValC, int *csrSortedRowPtrC, int *csrSortedColIndC, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cusparseMatDescr_t, - int, const cuComplex *, const int *, const int *, const cuComplex *, - const cusparseMatDescr_t, int, const cuComplex *, const int *, - const int *, const cusparseMatDescr_t, cuComplex *, int *, int *, void *); - static auto func_ptr = 
LoadSymbol("cusparseCcsrgeam2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrgeam2( - cusparseHandle_t handle, int m, int n, const cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *beta, - const cusparseMatDescr_t descrB, int nnzB, - const cuDoubleComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - cuDoubleComplex *csrSortedValC, int *csrSortedRowPtrC, - int *csrSortedColIndC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, int, const cuDoubleComplex *, const int *, - const int *, const cuDoubleComplex *, const cusparseMatDescr_t, int, - const cuDoubleComplex *, const int *, const int *, - const cusparseMatDescr_t, cuDoubleComplex *, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrgeam2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrcolor( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const float *fractionToColor, int *ncolors, - int *coloring, int *reordering, const cusparseColorInfo_t info) { - using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, const float *, int *, int *, int *, - const cusparseColorInfo_t); - static auto func_ptr = LoadSymbol("cusparseScsrcolor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, fractionToColor, ncolors, coloring, - reordering, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrcolor( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const double *fractionToColor, int *ncolors, - int *coloring, int *reordering, const cusparseColorInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, int *, int *, int *, - const cusparseColorInfo_t); - static auto func_ptr = LoadSymbol("cusparseDcsrcolor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, fractionToColor, ncolors, coloring, - reordering, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrcolor( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const float *fractionToColor, int *ncolors, - int *coloring, int *reordering, const cusparseColorInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, const float *, int *, int *, int *, - const cusparseColorInfo_t); - static auto func_ptr = LoadSymbol("cusparseCcsrcolor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, 
csrSortedRowPtrA, - csrSortedColIndA, fractionToColor, ncolors, coloring, - reordering, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrcolor( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const double *fractionToColor, int *ncolors, - int *coloring, int *reordering, const cusparseColorInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, const double *, int *, - int *, int *, const cusparseColorInfo_t); - static auto func_ptr = LoadSymbol("cusparseZcsrcolor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, fractionToColor, ncolors, coloring, - reordering, info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSnnz(cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const float *A, int lda, - int *nnzPerRowCol, int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, int, int *, int *); - static auto func_ptr = LoadSymbol("cusparseSnnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDnnz(cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const double *A, int lda, - int *nnzPerRowCol, int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, int, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDnnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCnnz(cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuComplex *A, int lda, - int *nnzPerRowCol, int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, int, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCnnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI -cusparseZnnz(cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *A, int lda, - int *nnzPerRowCol, int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, int, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZnnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI cusparseSnnz_compress( - cusparseHandle_t handle, int m, const cusparseMatDescr_t descr, - const float *csrSortedValA, const int *csrSortedRowPtrA, int *nnzPerRow, - int *nnzC, float tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cusparseMatDescr_t, const float *, - const int *, int *, int *, float); - static auto func_ptr = LoadSymbol("cusparseSnnz_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, descr, csrSortedValA, csrSortedRowPtrA, nnzPerRow, - nnzC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseDnnz_compress( - cusparseHandle_t handle, int m, const cusparseMatDescr_t descr, - const 
double *csrSortedValA, const int *csrSortedRowPtrA, int *nnzPerRow, - int *nnzC, double tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cusparseMatDescr_t, const double *, - const int *, int *, int *, double); - static auto func_ptr = LoadSymbol("cusparseDnnz_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, descr, csrSortedValA, csrSortedRowPtrA, nnzPerRow, - nnzC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseCnnz_compress( - cusparseHandle_t handle, int m, const cusparseMatDescr_t descr, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, int *nnzPerRow, - int *nnzC, cuComplex tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cusparseMatDescr_t, const cuComplex *, - const int *, int *, int *, cuComplex); - static auto func_ptr = LoadSymbol("cusparseCnnz_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, descr, csrSortedValA, csrSortedRowPtrA, nnzPerRow, - nnzC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseZnnz_compress( - cusparseHandle_t handle, int m, const cusparseMatDescr_t descr, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - int *nnzPerRow, int *nnzC, cuDoubleComplex tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cusparseMatDescr_t, const cuDoubleComplex *, - const int *, int *, int *, cuDoubleComplex); - static auto func_ptr = LoadSymbol("cusparseZnnz_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, descr, csrSortedValA, csrSortedRowPtrA, nnzPerRow, - nnzC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2csr_compress( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedColIndA, - const int *csrSortedRowPtrA, int nnzA, const int *nnzPerRow, - float *csrSortedValC, int *csrSortedColIndC, int 
*csrSortedRowPtrC, - float tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, int, const int *, float *, int *, int *, float); - static auto func_ptr = LoadSymbol("cusparseScsr2csr_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedColIndA, - csrSortedRowPtrA, nnzA, nnzPerRow, csrSortedValC, - csrSortedColIndC, csrSortedRowPtrC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2csr_compress( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedColIndA, - const int *csrSortedRowPtrA, int nnzA, const int *nnzPerRow, - double *csrSortedValC, int *csrSortedColIndC, int *csrSortedRowPtrC, - double tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, int, const int *, double *, int *, int *, - double); - static auto func_ptr = LoadSymbol("cusparseDcsr2csr_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedColIndA, - csrSortedRowPtrA, nnzA, nnzPerRow, csrSortedValC, - csrSortedColIndC, csrSortedRowPtrC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2csr_compress( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedColIndA, - const int *csrSortedRowPtrA, int nnzA, const int *nnzPerRow, - cuComplex *csrSortedValC, int *csrSortedColIndC, int *csrSortedRowPtrC, - cuComplex tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, int, const int *, cuComplex *, int *, int *, - cuComplex); - static auto func_ptr = LoadSymbol("cusparseCcsr2csr_compress"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedColIndA, - csrSortedRowPtrA, nnzA, nnzPerRow, csrSortedValC, - csrSortedColIndC, csrSortedRowPtrC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2csr_compress( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedColIndA, - const int *csrSortedRowPtrA, int nnzA, const int *nnzPerRow, - cuDoubleComplex *csrSortedValC, int *csrSortedColIndC, - int *csrSortedRowPtrC, cuDoubleComplex tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, const int *, - cuDoubleComplex *, int *, int *, cuDoubleComplex); - static auto func_ptr = LoadSymbol("cusparseZcsr2csr_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedColIndA, - csrSortedRowPtrA, nnzA, nnzPerRow, csrSortedValC, - csrSortedColIndC, csrSortedRowPtrC, tol); -} - -CUSPARSE_DEPRECATED(cusparseDenseToSparse) -cusparseStatus_t CUSPARSEAPI cusparseSdense2csr( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *A, int lda, const int *nnzPerRow, float *csrSortedValA, - int *csrSortedRowPtrA, int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, int, - const int *, float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseSdense2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA); -} - -CUSPARSE_DEPRECATED(cusparseDenseToSparse) -cusparseStatus_t CUSPARSEAPI cusparseDdense2csr( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *A, int lda, const int 
*nnzPerRow, double *csrSortedValA, - int *csrSortedRowPtrA, int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, int, - const int *, double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDdense2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA); -} - -CUSPARSE_DEPRECATED(cusparseDenseToSparse) -cusparseStatus_t CUSPARSEAPI cusparseCdense2csr( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *A, int lda, const int *nnzPerRow, cuComplex *csrSortedValA, - int *csrSortedRowPtrA, int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - int, const int *, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCdense2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA); -} - -CUSPARSE_DEPRECATED(cusparseDenseToSparse) -cusparseStatus_t CUSPARSEAPI cusparseZdense2csr( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuDoubleComplex *A, int lda, const int *nnzPerRow, - cuDoubleComplex *csrSortedValA, int *csrSortedRowPtrA, - int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, int, const int *, cuDoubleComplex *, int *, - int *); - static auto func_ptr = LoadSymbol("cusparseZdense2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA); -} - -CUSPARSE_DEPRECATED(cusparseSparseToDense) -cusparseStatus_t CUSPARSEAPI cusparseScsr2dense( - 
cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, float *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, float *, int); - static auto func_ptr = LoadSymbol("cusparseScsr2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, A, lda); -} - -CUSPARSE_DEPRECATED(cusparseSparseToDense) -cusparseStatus_t CUSPARSEAPI cusparseDcsr2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, double *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, double *, int); - static auto func_ptr = LoadSymbol("cusparseDcsr2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, A, lda); -} - -CUSPARSE_DEPRECATED(cusparseSparseToDense) -cusparseStatus_t CUSPARSEAPI cusparseCcsr2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cuComplex *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCcsr2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, A, lda); -} - -CUSPARSE_DEPRECATED(cusparseSparseToDense) -cusparseStatus_t CUSPARSEAPI cusparseZcsr2dense( - 
cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cuDoubleComplex *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cusparseZcsr2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, A, lda); -} - -CUSPARSE_DEPRECATED(cusparseDenseToSparse) -cusparseStatus_t CUSPARSEAPI cusparseSdense2csc( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *A, int lda, const int *nnzPerCol, float *cscSortedValA, - int *cscSortedRowIndA, int *cscSortedColPtrA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, int, - const int *, float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseSdense2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerCol, cscSortedValA, - cscSortedRowIndA, cscSortedColPtrA); -} - -CUSPARSE_DEPRECATED(cusparseDenseToSparse) -cusparseStatus_t CUSPARSEAPI cusparseDdense2csc( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *A, int lda, const int *nnzPerCol, double *cscSortedValA, - int *cscSortedRowIndA, int *cscSortedColPtrA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, int, - const int *, double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDdense2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerCol, cscSortedValA, - cscSortedRowIndA, cscSortedColPtrA); -} - 
-CUSPARSE_DEPRECATED(cusparseDenseToSparse) -cusparseStatus_t CUSPARSEAPI cusparseCdense2csc( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *A, int lda, const int *nnzPerCol, cuComplex *cscSortedValA, - int *cscSortedRowIndA, int *cscSortedColPtrA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - int, const int *, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCdense2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerCol, cscSortedValA, - cscSortedRowIndA, cscSortedColPtrA); -} - -CUSPARSE_DEPRECATED(cusparseDenseToSparse) -cusparseStatus_t CUSPARSEAPI cusparseZdense2csc( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuDoubleComplex *A, int lda, const int *nnzPerCol, - cuDoubleComplex *cscSortedValA, int *cscSortedRowIndA, - int *cscSortedColPtrA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, int, const int *, cuDoubleComplex *, int *, - int *); - static auto func_ptr = LoadSymbol("cusparseZdense2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerCol, cscSortedValA, - cscSortedRowIndA, cscSortedColPtrA); -} - -CUSPARSE_DEPRECATED(cusparseSparseToDense) -cusparseStatus_t CUSPARSEAPI cusparseScsc2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, float *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, float *, int); - static auto func_ptr = LoadSymbol("cusparseScsc2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, A, lda); -} - -CUSPARSE_DEPRECATED(cusparseSparseToDense) -cusparseStatus_t CUSPARSEAPI cusparseDcsc2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, double *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, double *, int); - static auto func_ptr = LoadSymbol("cusparseDcsc2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, A, lda); -} - -CUSPARSE_DEPRECATED(cusparseSparseToDense) -cusparseStatus_t CUSPARSEAPI cusparseCcsc2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, cuComplex *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCcsc2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, A, lda); -} - -CUSPARSE_DEPRECATED(cusparseSparseToDense) -cusparseStatus_t CUSPARSEAPI cusparseZcsc2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuDoubleComplex *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, cuDoubleComplex *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cusparseZcsc2dense"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcoo2csr(cusparseHandle_t handle, - const int *cooRowInd, int nnz, - int m, int *csrSortedRowPtr, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const int *, int, int, int *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseXcoo2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, cooRowInd, nnz, m, csrSortedRowPtr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsr2coo(cusparseHandle_t handle, - const int *csrSortedRowPtr, - int nnz, int m, int *cooRowInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const int *, int, int, int *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseXcsr2coo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, csrSortedRowPtr, nnz, m, cooRowInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsr2bsrNnz( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, int blockDim, const cusparseMatDescr_t descrC, - int *bsrSortedRowPtrC, int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const int *, const int *, int, const cusparseMatDescr_t, int *, int *); - static auto func_ptr = LoadSymbol("cusparseXcsr2bsrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedRowPtrA, - csrSortedColIndA, blockDim, descrC, bsrSortedRowPtrC, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2bsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const 
cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, float *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, const cusparseMatDescr_t, - float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseScsr2bsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, blockDim, descrC, bsrSortedValC, - bsrSortedRowPtrC, bsrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2bsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, double *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, const cusparseMatDescr_t, - double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDcsr2bsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, blockDim, descrC, bsrSortedValC, - bsrSortedRowPtrC, bsrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2bsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, cuComplex *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC) { - using 
FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, - const cusparseMatDescr_t, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCcsr2bsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, blockDim, descrC, bsrSortedValC, - bsrSortedRowPtrC, bsrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2bsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, cuDoubleComplex *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, - const cusparseMatDescr_t, cuDoubleComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZcsr2bsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, blockDim, descrC, bsrSortedValC, - bsrSortedRowPtrC, bsrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, float *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, const cusparseMatDescr_t, - float *, int 
*, int *); - static auto func_ptr = LoadSymbol("cusparseSbsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, blockDim, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, double *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, const cusparseMatDescr_t, - double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDbsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, blockDim, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, cuComplex *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, - const cusparseMatDescr_t, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCbsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, blockDim, 
descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, cuDoubleComplex *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, - const cusparseMatDescr_t, cuDoubleComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZbsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, blockDim, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsc_bufferSize( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const float *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const float *, const int *, const int *, - int, int, int *); - static auto func_ptr = LoadSymbol("cusparseSgebsr2gebsc_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsc_bufferSize( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const double *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, 
int, int, int, const double *, const int *, const int *, - int, int, int *); - static auto func_ptr = LoadSymbol("cusparseDgebsr2gebsc_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsc_bufferSize( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuComplex *, const int *, - const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseCgebsr2gebsc_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsc_bufferSize( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuDoubleComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuDoubleComplex *, const int *, - const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseZgebsr2gebsc_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsc_bufferSizeExt( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const float *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - size_t *pBufferSize) { - 
using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const float *, const int *, const int *, - int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSgebsr2gebsc_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsc_bufferSizeExt( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const double *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const double *, const int *, const int *, - int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDgebsr2gebsc_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsc_bufferSizeExt( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuComplex *, const int *, - const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgebsr2gebsc_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsc_bufferSizeExt( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuDoubleComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int 
rowBlockDim, int colBlockDim, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuDoubleComplex *, const int *, - const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgebsr2gebsc_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsc( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const float *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, float *bscVal, - int *bscRowInd, int *bscColPtr, cusparseAction_t copyValues, - cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const float *, const int *, const int *, - int, int, float *, int *, int *, cusparseAction_t, cusparseIndexBase_t, - void *); - static auto func_ptr = LoadSymbol("cusparseSgebsr2gebsc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, bscVal, bscRowInd, - bscColPtr, copyValues, idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsc( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const double *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - double *bscVal, int *bscRowInd, int *bscColPtr, cusparseAction_t copyValues, - cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const double *, const int *, const int *, - int, int, double *, int *, int *, cusparseAction_t, cusparseIndexBase_t, - void *); - static auto func_ptr = LoadSymbol("cusparseDgebsr2gebsc"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, bscVal, bscRowInd, - bscColPtr, copyValues, idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsc( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - cuComplex *bscVal, int *bscRowInd, int *bscColPtr, - cusparseAction_t copyValues, cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuComplex *, const int *, - const int *, int, int, cuComplex *, int *, int *, cusparseAction_t, - cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseCgebsr2gebsc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, bscVal, bscRowInd, - bscColPtr, copyValues, idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsc( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuDoubleComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - cuDoubleComplex *bscVal, int *bscRowInd, int *bscColPtr, - cusparseAction_t copyValues, cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuDoubleComplex *, const int *, - const int *, int, int, cuDoubleComplex *, int *, int *, cusparseAction_t, - cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseZgebsr2gebsc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, bscVal, bscRowInd, - bscColPtr, copyValues, idxBase, pBuffer); -} - 
-cusparseStatus_t CUSPARSEAPI cusparseXgebsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, int rowBlockDim, int colBlockDim, - const cusparseMatDescr_t descrC, int *csrSortedRowPtrC, - int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const int *, const int *, int, int, const cusparseMatDescr_t, int *, - int *); - static auto func_ptr = LoadSymbol("cusparseXgebsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, - csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDim, - int colBlockDim, const cusparseMatDescr_t descrC, float *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, int, - const cusparseMatDescr_t, float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseSgebsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDim, - int 
colBlockDim, const cusparseMatDescr_t descrC, double *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, int, - const cusparseMatDescr_t, double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDgebsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDim, - int colBlockDim, const cusparseMatDescr_t descrC, cuComplex *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, int, - const cusparseMatDescr_t, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCgebsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDim, - int colBlockDim, const cusparseMatDescr_t descrC, - cuDoubleComplex *csrSortedValC, int *csrSortedRowPtrC, - int *csrSortedColIndC) { - 
using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, int, - const cusparseMatDescr_t, cuDoubleComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZgebsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseScsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseDcsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, 
descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseCcsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseZcsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, size_t 
*pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseScsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDcsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCcsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, 
rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZcsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsr2gebsrNnz( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrC, - int *bsrSortedRowPtrC, int rowBlockDim, int colBlockDim, - int *nnzTotalDevHostPtr, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const int *, const int *, const cusparseMatDescr_t, int *, int, int, - int *, void *); - static auto func_ptr = LoadSymbol("cusparseXcsr2gebsrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedRowPtrA, - csrSortedColIndA, descrC, bsrSortedRowPtrC, rowBlockDim, - colBlockDim, nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrC, float *bsrSortedValC, - int 
*bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDim, - int colBlockDim, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, const cusparseMatDescr_t, - float *, int *, int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseScsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDim, colBlockDim, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrC, double *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDim, - int colBlockDim, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, const cusparseMatDescr_t, - double *, int *, int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseDcsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDim, colBlockDim, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrC, cuComplex *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDim, - int colBlockDim, void 
*pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, const cusparseMatDescr_t, - cuComplex *, int *, int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseCcsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDim, colBlockDim, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrC, cuDoubleComplex *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDim, - int colBlockDim, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - const cusparseMatDescr_t, cuDoubleComplex *, int *, int *, int, int, - void *); - static auto func_ptr = LoadSymbol("cusparseZcsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDim, colBlockDim, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - 
cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const float *, const int *, const int *, int, - int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseSgebsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const double *, const int *, const int *, int, - int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseDgebsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - int, int, int, int, int *); - static auto func_ptr = 
LoadSymbol("cusparseCgebsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, int, int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseZgebsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const float *, const int *, const int *, int, - int, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSgebsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, 
bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const double *, const int *, const int *, int, - int, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDgebsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - int, int, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgebsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int 
mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, int, int, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgebsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseXgebsr2gebsrNnz( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, int rowBlockDimA, int colBlockDimA, - const cusparseMatDescr_t descrC, int *bsrSortedRowPtrC, int rowBlockDimC, - int colBlockDimC, int *nnzTotalDevHostPtr, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const int *, const int *, int, int, - const cusparseMatDescr_t, int *, int, int, int *, void *); - static auto func_ptr = LoadSymbol("cusparseXgebsr2gebsrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDimA, colBlockDimA, descrC, - bsrSortedRowPtrC, rowBlockDimC, colBlockDimC, - nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, 
int rowBlockDimA, - int colBlockDimA, const cusparseMatDescr_t descrC, float *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDimC, - int colBlockDimC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const float *, const int *, const int *, int, - int, const cusparseMatDescr_t, float *, int *, int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgebsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDimC, colBlockDimC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, const cusparseMatDescr_t descrC, double *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDimC, - int colBlockDimC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const double *, const int *, const int *, int, - int, const cusparseMatDescr_t, double *, int *, int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseDgebsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDimC, colBlockDimC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - 
const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, const cusparseMatDescr_t descrC, cuComplex *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDimC, - int colBlockDimC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - int, int, const cusparseMatDescr_t, cuComplex *, int *, int *, int, int, - void *); - static auto func_ptr = LoadSymbol("cusparseCgebsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDimC, colBlockDimC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, const cusparseMatDescr_t descrC, - cuDoubleComplex *bsrSortedValC, int *bsrSortedRowPtrC, - int *bsrSortedColIndC, int rowBlockDimC, int colBlockDimC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, int, int, const cusparseMatDescr_t, cuDoubleComplex *, int *, - int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseZgebsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, 
rowBlockDimC, colBlockDimC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCreateIdentityPermutation(cusparseHandle_t handle, int n, int *p) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, int *); - static auto func_ptr = - LoadSymbol("cusparseCreateIdentityPermutation"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, p); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcoosort_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, const int *cooRowsA, - const int *cooColsA, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const int *, const int *, size_t *); - static auto func_ptr = LoadSymbol("cusparseXcoosort_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, cooRowsA, cooColsA, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcoosortByRow(cusparseHandle_t handle, - int m, int n, int nnz, - int *cooRowsA, int *cooColsA, - int *P, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, int *, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseXcoosortByRow"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, cooRowsA, cooColsA, P, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcoosortByColumn(cusparseHandle_t handle, - int m, int n, int nnz, - int *cooRowsA, - int *cooColsA, int *P, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, int *, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseXcoosortByColumn"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, cooRowsA, cooColsA, P, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrsort_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, const int *csrRowPtrA, - const int 
*csrColIndA, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const int *, const int *, size_t *); - static auto func_ptr = LoadSymbol("cusparseXcsrsort_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrRowPtrA, csrColIndA, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrsort(cusparseHandle_t handle, int m, - int n, int nnz, - const cusparseMatDescr_t descrA, - const int *csrRowPtrA, - int *csrColIndA, int *P, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const int *, - int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseXcsrsort"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrRowPtrA, csrColIndA, P, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcscsort_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, const int *cscColPtrA, - const int *cscRowIndA, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const int *, const int *, size_t *); - static auto func_ptr = LoadSymbol("cusparseXcscsort_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, cscColPtrA, cscRowIndA, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcscsort(cusparseHandle_t handle, int m, - int n, int nnz, - const cusparseMatDescr_t descrA, - const int *cscColPtrA, - int *cscRowIndA, int *P, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const int *, - int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseXcscsort"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, cscColPtrA, cscRowIndA, P, - pBuffer); -} - 
-cusparseStatus_t CUSPARSEAPI cusparseScsru2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, float *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, float *, const int *, int *, - csru2csrInfo_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseScsru2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsru2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, double *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, double *, const int *, int *, - csru2csrInfo_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsru2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsru2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, cuComplex *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, cuComplex *, const int *, int *, - csru2csrInfo_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsru2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsru2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, cuDoubleComplex *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, - size_t 
*pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, cuDoubleComplex *, const int *, int *, - csru2csrInfo_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsru2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsru2csr( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, float *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, float *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsru2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsru2csr( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, double *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, double *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsru2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsru2csr( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, cuComplex *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, int *, csru2csrInfo_t, 
void *); - static auto func_ptr = LoadSymbol("cusparseCcsru2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsru2csr( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, cuDoubleComplex *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsru2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2csru( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, float *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, float *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsr2csru"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2csru( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, double *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, double *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsr2csru"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, 
csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2csru( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, cuComplex *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsr2csru"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2csru( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, cuDoubleComplex *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsr2csru"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - const float *threshold, const cusparseMatDescr_t descrC, - const float *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - size_t *); - static auto func_ptr = - LoadSymbol("cusparseSpruneDense2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrSortedValC, - 
csrSortedRowPtrC, csrSortedColIndC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - const double *threshold, const cusparseMatDescr_t descrC, - const double *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - size_t *); - static auto func_ptr = - LoadSymbol("cusparseDpruneDense2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csrNnz( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - const float *threshold, const cusparseMatDescr_t descrC, int *csrRowPtrC, - int *nnzTotalDevHostPtr, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, const float *, - const cusparseMatDescr_t, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseSpruneDense2csrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrRowPtrC, - nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csrNnz( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - const double *threshold, const cusparseMatDescr_t descrC, - int *csrSortedRowPtrC, int *nnzTotalDevHostPtr, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, const double *, - const cusparseMatDescr_t, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseDpruneDense2csrNnz"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csr( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - const float *threshold, const cusparseMatDescr_t descrC, - float *csrSortedValC, const int *csrSortedRowPtrC, int *csrSortedColIndC, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, const float *, - const cusparseMatDescr_t, float *, const int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseSpruneDense2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csr( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - const double *threshold, const cusparseMatDescr_t descrC, - double *csrSortedValC, const int *csrSortedRowPtrC, int *csrSortedColIndC, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, const double *, - const cusparseMatDescr_t, double *, const int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseDpruneDense2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const float *threshold, const cusparseMatDescr_t descrC, - const float *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, const float *, const cusparseMatDescr_t, - const float *, const int *, const int *, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSpruneCsr2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *threshold, const cusparseMatDescr_t descrC, - const double *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, const cusparseMatDescr_t, - const double *, const int *, const int *, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDpruneCsr2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csrNnz( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const float *threshold, const cusparseMatDescr_t descrC, - int *csrSortedRowPtrC, int *nnzTotalDevHostPtr, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, 
const cusparseMatDescr_t, const float *, - const int *, const int *, const float *, const cusparseMatDescr_t, int *, - int *, void *); - static auto func_ptr = LoadSymbol("cusparseSpruneCsr2csrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, threshold, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrNnz( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *threshold, const cusparseMatDescr_t descrC, - int *csrSortedRowPtrC, int *nnzTotalDevHostPtr, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, const cusparseMatDescr_t, int *, - int *, void *); - static auto func_ptr = LoadSymbol("cusparseDpruneCsr2csrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, threshold, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csr( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const float *threshold, const cusparseMatDescr_t descrC, - float *csrSortedValC, const int *csrSortedRowPtrC, int *csrSortedColIndC, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, const float *, const cusparseMatDescr_t, - float *, const int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseSpruneCsr2csr"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csr( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *threshold, const cusparseMatDescr_t descrC, - double *csrSortedValC, const int *csrSortedRowPtrC, int *csrSortedColIndC, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, const cusparseMatDescr_t, - double *, const int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseDpruneCsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csrByPercentage_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - float percentage, const cusparseMatDescr_t descrC, - const float *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, pruneInfo_t info, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, float, - const cusparseMatDescr_t, const float *, const int *, const int *, - pruneInfo_t, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSpruneDense2csrByPercentage_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseDpruneDense2csrByPercentage_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - float percentage, const cusparseMatDescr_t descrC, - const double *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, pruneInfo_t info, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, float, - const cusparseMatDescr_t, const double *, const int *, const int *, - pruneInfo_t, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDpruneDense2csrByPercentage_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csrNnzByPercentage( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - float percentage, const cusparseMatDescr_t descrC, int *csrRowPtrC, - int *nnzTotalDevHostPtr, pruneInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, float, - const cusparseMatDescr_t, int *, int *, pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseSpruneDense2csrNnzByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrRowPtrC, - nnzTotalDevHostPtr, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csrNnzByPercentage( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - float percentage, const cusparseMatDescr_t descrC, int *csrRowPtrC, - int *nnzTotalDevHostPtr, pruneInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, float, - const cusparseMatDescr_t, int *, int *, pruneInfo_t, void *); - static auto func_ptr = - 
LoadSymbol("cusparseDpruneDense2csrNnzByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrRowPtrC, - nnzTotalDevHostPtr, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csrByPercentage( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - float percentage, const cusparseMatDescr_t descrC, float *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC, pruneInfo_t info, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, float, - const cusparseMatDescr_t, float *, const int *, int *, pruneInfo_t, - void *); - static auto func_ptr = - LoadSymbol("cusparseSpruneDense2csrByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csrByPercentage( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - float percentage, const cusparseMatDescr_t descrC, double *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC, pruneInfo_t info, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, float, - const cusparseMatDescr_t, double *, const int *, int *, pruneInfo_t, - void *); - static auto func_ptr = - LoadSymbol("cusparseDpruneDense2csrByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csrByPercentage_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, float percentage, - 
const cusparseMatDescr_t descrC, const float *csrSortedValC, - const int *csrSortedRowPtrC, const int *csrSortedColIndC, pruneInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, float, const cusparseMatDescr_t, const float *, - const int *, const int *, pruneInfo_t, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSpruneCsr2csrByPercentage_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrByPercentage_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, float percentage, - const cusparseMatDescr_t descrC, const double *csrSortedValC, - const int *csrSortedRowPtrC, const int *csrSortedColIndC, pruneInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, float, const cusparseMatDescr_t, const double *, - const int *, const int *, pruneInfo_t, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDpruneCsr2csrByPercentage_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csrNnzByPercentage( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrSortedValA, 
- const int *csrSortedRowPtrA, const int *csrSortedColIndA, float percentage, - const cusparseMatDescr_t descrC, int *csrSortedRowPtrC, - int *nnzTotalDevHostPtr, pruneInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, float, const cusparseMatDescr_t, int *, int *, - pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseSpruneCsr2csrNnzByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, percentage, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrNnzByPercentage( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, float percentage, - const cusparseMatDescr_t descrC, int *csrSortedRowPtrC, - int *nnzTotalDevHostPtr, pruneInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, float, const cusparseMatDescr_t, int *, int *, - pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseDpruneCsr2csrNnzByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, percentage, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csrByPercentage( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, float percentage, - const cusparseMatDescr_t descrC, float *csrSortedValC, - const int *csrSortedRowPtrC, int 
*csrSortedColIndC, pruneInfo_t info, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, float, const cusparseMatDescr_t, float *, - const int *, int *, pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseSpruneCsr2csrByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrByPercentage( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, float percentage, - const cusparseMatDescr_t descrC, double *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC, pruneInfo_t info, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, float, const cusparseMatDescr_t, double *, - const int *, int *, pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseDpruneCsr2csrByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsr2cscEx2( - cusparseHandle_t handle, int m, int n, int nnz, const void *csrVal, - const int *csrRowPtr, const int *csrColInd, void *cscVal, int *cscColPtr, - int *cscRowInd, cudaDataType valType, cusparseAction_t copyValues, - cusparseIndexBase_t idxBase, cusparseCsr2CscAlg_t alg, void *buffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, 
int, int, const void *, const int *, const int *, - void *, int *, int *, cudaDataType, cusparseAction_t, cusparseIndexBase_t, - cusparseCsr2CscAlg_t, void *); - static auto func_ptr = LoadSymbol("cusparseCsr2cscEx2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, cscVal, - cscColPtr, cscRowInd, valType, copyValues, idxBase, alg, - buffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsr2cscEx2_bufferSize( - cusparseHandle_t handle, int m, int n, int nnz, const void *csrVal, - const int *csrRowPtr, const int *csrColInd, void *cscVal, int *cscColPtr, - int *cscRowInd, cudaDataType valType, cusparseAction_t copyValues, - cusparseIndexBase_t idxBase, cusparseCsr2CscAlg_t alg, size_t *bufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const void *, const int *, const int *, - void *, int *, int *, cudaDataType, cusparseAction_t, cusparseIndexBase_t, - cusparseCsr2CscAlg_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCsr2cscEx2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, cscVal, - cscColPtr, cscRowInd, valType, copyValues, idxBase, alg, - bufferSize); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCreateSpVec(cusparseSpVecDescr_t *spVecDescr, int64_t size, int64_t nnz, - void *indices, void *values, cusparseIndexType_t idxType, - cusparseIndexBase_t idxBase, cudaDataType valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseSpVecDescr_t *, int64_t, int64_t, void *, void *, - cusparseIndexType_t, cusparseIndexBase_t, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCreateSpVec"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spVecDescr, size, nnz, indices, values, idxType, idxBase, - valueType); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDestroySpVec(cusparseSpVecDescr_t spVecDescr) { - using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)(cusparseSpVecDescr_t); - static auto func_ptr = LoadSymbol("cusparseDestroySpVec"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spVecDescr); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpVecGet(cusparseSpVecDescr_t spVecDescr, - int64_t *size, int64_t *nnz, - void **indices, void **values, - cusparseIndexType_t *idxType, - cusparseIndexBase_t *idxBase, - cudaDataType *valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseSpVecDescr_t, int64_t *, int64_t *, void **, void **, - cusparseIndexType_t *, cusparseIndexBase_t *, cudaDataType *); - static auto func_ptr = LoadSymbol("cusparseSpVecGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spVecDescr, size, nnz, indices, values, idxType, idxBase, - valueType); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpVecGetIndexBase( - cusparseSpVecDescr_t spVecDescr, cusparseIndexBase_t *idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpVecDescr_t, - cusparseIndexBase_t *); - static auto func_ptr = LoadSymbol("cusparseSpVecGetIndexBase"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spVecDescr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpVecGetValues(cusparseSpVecDescr_t spVecDescr, void **values) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseSpVecDescr_t, void **); - static auto func_ptr = LoadSymbol("cusparseSpVecGetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spVecDescr, values); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpVecSetValues(cusparseSpVecDescr_t spVecDescr, void *values) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpVecDescr_t, void *); - static auto func_ptr = LoadSymbol("cusparseSpVecSetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spVecDescr, values); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCreateDnVec(cusparseDnVecDescr_t *dnVecDescr, int64_t size, - void 
*values, cudaDataType valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseDnVecDescr_t *, int64_t, void *, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCreateDnVec"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnVecDescr, size, values, valueType); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDestroyDnVec(cusparseDnVecDescr_t dnVecDescr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseDnVecDescr_t); - static auto func_ptr = LoadSymbol("cusparseDestroyDnVec"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnVecDescr); -} - -cusparseStatus_t CUSPARSEAPI cusparseDnVecGet(cusparseDnVecDescr_t dnVecDescr, - int64_t *size, void **values, - cudaDataType *valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseDnVecDescr_t, int64_t *, void **, cudaDataType *); - static auto func_ptr = LoadSymbol("cusparseDnVecGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnVecDescr, size, values, valueType); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDnVecGetValues(cusparseDnVecDescr_t dnVecDescr, void **values) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseDnVecDescr_t, void **); - static auto func_ptr = LoadSymbol("cusparseDnVecGetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnVecDescr, values); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDnVecSetValues(cusparseDnVecDescr_t dnVecDescr, void *values) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseDnVecDescr_t, void *); - static auto func_ptr = LoadSymbol("cusparseDnVecSetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnVecDescr, values); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDestroySpMat(cusparseSpMatDescr_t spMatDescr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseDestroySpMat"); - if (!func_ptr) return GetSymbolNotFoundError(); 
- return func_ptr(spMatDescr); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpMatGetFormat( - cusparseSpMatDescr_t spMatDescr, cusparseFormat_t *format) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMatDescr_t, cusparseFormat_t *); - static auto func_ptr = LoadSymbol("cusparseSpMatGetFormat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, format); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpMatGetIndexBase( - cusparseSpMatDescr_t spMatDescr, cusparseIndexBase_t *idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMatDescr_t, - cusparseIndexBase_t *); - static auto func_ptr = LoadSymbol("cusparseSpMatGetIndexBase"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpMatGetValues(cusparseSpMatDescr_t spMatDescr, void **values) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMatDescr_t, void **); - static auto func_ptr = LoadSymbol("cusparseSpMatGetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, values); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpMatSetValues(cusparseSpMatDescr_t spMatDescr, void *values) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMatDescr_t, void *); - static auto func_ptr = LoadSymbol("cusparseSpMatSetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, values); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpMatGetSize(cusparseSpMatDescr_t spMatDescr, int64_t *rows, - int64_t *cols, int64_t *nnz) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseSpMatDescr_t, int64_t *, int64_t *, int64_t *); - static auto func_ptr = LoadSymbol("cusparseSpMatGetSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, nnz); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpMatSetStridedBatch(cusparseSpMatDescr_t spMatDescr, int batchCount) { - using 
FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMatDescr_t, int); - static auto func_ptr = LoadSymbol("cusparseSpMatSetStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, batchCount); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpMatGetStridedBatch(cusparseSpMatDescr_t spMatDescr, int *batchCount) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMatDescr_t, int *); - static auto func_ptr = LoadSymbol("cusparseSpMatGetStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, batchCount); -} - -cusparseStatus_t CUSPARSEAPI cusparseCooSetStridedBatch( - cusparseSpMatDescr_t spMatDescr, int batchCount, int64_t batchStride) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMatDescr_t, int, int64_t); - static auto func_ptr = LoadSymbol("cusparseCooSetStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, batchCount, batchStride); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsrSetStridedBatch( - cusparseSpMatDescr_t spMatDescr, int batchCount, int64_t offsetsBatchStride, - int64_t columnsValuesBatchStride) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMatDescr_t, int, - int64_t, int64_t); - static auto func_ptr = LoadSymbol("cusparseCsrSetStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, batchCount, offsetsBatchStride, - columnsValuesBatchStride); -} - -#if CUDA_VERSION >= 11030 - -cusparseStatus_t CUSPARSEAPI cusparseSpMatGetAttribute( - cusparseSpMatDescr_t spMatDescr, cusparseSpMatAttribute_t attribute, - void *data, size_t dataSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseSpMatDescr_t, cusparseSpMatAttribute_t, void *, size_t); - static auto func_ptr = LoadSymbol("cusparseSpMatGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, attribute, data, dataSize); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseSpMatSetAttribute( - cusparseSpMatDescr_t spMatDescr, cusparseSpMatAttribute_t attribute, - void *data, size_t dataSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseSpMatDescr_t, cusparseSpMatAttribute_t, void *, size_t); - static auto func_ptr = LoadSymbol("cusparseSpMatSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, attribute, data, dataSize); -} - -#endif // CUDA_VERSION >= 11030 - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsr( - cusparseSpMatDescr_t *spMatDescr, int64_t rows, int64_t cols, int64_t nnz, - void *csrRowOffsets, void *csrColInd, void *csrValues, - cusparseIndexType_t csrRowOffsetsType, cusparseIndexType_t csrColIndType, - cusparseIndexBase_t idxBase, cudaDataType valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseSpMatDescr_t *, int64_t, int64_t, int64_t, void *, void *, void *, - cusparseIndexType_t, cusparseIndexType_t, cusparseIndexBase_t, - cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCreateCsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, nnz, csrRowOffsets, csrColInd, - csrValues, csrRowOffsetsType, csrColIndType, idxBase, - valueType); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsc( - cusparseSpMatDescr_t *spMatDescr, int64_t rows, int64_t cols, int64_t nnz, - void *cscColOffsets, void *cscRowInd, void *cscValues, - cusparseIndexType_t cscColOffsetsType, cusparseIndexType_t cscRowIndType, - cusparseIndexBase_t idxBase, cudaDataType valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseSpMatDescr_t *, int64_t, int64_t, int64_t, void *, void *, void *, - cusparseIndexType_t, cusparseIndexType_t, cusparseIndexBase_t, - cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCreateCsc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, nnz, cscColOffsets, cscRowInd, - cscValues, cscColOffsetsType, cscRowIndType, 
idxBase, - valueType); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsrGet( - cusparseSpMatDescr_t spMatDescr, int64_t *rows, int64_t *cols, int64_t *nnz, - void **csrRowOffsets, void **csrColInd, void **csrValues, - cusparseIndexType_t *csrRowOffsetsType, cusparseIndexType_t *csrColIndType, - cusparseIndexBase_t *idxBase, cudaDataType *valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseSpMatDescr_t, int64_t *, int64_t *, int64_t *, void **, void **, - void **, cusparseIndexType_t *, cusparseIndexType_t *, - cusparseIndexBase_t *, cudaDataType *); - static auto func_ptr = LoadSymbol("cusparseCsrGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, nnz, csrRowOffsets, csrColInd, - csrValues, csrRowOffsetsType, csrColIndType, idxBase, - valueType); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCsrSetPointers(cusparseSpMatDescr_t spMatDescr, void *csrRowOffsets, - void *csrColInd, void *csrValues) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMatDescr_t, void *, - void *, void *); - static auto func_ptr = LoadSymbol("cusparseCsrSetPointers"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, csrRowOffsets, csrColInd, csrValues); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCscSetPointers(cusparseSpMatDescr_t spMatDescr, void *cscColOffsets, - void *cscRowInd, void *cscValues) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMatDescr_t, void *, - void *, void *); - static auto func_ptr = LoadSymbol("cusparseCscSetPointers"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, cscColOffsets, cscRowInd, cscValues); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCoo(cusparseSpMatDescr_t *spMatDescr, - int64_t rows, int64_t cols, - int64_t nnz, void *cooRowInd, - void *cooColInd, void *cooValues, - cusparseIndexType_t cooIdxType, - cusparseIndexBase_t idxBase, - cudaDataType valueType) { - using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)( - cusparseSpMatDescr_t *, int64_t, int64_t, int64_t, void *, void *, void *, - cusparseIndexType_t, cusparseIndexBase_t, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCreateCoo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, nnz, cooRowInd, cooColInd, cooValues, - cooIdxType, idxBase, valueType); -} - -CUSPARSE_DEPRECATED(cusparseCreateCoo) -cusparseStatus_t CUSPARSEAPI cusparseCreateCooAoS( - cusparseSpMatDescr_t *spMatDescr, int64_t rows, int64_t cols, int64_t nnz, - void *cooInd, void *cooValues, cusparseIndexType_t cooIdxType, - cusparseIndexBase_t idxBase, cudaDataType valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseSpMatDescr_t *, int64_t, int64_t, int64_t, void *, void *, - cusparseIndexType_t, cusparseIndexBase_t, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCreateCooAoS"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, nnz, cooInd, cooValues, cooIdxType, - idxBase, valueType); -} - -cusparseStatus_t CUSPARSEAPI cusparseCooGet( - cusparseSpMatDescr_t spMatDescr, int64_t *rows, int64_t *cols, int64_t *nnz, - void **cooRowInd, // COO row indices - void **cooColInd, // COO column indices - void **cooValues, // COO values - cusparseIndexType_t *idxType, cusparseIndexBase_t *idxBase, - cudaDataType *valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseSpMatDescr_t, int64_t *, int64_t *, int64_t *, void **, void **, - void **, cusparseIndexType_t *, cusparseIndexBase_t *, cudaDataType *); - static auto func_ptr = LoadSymbol("cusparseCooGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, nnz, cooRowInd, cooColInd, cooValues, - idxType, idxBase, valueType); -} - -CUSPARSE_DEPRECATED(cusparseCooGet) -cusparseStatus_t CUSPARSEAPI cusparseCooAoSGet(cusparseSpMatDescr_t spMatDescr, - int64_t *rows, int64_t *cols, - int64_t 
*nnz, - void **cooInd, // COO indices - void **cooValues, // COO values - cusparseIndexType_t *idxType, - cusparseIndexBase_t *idxBase, - cudaDataType *valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseSpMatDescr_t, int64_t *, int64_t *, int64_t *, void **, void **, - cusparseIndexType_t *, cusparseIndexBase_t *, cudaDataType *); - static auto func_ptr = LoadSymbol("cusparseCooAoSGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, nnz, cooInd, cooValues, idxType, - idxBase, valueType); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCooSetPointers(cusparseSpMatDescr_t spMatDescr, void *cooRows, - void *cooColumns, void *cooValues) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMatDescr_t, void *, - void *, void *); - static auto func_ptr = LoadSymbol("cusparseCooSetPointers"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, cooRows, cooColumns, cooValues); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateBlockedEll( - cusparseSpMatDescr_t *spMatDescr, int64_t rows, int64_t cols, - int64_t ellBlockSize, int64_t ellCols, void *ellColInd, void *ellValue, - cusparseIndexType_t ellIdxType, cusparseIndexBase_t idxBase, - cudaDataType valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseSpMatDescr_t *, int64_t, int64_t, int64_t, int64_t, void *, - void *, cusparseIndexType_t, cusparseIndexBase_t, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCreateBlockedEll"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, ellBlockSize, ellCols, ellColInd, - ellValue, ellIdxType, idxBase, valueType); -} - -cusparseStatus_t CUSPARSEAPI cusparseBlockedEllGet( - cusparseSpMatDescr_t spMatDescr, int64_t *rows, int64_t *cols, - int64_t *ellBlockSize, int64_t *ellCols, void **ellColInd, void **ellValue, - cusparseIndexType_t *ellIdxType, cusparseIndexBase_t *idxBase, - cudaDataType *valueType) { - using 
FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseSpMatDescr_t, int64_t *, int64_t *, int64_t *, int64_t *, void **, - void **, cusparseIndexType_t *, cusparseIndexBase_t *, cudaDataType *); - static auto func_ptr = LoadSymbol("cusparseBlockedEllGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, ellBlockSize, ellCols, ellColInd, - ellValue, ellIdxType, idxBase, valueType); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateDnMat( - cusparseDnMatDescr_t *dnMatDescr, int64_t rows, int64_t cols, int64_t ld, - void *values, cudaDataType valueType, cusparseOrder_t order) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseDnMatDescr_t *, int64_t, int64_t, int64_t, void *, cudaDataType, - cusparseOrder_t); - static auto func_ptr = LoadSymbol("cusparseCreateDnMat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnMatDescr, rows, cols, ld, values, valueType, order); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDestroyDnMat(cusparseDnMatDescr_t dnMatDescr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseDnMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseDestroyDnMat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnMatDescr); -} - -cusparseStatus_t CUSPARSEAPI cusparseDnMatGet(cusparseDnMatDescr_t dnMatDescr, - int64_t *rows, int64_t *cols, - int64_t *ld, void **values, - cudaDataType *type, - cusparseOrder_t *order) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseDnMatDescr_t, int64_t *, int64_t *, int64_t *, void **, - cudaDataType *, cusparseOrder_t *); - static auto func_ptr = LoadSymbol("cusparseDnMatGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnMatDescr, rows, cols, ld, values, type, order); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDnMatGetValues(cusparseDnMatDescr_t dnMatDescr, void **values) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseDnMatDescr_t, void **); - static auto 
func_ptr = LoadSymbol("cusparseDnMatGetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnMatDescr, values); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDnMatSetValues(cusparseDnMatDescr_t dnMatDescr, void *values) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseDnMatDescr_t, void *); - static auto func_ptr = LoadSymbol("cusparseDnMatSetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnMatDescr, values); -} - -cusparseStatus_t CUSPARSEAPI cusparseDnMatSetStridedBatch( - cusparseDnMatDescr_t dnMatDescr, int batchCount, int64_t batchStride) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseDnMatDescr_t, int, int64_t); - static auto func_ptr = LoadSymbol("cusparseDnMatSetStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnMatDescr, batchCount, batchStride); -} - -cusparseStatus_t CUSPARSEAPI cusparseDnMatGetStridedBatch( - cusparseDnMatDescr_t dnMatDescr, int *batchCount, int64_t *batchStride) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseDnMatDescr_t, int *, int64_t *); - static auto func_ptr = LoadSymbol("cusparseDnMatGetStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnMatDescr, batchCount, batchStride); -} - -cusparseStatus_t CUSPARSEAPI cusparseAxpby(cusparseHandle_t handle, - const void *alpha, - cusparseSpVecDescr_t vecX, - const void *beta, - cusparseDnVecDescr_t vecY) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const void *, cusparseSpVecDescr_t, const void *, - cusparseDnVecDescr_t); - static auto func_ptr = LoadSymbol("cusparseAxpby"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, vecX, beta, vecY); -} - -cusparseStatus_t CUSPARSEAPI cusparseGather(cusparseHandle_t handle, - cusparseDnVecDescr_t vecY, - cusparseSpVecDescr_t vecX) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDnVecDescr_t, 
cusparseSpVecDescr_t); - static auto func_ptr = LoadSymbol("cusparseGather"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, vecY, vecX); -} - -cusparseStatus_t CUSPARSEAPI cusparseScatter(cusparseHandle_t handle, - cusparseSpVecDescr_t vecX, - cusparseDnVecDescr_t vecY) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseSpVecDescr_t, cusparseDnVecDescr_t); - static auto func_ptr = LoadSymbol("cusparseScatter"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, vecX, vecY); -} - -cusparseStatus_t CUSPARSEAPI cusparseRot(cusparseHandle_t handle, - const void *c_coeff, - const void *s_coeff, - cusparseSpVecDescr_t vecX, - cusparseDnVecDescr_t vecY) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const void *, const void *, cusparseSpVecDescr_t, - cusparseDnVecDescr_t); - static auto func_ptr = LoadSymbol("cusparseRot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, c_coeff, s_coeff, vecX, vecY); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpVV_bufferSize( - cusparseHandle_t handle, cusparseOperation_t opX, cusparseSpVecDescr_t vecX, - cusparseDnVecDescr_t vecY, const void *result, cudaDataType computeType, - size_t *bufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseSpVecDescr_t, - cusparseDnVecDescr_t, const void *, cudaDataType, size_t *); - static auto func_ptr = LoadSymbol("cusparseSpVV_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opX, vecX, vecY, result, computeType, bufferSize); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpVV(cusparseHandle_t handle, cusparseOperation_t opX, - cusparseSpVecDescr_t vecX, cusparseDnVecDescr_t vecY, void *result, - cudaDataType computeType, void *externalBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseSpVecDescr_t, - 
cusparseDnVecDescr_t, void *, cudaDataType, void *); - static auto func_ptr = LoadSymbol("cusparseSpVV"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opX, vecX, vecY, result, computeType, externalBuffer); -} - -#if CUSPARSE_VERSION >= 11300 - -cusparseStatus_t CUSPARSEAPI cusparseSparseToDense_bufferSize( - cusparseHandle_t handle, cusparseSpMatDescr_t matA, - cusparseDnMatDescr_t matB, cusparseSparseToDenseAlg_t alg, - size_t *bufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseSpMatDescr_t, cusparseDnMatDescr_t, - cusparseSparseToDenseAlg_t, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSparseToDense_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, matA, matB, alg, bufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSparseToDense( - cusparseHandle_t handle, cusparseSpMatDescr_t matA, - cusparseDnMatDescr_t matB, cusparseSparseToDenseAlg_t alg, void *buffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseSpMatDescr_t, cusparseDnMatDescr_t, - cusparseSparseToDenseAlg_t, void *); - static auto func_ptr = LoadSymbol("cusparseSparseToDense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, matA, matB, alg, buffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDenseToSparse_bufferSize( - cusparseHandle_t handle, cusparseDnMatDescr_t matA, - cusparseSpMatDescr_t matB, cusparseDenseToSparseAlg_t alg, - size_t *bufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDnMatDescr_t, cusparseSpMatDescr_t, - cusparseDenseToSparseAlg_t, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDenseToSparse_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, matA, matB, alg, bufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDenseToSparse_analysis( - cusparseHandle_t handle, cusparseDnMatDescr_t matA, - 
cusparseSpMatDescr_t matB, cusparseDenseToSparseAlg_t alg, void *buffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDnMatDescr_t, cusparseSpMatDescr_t, - cusparseDenseToSparseAlg_t, void *); - static auto func_ptr = LoadSymbol("cusparseDenseToSparse_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, matA, matB, alg, buffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDenseToSparse_convert( - cusparseHandle_t handle, cusparseDnMatDescr_t matA, - cusparseSpMatDescr_t matB, cusparseDenseToSparseAlg_t alg, void *buffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDnMatDescr_t, cusparseSpMatDescr_t, - cusparseDenseToSparseAlg_t, void *); - static auto func_ptr = LoadSymbol("cusparseDenseToSparse_convert"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, matA, matB, alg, buffer); -} - -#endif // CUSPARSE_VERSION >= 11300 - -cusparseStatus_t CUSPARSEAPI cusparseSpMV( - cusparseHandle_t handle, cusparseOperation_t opA, const void *alpha, - cusparseSpMatDescr_t matA, cusparseDnVecDescr_t vecX, const void *beta, - cusparseDnVecDescr_t vecY, cudaDataType computeType, cusparseSpMVAlg_t alg, - void *externalBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const void *, cusparseSpMatDescr_t, - cusparseDnVecDescr_t, const void *, cusparseDnVecDescr_t, cudaDataType, - cusparseSpMVAlg_t, void *); - static auto func_ptr = LoadSymbol("cusparseSpMV"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, alpha, matA, vecX, beta, vecY, computeType, alg, - externalBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpMV_bufferSize( - cusparseHandle_t handle, cusparseOperation_t opA, const void *alpha, - cusparseSpMatDescr_t matA, cusparseDnVecDescr_t vecX, const void *beta, - cusparseDnVecDescr_t vecY, cudaDataType computeType, cusparseSpMVAlg_t alg, - size_t 
*bufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const void *, cusparseSpMatDescr_t, - cusparseDnVecDescr_t, const void *, cusparseDnVecDescr_t, cudaDataType, - cusparseSpMVAlg_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSpMV_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, alpha, matA, vecX, beta, vecY, computeType, alg, - bufferSize); -} - -#if CUDA_VERSION >= 11030 - -cusparseStatus_t CUSPARSEAPI -cusparseSpSV_createDescr(cusparseSpSVDescr_t *descr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpSVDescr_t *); - static auto func_ptr = LoadSymbol("cusparseSpSV_createDescr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descr); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpSV_destroyDescr(cusparseSpSVDescr_t descr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpSVDescr_t); - static auto func_ptr = LoadSymbol("cusparseSpSV_destroyDescr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descr); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpSV_bufferSize( - cusparseHandle_t handle, cusparseOperation_t opA, const void *alpha, - cusparseSpMatDescr_t matA, cusparseDnVecDescr_t vecX, - cusparseDnVecDescr_t vecY, cudaDataType computeType, cusparseSpSVAlg_t alg, - cusparseSpSVDescr_t spsvDescr, size_t *bufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const void *, cusparseSpMatDescr_t, - cusparseDnVecDescr_t, cusparseDnVecDescr_t, cudaDataType, - cusparseSpSVAlg_t, cusparseSpSVDescr_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSpSV_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, alpha, matA, vecX, vecY, computeType, alg, - spsvDescr, bufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpSV_analysis( - cusparseHandle_t handle, cusparseOperation_t opA, const void 
*alpha, - cusparseSpMatDescr_t matA, cusparseDnVecDescr_t vecX, - cusparseDnVecDescr_t vecY, cudaDataType computeType, cusparseSpSVAlg_t alg, - cusparseSpSVDescr_t spsvDescr, void *externalBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const void *, cusparseSpMatDescr_t, - cusparseDnVecDescr_t, cusparseDnVecDescr_t, cudaDataType, - cusparseSpSVAlg_t, cusparseSpSVDescr_t, void *); - static auto func_ptr = LoadSymbol("cusparseSpSV_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, alpha, matA, vecX, vecY, computeType, alg, - spsvDescr, externalBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpSV_solve( - cusparseHandle_t handle, cusparseOperation_t opA, const void *alpha, - cusparseSpMatDescr_t matA, cusparseDnVecDescr_t vecX, - cusparseDnVecDescr_t vecY, cudaDataType computeType, cusparseSpSVAlg_t alg, - cusparseSpSVDescr_t spsvDescr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const void *, cusparseSpMatDescr_t, - cusparseDnVecDescr_t, cusparseDnVecDescr_t, cudaDataType, - cusparseSpSVAlg_t, cusparseSpSVDescr_t); - static auto func_ptr = LoadSymbol("cusparseSpSV_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, alpha, matA, vecX, vecY, computeType, alg, - spsvDescr); -} - -#endif // CUDA_VERSION >= 11030 - -cusparseStatus_t CUSPARSEAPI cusparseSpMM_bufferSize( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - const void *alpha, cusparseSpMatDescr_t matA, cusparseDnMatDescr_t matB, - const void *beta, cusparseDnMatDescr_t matC, cudaDataType computeType, - cusparseSpMMAlg_t alg, size_t *bufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, - cusparseSpMatDescr_t, cusparseDnMatDescr_t, const void *, - cusparseDnMatDescr_t, cudaDataType, cusparseSpMMAlg_t, size_t *); 
- static auto func_ptr = LoadSymbol("cusparseSpMM_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, - alg, bufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpMM_preprocess( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - const void *alpha, cusparseSpMatDescr_t matA, cusparseDnMatDescr_t matB, - const void *beta, cusparseDnMatDescr_t matC, cudaDataType computeType, - cusparseSpMMAlg_t alg, void *externalBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, - cusparseSpMatDescr_t, cusparseDnMatDescr_t, const void *, - cusparseDnMatDescr_t, cudaDataType, cusparseSpMMAlg_t, void *); - static auto func_ptr = LoadSymbol("cusparseSpMM_preprocess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, - alg, externalBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpMM( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - const void *alpha, cusparseSpMatDescr_t matA, cusparseDnMatDescr_t matB, - const void *beta, cusparseDnMatDescr_t matC, cudaDataType computeType, - cusparseSpMMAlg_t alg, void *externalBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, - cusparseSpMatDescr_t, cusparseDnMatDescr_t, const void *, - cusparseDnMatDescr_t, cudaDataType, cusparseSpMMAlg_t, void *); - static auto func_ptr = LoadSymbol("cusparseSpMM"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, - alg, externalBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpGEMM_createDescr(cusparseSpGEMMDescr_t *descr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpGEMMDescr_t *); - static auto func_ptr = 
LoadSymbol("cusparseSpGEMM_createDescr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descr); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpGEMM_destroyDescr(cusparseSpGEMMDescr_t descr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpGEMMDescr_t); - static auto func_ptr = LoadSymbol("cusparseSpGEMM_destroyDescr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descr); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpGEMM_workEstimation( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - const void *alpha, cusparseSpMatDescr_t matA, cusparseSpMatDescr_t matB, - const void *beta, cusparseSpMatDescr_t matC, cudaDataType computeType, - cusparseSpGEMMAlg_t alg, cusparseSpGEMMDescr_t spgemmDescr, - size_t *bufferSize1, void *externalBuffer1) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, - cusparseSpMatDescr_t, cusparseSpMatDescr_t, const void *, - cusparseSpMatDescr_t, cudaDataType, cusparseSpGEMMAlg_t, - cusparseSpGEMMDescr_t, size_t *, void *); - static auto func_ptr = LoadSymbol("cusparseSpGEMM_workEstimation"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, - alg, spgemmDescr, bufferSize1, externalBuffer1); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpGEMM_compute( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - const void *alpha, cusparseSpMatDescr_t matA, cusparseSpMatDescr_t matB, - const void *beta, cusparseSpMatDescr_t matC, cudaDataType computeType, - cusparseSpGEMMAlg_t alg, cusparseSpGEMMDescr_t spgemmDescr, - size_t *bufferSize2, void *externalBuffer2) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, - cusparseSpMatDescr_t, cusparseSpMatDescr_t, const void *, - cusparseSpMatDescr_t, cudaDataType, 
cusparseSpGEMMAlg_t, - cusparseSpGEMMDescr_t, size_t *, void *); - static auto func_ptr = LoadSymbol("cusparseSpGEMM_compute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, - alg, spgemmDescr, bufferSize2, externalBuffer2); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpGEMM_copy( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - const void *alpha, cusparseSpMatDescr_t matA, cusparseSpMatDescr_t matB, - const void *beta, cusparseSpMatDescr_t matC, cudaDataType computeType, - cusparseSpGEMMAlg_t alg, cusparseSpGEMMDescr_t spgemmDescr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, - cusparseSpMatDescr_t, cusparseSpMatDescr_t, const void *, - cusparseSpMatDescr_t, cudaDataType, cusparseSpGEMMAlg_t, - cusparseSpGEMMDescr_t); - static auto func_ptr = LoadSymbol("cusparseSpGEMM_copy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, - alg, spgemmDescr); -} - -CUSPARSE_DEPRECATED(cusparseSDDMM) -cusparseStatus_t CUSPARSEAPI cusparseConstrainedGeMM( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - const void *alpha, cusparseDnMatDescr_t matA, cusparseDnMatDescr_t matB, - const void *beta, cusparseSpMatDescr_t matC, cudaDataType computeType, - void *externalBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, - cusparseDnMatDescr_t, cusparseDnMatDescr_t, const void *, - cusparseSpMatDescr_t, cudaDataType, void *); - static auto func_ptr = LoadSymbol("cusparseConstrainedGeMM"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, - externalBuffer); -} - -CUSPARSE_DEPRECATED(cusparseSDDMM) -cusparseStatus_t 
CUSPARSEAPI cusparseConstrainedGeMM_bufferSize( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - const void *alpha, cusparseDnMatDescr_t matA, cusparseDnMatDescr_t matB, - const void *beta, cusparseSpMatDescr_t matC, cudaDataType computeType, - size_t *bufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, - cusparseDnMatDescr_t, cusparseDnMatDescr_t, const void *, - cusparseSpMatDescr_t, cudaDataType, size_t *); - static auto func_ptr = - LoadSymbol("cusparseConstrainedGeMM_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, - bufferSize); -} - -#if CUSPARSE_VERSION >= 11400 - -cusparseStatus_t CUSPARSEAPI cusparseSDDMM_bufferSize( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - const void *alpha, cusparseDnMatDescr_t matA, cusparseDnMatDescr_t matB, - const void *beta, cusparseSpMatDescr_t matC, cudaDataType computeType, - cusparseSDDMMAlg_t alg, size_t *bufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, - cusparseDnMatDescr_t, cusparseDnMatDescr_t, const void *, - cusparseSpMatDescr_t, cudaDataType, cusparseSDDMMAlg_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSDDMM_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, - alg, bufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSDDMM_preprocess( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - const void *alpha, cusparseDnMatDescr_t matA, cusparseDnMatDescr_t matB, - const void *beta, cusparseSpMatDescr_t matC, cudaDataType computeType, - cusparseSDDMMAlg_t alg, void *externalBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, 
cusparseOperation_t, cusparseOperation_t, const void *, - cusparseDnMatDescr_t, cusparseDnMatDescr_t, const void *, - cusparseSpMatDescr_t, cudaDataType, cusparseSDDMMAlg_t, void *); - static auto func_ptr = LoadSymbol("cusparseSDDMM_preprocess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, - alg, externalBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSDDMM( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - const void *alpha, cusparseDnMatDescr_t matA, cusparseDnMatDescr_t matB, - const void *beta, cusparseSpMatDescr_t matC, cudaDataType computeType, - cusparseSDDMMAlg_t alg, void *externalBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, - cusparseDnMatDescr_t, cusparseDnMatDescr_t, const void *, - cusparseSpMatDescr_t, cudaDataType, cusparseSDDMMAlg_t, void *); - static auto func_ptr = LoadSymbol("cusparseSDDMM"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, - alg, externalBuffer); -} - -#endif // CUSPARSE_VERSION >= 11400 - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cusparse_12_0.inc b/third_party/xla/third_party/tsl/tsl/cuda/cusparse_12_0.inc deleted file mode 100644 index 91641482860d90..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cusparse_12_0.inc +++ /dev/null @@ -1,6080 +0,0 @@ -// Auto-generated, do not edit. 
- -extern "C" { -cusparseStatus_t CUSPARSEAPI cusparseCreate(cusparseHandle_t *handle) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t *); - static auto func_ptr = LoadSymbol("cusparseCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroy(cusparseHandle_t handle) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t); - static auto func_ptr = LoadSymbol("cusparseDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cusparseStatus_t CUSPARSEAPI cusparseGetVersion(cusparseHandle_t handle, - int *version) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int *); - static auto func_ptr = LoadSymbol("cusparseGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, version); -} - -cusparseStatus_t CUSPARSEAPI cusparseGetProperty(libraryPropertyType type, - int *value) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(libraryPropertyType, int *); - static auto func_ptr = LoadSymbol("cusparseGetProperty"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type, value); -} - -const char *CUSPARSEAPI cusparseGetErrorName(cusparseStatus_t status) { - using FuncPtr = const char *(CUSPARSEAPI *)(cusparseStatus_t); - static auto func_ptr = LoadSymbol("cusparseGetErrorName"); - if (!func_ptr) return "cusparseGetErrorName symbol not found."; - return func_ptr(status); -} - -const char *CUSPARSEAPI cusparseGetErrorString(cusparseStatus_t status) { - using FuncPtr = const char *(CUSPARSEAPI *)(cusparseStatus_t); - static auto func_ptr = LoadSymbol("cusparseGetErrorString"); - if (!func_ptr) return "cusparseGetErrorString symbol not found."; - return func_ptr(status); -} - -cusparseStatus_t CUSPARSEAPI cusparseSetStream(cusparseHandle_t handle, - cudaStream_t streamId) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, cudaStream_t); - 
static auto func_ptr = LoadSymbol("cusparseSetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cusparseStatus_t CUSPARSEAPI cusparseGetStream(cusparseHandle_t handle, - cudaStream_t *streamId) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, cudaStream_t *); - static auto func_ptr = LoadSymbol("cusparseGetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cusparseStatus_t CUSPARSEAPI -cusparseGetPointerMode(cusparseHandle_t handle, cusparsePointerMode_t *mode) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, - cusparsePointerMode_t *); - static auto func_ptr = LoadSymbol("cusparseGetPointerMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSetPointerMode(cusparseHandle_t handle, cusparsePointerMode_t mode) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, cusparsePointerMode_t); - static auto func_ptr = LoadSymbol("cusparseSetPointerMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cusparseStatus_t CUSPARSEAPI -cusparseLoggerSetCallback(cusparseLoggerCallback_t callback) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseLoggerCallback_t); - static auto func_ptr = LoadSymbol("cusparseLoggerSetCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(callback); -} - -cusparseStatus_t CUSPARSEAPI cusparseLoggerSetFile(FILE *file) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(FILE *); - static auto func_ptr = LoadSymbol("cusparseLoggerSetFile"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(file); -} - -cusparseStatus_t CUSPARSEAPI cusparseLoggerOpenFile(const char *logFile) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(const char *); - static auto func_ptr = LoadSymbol("cusparseLoggerOpenFile"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(logFile); -} - -cusparseStatus_t CUSPARSEAPI cusparseLoggerSetLevel(int level) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(int); - static auto func_ptr = LoadSymbol("cusparseLoggerSetLevel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(level); -} - -cusparseStatus_t CUSPARSEAPI cusparseLoggerSetMask(int mask) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(int); - static auto func_ptr = LoadSymbol("cusparseLoggerSetMask"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mask); -} - -cusparseStatus_t CUSPARSEAPI cusparseLoggerForceDisable(void) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(); - static auto func_ptr = LoadSymbol("cusparseLoggerForceDisable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCreateMatDescr(cusparseMatDescr_t *descrA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t *); - static auto func_ptr = LoadSymbol("cusparseCreateMatDescr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDestroyMatDescr(cusparseMatDescr_t descrA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseDestroyMatDescr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI cusparseSetMatType(cusparseMatDescr_t descrA, - cusparseMatrixType_t type) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t, cusparseMatrixType_t); - static auto func_ptr = LoadSymbol("cusparseSetMatType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA, type); -} - -cusparseMatrixType_t CUSPARSEAPI -cusparseGetMatType(const cusparseMatDescr_t descrA) { - using FuncPtr = cusparseMatrixType_t(CUSPARSEAPI *)(const cusparseMatDescr_t); - static 
auto func_ptr = LoadSymbol("cusparseGetMatType"); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSetMatFillMode(cusparseMatDescr_t descrA, cusparseFillMode_t fillMode) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t, cusparseFillMode_t); - static auto func_ptr = LoadSymbol("cusparseSetMatFillMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA, fillMode); -} - -cusparseFillMode_t CUSPARSEAPI -cusparseGetMatFillMode(const cusparseMatDescr_t descrA) { - using FuncPtr = cusparseFillMode_t(CUSPARSEAPI *)(const cusparseMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseGetMatFillMode"); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSetMatDiagType(cusparseMatDescr_t descrA, cusparseDiagType_t diagType) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t, cusparseDiagType_t); - static auto func_ptr = LoadSymbol("cusparseSetMatDiagType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA, diagType); -} - -cusparseDiagType_t CUSPARSEAPI -cusparseGetMatDiagType(const cusparseMatDescr_t descrA) { - using FuncPtr = cusparseDiagType_t(CUSPARSEAPI *)(const cusparseMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseGetMatDiagType"); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI cusparseSetMatIndexBase(cusparseMatDescr_t descrA, - cusparseIndexBase_t base) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSetMatIndexBase"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA, base); -} - -cusparseIndexBase_t CUSPARSEAPI -cusparseGetMatIndexBase(const cusparseMatDescr_t descrA) { - using FuncPtr = cusparseIndexBase_t(CUSPARSEAPI *)(const cusparseMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseGetMatIndexBase"); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseCreateCsric02Info(csric02Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csric02Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateCsric02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyCsric02Info(csric02Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csric02Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyCsric02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateBsric02Info(bsric02Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsric02Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateBsric02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyBsric02Info(bsric02Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsric02Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyBsric02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsrilu02Info(csrilu02Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrilu02Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateCsrilu02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyCsrilu02Info(csrilu02Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrilu02Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyCsrilu02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateBsrilu02Info(bsrilu02Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrilu02Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateBsrilu02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); 
- return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyBsrilu02Info(bsrilu02Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrilu02Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyBsrilu02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateBsrsv2Info(bsrsv2Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrsv2Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateBsrsv2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyBsrsv2Info(bsrsv2Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrsv2Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyBsrsv2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateBsrsm2Info(bsrsm2Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrsm2Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateBsrsm2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyBsrsm2Info(bsrsm2Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrsm2Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyBsrsm2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsru2csrInfo(csru2csrInfo_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csru2csrInfo_t *); - static auto func_ptr = LoadSymbol("cusparseCreateCsru2csrInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyCsru2csrInfo(csru2csrInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csru2csrInfo_t); - static auto func_ptr = LoadSymbol("cusparseDestroyCsru2csrInfo"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCreateColorInfo(cusparseColorInfo_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseColorInfo_t *); - static auto func_ptr = LoadSymbol("cusparseCreateColorInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDestroyColorInfo(cusparseColorInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseColorInfo_t); - static auto func_ptr = LoadSymbol("cusparseDestroyColorInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseSetColorAlgs(cusparseColorInfo_t info, - cusparseColorAlg_t alg) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseColorInfo_t, cusparseColorAlg_t); - static auto func_ptr = LoadSymbol("cusparseSetColorAlgs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, alg); -} - -cusparseStatus_t CUSPARSEAPI cusparseGetColorAlgs(cusparseColorInfo_t info, - cusparseColorAlg_t *alg) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseColorInfo_t, - cusparseColorAlg_t *); - static auto func_ptr = LoadSymbol("cusparseGetColorAlgs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, alg); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreatePruneInfo(pruneInfo_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(pruneInfo_t *); - static auto func_ptr = LoadSymbol("cusparseCreatePruneInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyPruneInfo(pruneInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(pruneInfo_t); - static auto func_ptr = LoadSymbol("cusparseDestroyPruneInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI 
-cusparseSgemvi(cusparseHandle_t handle, cusparseOperation_t transA, int m, - int n, const float *alpha, const float *A, int lda, int nnz, - const float *xVal, const int *xInd, const float *beta, float *y, - cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const float *, - const float *, int, int, const float *, const int *, const float *, - float *, cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseSgemvi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, A, lda, nnz, xVal, xInd, beta, y, - idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, - int m, int n, int nnz, int *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseSgemvi_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDgemvi(cusparseHandle_t handle, cusparseOperation_t transA, int m, - int n, const double *alpha, const double *A, int lda, int nnz, - const double *xVal, const int *xInd, const double *beta, - double *y, cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const double *, - const double *, int, int, const double *, const int *, const double *, - double *, cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseDgemvi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, A, lda, nnz, xVal, xInd, beta, y, - idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, - int m, 
int n, int nnz, int *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseDgemvi_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgemvi( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, - const cuComplex *alpha, const cuComplex *A, int lda, int nnz, - const cuComplex *xVal, const int *xInd, const cuComplex *beta, cuComplex *y, - cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cuComplex *, - const cuComplex *, int, int, const cuComplex *, const int *, - const cuComplex *, cuComplex *, cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseCgemvi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, A, lda, nnz, xVal, xInd, beta, y, - idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, - int m, int n, int nnz, int *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseCgemvi_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgemvi( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, - const cuDoubleComplex *alpha, const cuDoubleComplex *A, int lda, int nnz, - const cuDoubleComplex *xVal, const int *xInd, const cuDoubleComplex *beta, - cuDoubleComplex *y, cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const 
cuDoubleComplex *, - const cuDoubleComplex *, int, int, const cuDoubleComplex *, const int *, - const cuDoubleComplex *, cuDoubleComplex *, cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseZgemvi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, A, lda, nnz, xVal, xInd, beta, y, - idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseZgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, - int m, int n, int nnz, int *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseZgemvi_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrmv( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nb, int nnzb, const float *alpha, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const float *x, const float *beta, float *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - const float *, const cusparseMatDescr_t, const float *, const int *, - const int *, int, const float *, const float *, float *); - static auto func_ptr = LoadSymbol("cusparseSbsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim, - x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrmv( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nb, int nnzb, const double *alpha, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int 
*bsrSortedColIndA, int blockDim, - const double *x, const double *beta, double *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - const double *, const cusparseMatDescr_t, const double *, const int *, - const int *, int, const double *, const double *, double *); - static auto func_ptr = LoadSymbol("cusparseDbsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim, - x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCbsrmv(cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nb, int nnzb, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *bsrSortedValA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, int blockDim, const cuComplex *x, - const cuComplex *beta, cuComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, int, const cuComplex *, const cuComplex *, - cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCbsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim, - x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrmv( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nb, int nnzb, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *bsrSortedValA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, int blockDim, const cuDoubleComplex *x, - const cuDoubleComplex *beta, cuDoubleComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - 
cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZbsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim, - x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSbsrxmv(cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int sizeOfMask, int mb, int nb, - int nnzb, const float *alpha, const cusparseMatDescr_t descrA, - const float *bsrSortedValA, const int *bsrSortedMaskPtrA, - const int *bsrSortedRowPtrA, const int *bsrSortedEndPtrA, - const int *bsrSortedColIndA, int blockDim, const float *x, - const float *beta, float *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - int, const float *, const cusparseMatDescr_t, const float *, const int *, - const int *, const int *, const int *, int, const float *, const float *, - float *); - static auto func_ptr = LoadSymbol("cusparseSbsrxmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, sizeOfMask, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedMaskPtrA, bsrSortedRowPtrA, - bsrSortedEndPtrA, bsrSortedColIndA, blockDim, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDbsrxmv(cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int sizeOfMask, int mb, int nb, - int nnzb, const double *alpha, const cusparseMatDescr_t descrA, - const double *bsrSortedValA, const int *bsrSortedMaskPtrA, - const int *bsrSortedRowPtrA, const int *bsrSortedEndPtrA, - const int *bsrSortedColIndA, int blockDim, const double *x, - const double *beta, double *y) { - 
using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - int, const double *, const cusparseMatDescr_t, const double *, - const int *, const int *, const int *, const int *, int, const double *, - const double *, double *); - static auto func_ptr = LoadSymbol("cusparseDbsrxmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, sizeOfMask, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedMaskPtrA, bsrSortedRowPtrA, - bsrSortedEndPtrA, bsrSortedColIndA, blockDim, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrxmv( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int sizeOfMask, int mb, int nb, int nnzb, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *bsrSortedValA, const int *bsrSortedMaskPtrA, - const int *bsrSortedRowPtrA, const int *bsrSortedEndPtrA, - const int *bsrSortedColIndA, int blockDim, const cuComplex *x, - const cuComplex *beta, cuComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - int, const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, const int *, const int *, int, - const cuComplex *, const cuComplex *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCbsrxmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, sizeOfMask, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedMaskPtrA, bsrSortedRowPtrA, - bsrSortedEndPtrA, bsrSortedColIndA, blockDim, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrxmv( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int sizeOfMask, int mb, int nb, int nnzb, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *bsrSortedValA, const int *bsrSortedMaskPtrA, - const 
int *bsrSortedRowPtrA, const int *bsrSortedEndPtrA, - const int *bsrSortedColIndA, int blockDim, const cuDoubleComplex *x, - const cuDoubleComplex *beta, cuDoubleComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - int, const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, const int *, - const int *, int, const cuDoubleComplex *, const cuDoubleComplex *, - cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZbsrxmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, sizeOfMask, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedMaskPtrA, bsrSortedRowPtrA, - bsrSortedEndPtrA, bsrSortedColIndA, blockDim, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseXbsrsv2_zeroPivot(cusparseHandle_t handle, - bsrsv2Info_t info, - int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, bsrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXbsrsv2_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsv2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, float *, const int *, const int *, int, - bsrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseSbsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, 
info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsv2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, double *, const int *, const int *, int, - bsrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDbsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsv2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, cuComplex *, const int *, const int *, int, - bsrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCbsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsv2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - 
bsrsv2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, cuDoubleComplex *, const int *, const int *, - int, bsrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZbsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockSize, - bsrsv2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, float *, const int *, const int *, int, - bsrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSbsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockSize, info, - pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockSize, - bsrsv2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, double *, const int *, const int *, int, - bsrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDbsrsv2_bufferSizeExt"); - 
if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockSize, info, - pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockSize, - bsrsv2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, cuComplex *, const int *, const int *, int, - bsrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCbsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockSize, info, - pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockSize, - bsrsv2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, cuDoubleComplex *, const int *, const int *, - int, bsrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZbsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockSize, info, - pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsv2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - 
cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, const float *, const int *, const int *, int, - bsrsv2Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsv2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, const double *, const int *, const int *, int, - bsrsv2Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsv2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, cusparseSolvePolicy_t policy, void 
*pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - int, bsrsv2Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsv2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, int, bsrsv2Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsv2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, const float *alpha, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, const float *f, float *x, cusparseSolvePolicy_t policy, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const float *, const cusparseMatDescr_t, 
const float *, const int *, - const int *, int, bsrsv2Info_t, const float *, float *, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, alpha, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, f, x, - policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsv2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, const double *alpha, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, const double *f, double *x, cusparseSolvePolicy_t policy, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const double *, const cusparseMatDescr_t, const double *, const int *, - const int *, int, bsrsv2Info_t, const double *, double *, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, alpha, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, f, x, - policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsv2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, const cuComplex *alpha, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, const cuComplex *f, cuComplex *x, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int 
*, const int *, int, bsrsv2Info_t, const cuComplex *, - cuComplex *, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, alpha, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, f, x, - policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsv2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, const cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, const cuDoubleComplex *f, cuDoubleComplex *x, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, bsrsv2Info_t, - const cuDoubleComplex *, cuDoubleComplex *, cusparseSolvePolicy_t, - void *); - static auto func_ptr = LoadSymbol("cusparseZbsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, alpha, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, f, x, - policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrmm( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int kb, int nnzb, const float *alpha, const cusparseMatDescr_t descrA, - const float *bsrSortedValA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, const int blockSize, const float *B, - const int ldb, const float *beta, float *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, 
int, int, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - const int, const float *, const int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cusparseSbsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, kb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockSize, - B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrmm( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int kb, int nnzb, const double *alpha, const cusparseMatDescr_t descrA, - const double *bsrSortedValA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, const int blockSize, const double *B, - const int ldb, const double *beta, double *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - const int, const double *, const int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cusparseDbsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, kb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockSize, - B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrmm( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int kb, int nnzb, const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *bsrSortedValA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, const int blockSize, const cuComplex *B, - const int ldb, const cuComplex *beta, cuComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, 
cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, int, const cuComplex *, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - const int, const cuComplex *, const int, const cuComplex *, cuComplex *, - int); - static auto func_ptr = LoadSymbol("cusparseCbsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, kb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockSize, - B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrmm( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int kb, int nnzb, const cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, - const int blockSize, const cuDoubleComplex *B, const int ldb, - const cuDoubleComplex *beta, cuDoubleComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, const int, const cuDoubleComplex *, const int, - const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cusparseZbsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, kb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockSize, - B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseXbsrsm2_zeroPivot(cusparseHandle_t handle, - bsrsm2Info_t info, - int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, bsrsm2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXbsrsm2_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsm2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, int, bsrsm2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseSbsrsm2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsm2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, int, bsrsm2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDbsrsm2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsm2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t 
transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, int, bsrsm2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCbsrsm2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsm2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrsm2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZbsrsm2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsm2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t 
info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, int, bsrsm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSbsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsm2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, int, bsrsm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDbsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsm2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, 
cuComplex *, - const int *, const int *, int, bsrsm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCbsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsm2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrsm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZbsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsm2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, const float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, bsrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsrsm2_analysis"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsm2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, const double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, bsrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsm2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, const cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, bsrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, 
n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsm2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, - const cuDoubleComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, bsrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsm2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const float *alpha, const cusparseMatDescr_t descrA, - const float *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info, - const float *B, int ldb, float *X, int ldx, cusparseSolvePolicy_t policy, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, int, - bsrsm2Info_t, const float *, int, float *, int, cusparseSolvePolicy_t, - void *); - static auto func_ptr = LoadSymbol("cusparseSbsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, 
dirA, transA, transXY, mb, n, nnzb, alpha, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, B, ldb, X, ldx, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsm2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const double *alpha, const cusparseMatDescr_t descrA, - const double *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info, - const double *B, int ldb, double *X, int ldx, cusparseSolvePolicy_t policy, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, int, - bsrsm2Info_t, const double *, int, double *, int, cusparseSolvePolicy_t, - void *); - static auto func_ptr = LoadSymbol("cusparseDbsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, alpha, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, B, ldb, X, ldx, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsm2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info, - const cuComplex *B, int ldb, cuComplex *X, int ldx, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cuComplex *, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - int, bsrsm2Info_t, const 
cuComplex *, int, cuComplex *, int, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, alpha, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, B, ldb, X, ldx, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsm2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info, - const cuDoubleComplex *B, int ldb, cuDoubleComplex *X, int ldx, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, int, bsrsm2Info_t, const cuDoubleComplex *, int, - cuDoubleComplex *, int, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, alpha, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, B, ldb, X, ldx, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrilu02_numericBoost( - cusparseHandle_t handle, csrilu02Info_t info, int enable_boost, double *tol, - float *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, csrilu02Info_t, int, double *, float *); - static auto func_ptr = LoadSymbol("cusparseScsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - 
-cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02_numericBoost( - cusparseHandle_t handle, csrilu02Info_t info, int enable_boost, double *tol, - double *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, csrilu02Info_t, int, double *, double *); - static auto func_ptr = LoadSymbol("cusparseDcsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02_numericBoost( - cusparseHandle_t handle, csrilu02Info_t info, int enable_boost, double *tol, - cuComplex *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, csrilu02Info_t, int, double *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCcsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02_numericBoost( - cusparseHandle_t handle, csrilu02Info_t info, int enable_boost, double *tol, - cuDoubleComplex *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, csrilu02Info_t, int, double *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZcsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrilu02_zeroPivot( - cusparseHandle_t handle, csrilu02Info_t info, int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, csrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXcsrilu02_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrilu02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedValA, const int *csrSortedRowPtrA, - 
const int *csrSortedColIndA, csrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseScsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDcsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCcsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t 
descrA, - cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZcsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrilu02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedVal, const int *csrSortedRowPtr, const int *csrSortedColInd, - csrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseScsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02_bufferSizeExt( - 
cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrilu02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - 
csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, 
csrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrilu02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedValA_valM, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedValA_valM, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedValA_valM, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void 
*pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedValA_valM, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02_numericBoost( - cusparseHandle_t handle, bsrilu02Info_t info, int enable_boost, double *tol, - float *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, bsrilu02Info_t, int, double *, float *); - static auto func_ptr = LoadSymbol("cusparseSbsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02_numericBoost( - cusparseHandle_t handle, bsrilu02Info_t info, int enable_boost, double *tol, - double *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, bsrilu02Info_t, int, double *, double *); - static auto func_ptr = LoadSymbol("cusparseDbsrilu02_numericBoost"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02_numericBoost( - cusparseHandle_t handle, bsrilu02Info_t info, int enable_boost, double *tol, - cuComplex *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, bsrilu02Info_t, int, double *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCbsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02_numericBoost( - cusparseHandle_t handle, bsrilu02Info_t info, int enable_boost, double *tol, - cuDoubleComplex *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, bsrilu02Info_t, int, double *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZbsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseXbsrilu02_zeroPivot( - cusparseHandle_t handle, bsrilu02Info_t info, int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, bsrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXbsrilu02_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsrilu02Info_t, int *); - static auto func_ptr = 
LoadSymbol("cusparseSbsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDbsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCbsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int 
*bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZbsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSbsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDbsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, 
dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCbsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrilu02Info_t, - size_t *); - static auto func_ptr = LoadSymbol("cusparseZbsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - 
using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, 
descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t 
policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, 
descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsric02_zeroPivot(cusparseHandle_t handle, - csric02Info_t info, - int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, csric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXcsric02_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsric02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseScsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsric02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDcsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsric02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedValA, 
const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCcsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsric02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZcsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsric02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedVal, const int *csrSortedRowPtr, const int *csrSortedColInd, - csric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseScsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsric02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, 
const cusparseMatDescr_t descrA, - double *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsric02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsric02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsric02_analysis( - 
cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsric02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsric02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsric02_analysis"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsric02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, csric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsric02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedValA_valM, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsric02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedValA_valM, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, 
double *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsric02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedValA_valM, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsric02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedValA_valM, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXbsric02_zeroPivot(cusparseHandle_t handle, - bsric02Info_t info, - int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, bsric02Info_t, int *); - static auto 
func_ptr = LoadSymbol("cusparseXbsric02_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsric02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseSbsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsric02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDbsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsric02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr 
= cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCbsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsric02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZbsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsric02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSbsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t 
CUSPARSEAPI cusparseDbsric02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDbsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsric02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCbsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsric02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const 
int *, int, bsric02Info_t, - size_t *); - static auto func_ptr = LoadSymbol("cusparseZbsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsric02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pInputBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pInputBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsric02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pInputBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pInputBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsric02_analysis( - 
cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pInputBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pInputBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsric02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pInputBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pInputBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsric02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, 
cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsric02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsric02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseZbsric02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const float *dl, const float *d, - const float *du, const float *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const float *, const float *, - const float *, int, size_t *); - static auto func_ptr = LoadSymbol("cusparseSgtsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsv2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const double *dl, const double *d, - const double *du, const double *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, const double *, int, size_t *); - static auto func_ptr = LoadSymbol("cusparseDgtsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2_bufferSizeExt( - cusparseHandle_t handle, int m, int 
n, const cuComplex *dl, - const cuComplex *d, const cuComplex *du, const cuComplex *B, int ldb, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, const cuComplex *, int, size_t *); - static auto func_ptr = LoadSymbol("cusparseCgtsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, - const cuDoubleComplex *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, const cuDoubleComplex *, - int, size_t *); - static auto func_ptr = LoadSymbol("cusparseZgtsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2(cusparseHandle_t handle, int m, - int n, const float *dl, - const float *d, const float *du, - float *B, int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const float *, const float *, - float *, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgtsv2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsv2(cusparseHandle_t handle, int m, - int n, const double *dl, - const double *d, const double *du, - double *B, int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, double *, int, void *); - static auto func_ptr = 
LoadSymbol("cusparseDgtsv2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2(cusparseHandle_t handle, int m, - int n, const cuComplex *dl, - const cuComplex *d, - const cuComplex *du, cuComplex *B, - int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, cuComplex *, int, void *); - static auto func_ptr = LoadSymbol("cusparseCgtsv2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2(cusparseHandle_t handle, int m, - int n, const cuDoubleComplex *dl, - const cuDoubleComplex *d, - const cuDoubleComplex *du, - cuDoubleComplex *B, int ldb, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *, int, - void *); - static auto func_ptr = LoadSymbol("cusparseZgtsv2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2_nopivot_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const float *dl, const float *d, - const float *du, const float *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const float *, const float *, - const float *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSgtsv2_nopivot_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsv2_nopivot_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const double *dl, const double *d, - const double 
*du, const double *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, const double *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDgtsv2_nopivot_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2_nopivot_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const cuComplex *dl, - const cuComplex *d, const cuComplex *du, const cuComplex *B, int ldb, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, const cuComplex *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgtsv2_nopivot_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2_nopivot_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, - const cuDoubleComplex *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, const cuDoubleComplex *, - int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgtsv2_nopivot_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2_nopivot( - cusparseHandle_t handle, int m, int n, const float *dl, const float *d, - const float *du, float *B, int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const float *, 
const float *, - float *, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgtsv2_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsv2_nopivot( - cusparseHandle_t handle, int m, int n, const double *dl, const double *d, - const double *du, double *B, int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, double *, int, void *); - static auto func_ptr = LoadSymbol("cusparseDgtsv2_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2_nopivot( - cusparseHandle_t handle, int m, int n, const cuComplex *dl, - const cuComplex *d, const cuComplex *du, cuComplex *B, int ldb, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, cuComplex *, int, void *); - static auto func_ptr = LoadSymbol("cusparseCgtsv2_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2_nopivot( - cusparseHandle_t handle, int m, int n, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, cuDoubleComplex *B, - int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *, int, - void *); - static auto func_ptr = LoadSymbol("cusparseZgtsv2_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2StridedBatch_bufferSizeExt( - cusparseHandle_t handle, int m, const 
float *dl, const float *d, - const float *du, const float *x, int batchCount, int batchStride, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const float *, const float *, const float *, - const float *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSgtsv2StridedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, - bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsv2StridedBatch_bufferSizeExt( - cusparseHandle_t handle, int m, const double *dl, const double *d, - const double *du, const double *x, int batchCount, int batchStride, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, const double *, const double *, - const double *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDgtsv2StridedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, - bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2StridedBatch_bufferSizeExt( - cusparseHandle_t handle, int m, const cuComplex *dl, const cuComplex *d, - const cuComplex *du, const cuComplex *x, int batchCount, int batchStride, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const cuComplex *, - const cuComplex *, const cuComplex *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgtsv2StridedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, - bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2StridedBatch_bufferSizeExt( - cusparseHandle_t handle, int m, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, - const 
cuDoubleComplex *x, int batchCount, int batchStride, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgtsv2StridedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, - bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2StridedBatch( - cusparseHandle_t handle, int m, const float *dl, const float *d, - const float *du, float *x, int batchCount, int batchStride, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const float *, const float *, const float *, - float *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgtsv2StridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDgtsv2StridedBatch(cusparseHandle_t handle, int m, const double *dl, - const double *d, const double *du, double *x, - int batchCount, int batchStride, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, const double *, const double *, - double *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseDgtsv2StridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2StridedBatch( - cusparseHandle_t handle, int m, const cuComplex *dl, const cuComplex *d, - const cuComplex *du, cuComplex *x, int batchCount, int batchStride, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const cuComplex *, - const cuComplex *, cuComplex *, 
int, int, void *); - static auto func_ptr = LoadSymbol("cusparseCgtsv2StridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2StridedBatch( - cusparseHandle_t handle, int m, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, cuDoubleComplex *x, - int batchCount, int batchStride, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, const cuDoubleComplex *, - const cuDoubleComplex *, cuDoubleComplex *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseZgtsv2StridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const float *dl, const float *d, - const float *du, const float *x, int batchCount, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const float *, const float *, - const float *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSgtsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const double *dl, const double *d, - const double *du, const double *x, int batchCount, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, const double *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDgtsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const cuComplex *dl, - const cuComplex *d, const cuComplex *du, const cuComplex *x, int batchCount, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, const cuComplex *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgtsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, - const cuDoubleComplex *x, int batchCount, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, const cuDoubleComplex *, - int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgtsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, float *dl, float *d, float *du, - float *x, int batchCount, void *pBuffer) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, int, float *, - float *, float *, float *, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgtsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, pBuffer); -} - -cusparseStatus_t 
CUSPARSEAPI cusparseDgtsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, double *dl, double *d, double *du, - double *x, int batchCount, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, int, - double *, double *, double *, - double *, int, void *); - static auto func_ptr = LoadSymbol("cusparseDgtsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, cuComplex *dl, cuComplex *d, - cuComplex *du, cuComplex *x, int batchCount, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, cuComplex *, cuComplex *, cuComplex *, - cuComplex *, int, void *); - static auto func_ptr = LoadSymbol("cusparseCgtsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, cuDoubleComplex *dl, - cuDoubleComplex *d, cuDoubleComplex *du, cuDoubleComplex *x, int batchCount, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, cuDoubleComplex *, cuDoubleComplex *, - cuDoubleComplex *, cuDoubleComplex *, int, void *); - static auto func_ptr = LoadSymbol("cusparseZgtsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, dl, d, du, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgpsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const float *ds, const float *dl, - const float *d, const float *du, const float *dw, const float *x, - int batchCount, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float 
*, const float *, const float *, - const float *, const float *, const float *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSgpsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgpsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const double *ds, - const double *dl, const double *d, const double *du, const double *dw, - const double *x, int batchCount, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, const double *, const double *, const double *, int, - size_t *); - static auto func_ptr = - LoadSymbol("cusparseDgpsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgpsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const cuComplex *ds, - const cuComplex *dl, const cuComplex *d, const cuComplex *du, - const cuComplex *dw, const cuComplex *x, int batchCount, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, const cuComplex *, const cuComplex *, - const cuComplex *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgpsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgpsvInterleavedBatch_bufferSizeExt( - cusparseHandle_t handle, int algo, int m, const cuDoubleComplex *ds, - const cuDoubleComplex *dl, const cuDoubleComplex *d, - const 
cuDoubleComplex *du, const cuDoubleComplex *dw, - const cuDoubleComplex *x, int batchCount, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgpsvInterleavedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgpsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, float *ds, float *dl, float *d, - float *du, float *dw, float *x, int batchCount, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, float *, float *, float *, float *, float *, - float *, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgpsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgpsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, double *ds, double *dl, double *d, - double *du, double *dw, double *x, int batchCount, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, double *, double *, double *, double *, - double *, double *, int, void *); - static auto func_ptr = LoadSymbol("cusparseDgpsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgpsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, cuComplex *ds, cuComplex *dl, - cuComplex *d, cuComplex *du, cuComplex *dw, cuComplex *x, int batchCount, - void *pBuffer) { - using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, cuComplex *, cuComplex *, cuComplex *, - cuComplex *, cuComplex *, cuComplex *, int, void *); - static auto func_ptr = LoadSymbol("cusparseCgpsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgpsvInterleavedBatch( - cusparseHandle_t handle, int algo, int m, cuDoubleComplex *ds, - cuDoubleComplex *dl, cuDoubleComplex *d, cuDoubleComplex *du, - cuDoubleComplex *dw, cuDoubleComplex *x, int batchCount, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, cuDoubleComplex *, cuDoubleComplex *, - cuDoubleComplex *, cuDoubleComplex *, cuDoubleComplex *, - cuDoubleComplex *, int, void *); - static auto func_ptr = LoadSymbol("cusparseZgpsvInterleavedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrgeam2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const float *alpha, - const cusparseMatDescr_t descrA, int nnzA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *beta, - const cusparseMatDescr_t descrB, int nnzB, const float *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrC, const float *csrSortedValC, - const int *csrSortedRowPtrC, const int *csrSortedColIndC, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const cusparseMatDescr_t, int, - const float *, const int *, const int *, const float *, - const cusparseMatDescr_t, int, const float *, const int *, const int *, - const cusparseMatDescr_t, const float *, const int *, const int *, - size_t *); - static auto func_ptr = 
LoadSymbol("cusparseScsrgeam2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrgeam2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const double *alpha, - const cusparseMatDescr_t descrA, int nnzA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *beta, const cusparseMatDescr_t descrB, int nnzB, - const double *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - const double *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const cusparseMatDescr_t, int, - const double *, const int *, const int *, const double *, - const cusparseMatDescr_t, int, const double *, const int *, const int *, - const cusparseMatDescr_t, const double *, const int *, const int *, - size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsrgeam2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrgeam2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const cuComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cuComplex *beta, const cusparseMatDescr_t descrB, int 
nnzB, - const cuComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - const cuComplex *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cusparseMatDescr_t, - int, const cuComplex *, const int *, const int *, const cuComplex *, - const cusparseMatDescr_t, int, const cuComplex *, const int *, - const int *, const cusparseMatDescr_t, const cuComplex *, const int *, - const int *, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsrgeam2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrgeam2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *beta, - const cusparseMatDescr_t descrB, int nnzB, - const cuDoubleComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - const cuDoubleComplex *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, int, const cuDoubleComplex *, const int *, - const int *, const cuDoubleComplex *, const cusparseMatDescr_t, int, - const cuDoubleComplex *, const int *, const int *, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const 
int *, size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsrgeam2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrgeam2Nnz( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - int nnzA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, int nnzB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - int *csrSortedRowPtrC, int *nnzTotalDevHostPtr, void *workspace) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, int, const int *, - const int *, const cusparseMatDescr_t, int, const int *, const int *, - const cusparseMatDescr_t, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseXcsrgeam2Nnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, nnzA, csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, - csrSortedColIndB, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, workspace); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrgeam2( - cusparseHandle_t handle, int m, int n, const float *alpha, - const cusparseMatDescr_t descrA, int nnzA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *beta, - const cusparseMatDescr_t descrB, int nnzB, const float *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrC, float *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const 
cusparseMatDescr_t, int, - const float *, const int *, const int *, const float *, - const cusparseMatDescr_t, int, const float *, const int *, const int *, - const cusparseMatDescr_t, float *, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseScsrgeam2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrgeam2( - cusparseHandle_t handle, int m, int n, const double *alpha, - const cusparseMatDescr_t descrA, int nnzA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *beta, const cusparseMatDescr_t descrB, int nnzB, - const double *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - double *csrSortedValC, int *csrSortedRowPtrC, int *csrSortedColIndC, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const cusparseMatDescr_t, int, - const double *, const int *, const int *, const double *, - const cusparseMatDescr_t, int, const double *, const int *, const int *, - const cusparseMatDescr_t, double *, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrgeam2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrgeam2( - cusparseHandle_t handle, int m, int n, const cuComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, const cuComplex *csrSortedValA, - const int 
*csrSortedRowPtrA, const int *csrSortedColIndA, - const cuComplex *beta, const cusparseMatDescr_t descrB, int nnzB, - const cuComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - cuComplex *csrSortedValC, int *csrSortedRowPtrC, int *csrSortedColIndC, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cusparseMatDescr_t, - int, const cuComplex *, const int *, const int *, const cuComplex *, - const cusparseMatDescr_t, int, const cuComplex *, const int *, - const int *, const cusparseMatDescr_t, cuComplex *, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseCcsrgeam2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrgeam2( - cusparseHandle_t handle, int m, int n, const cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *beta, - const cusparseMatDescr_t descrB, int nnzB, - const cuDoubleComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - cuDoubleComplex *csrSortedValC, int *csrSortedRowPtrC, - int *csrSortedColIndC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, int, const cuDoubleComplex *, const int *, - const int *, const cuDoubleComplex *, const cusparseMatDescr_t, int, - const cuDoubleComplex *, const int *, const int *, - const cusparseMatDescr_t, cuDoubleComplex *, int *, int *, void *); - static auto 
func_ptr = LoadSymbol("cusparseZcsrgeam2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrcolor( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const float *fractionToColor, int *ncolors, - int *coloring, int *reordering, const cusparseColorInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, const float *, int *, int *, int *, - const cusparseColorInfo_t); - static auto func_ptr = LoadSymbol("cusparseScsrcolor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, fractionToColor, ncolors, coloring, - reordering, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrcolor( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const double *fractionToColor, int *ncolors, - int *coloring, int *reordering, const cusparseColorInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, int *, int *, int *, - const cusparseColorInfo_t); - static auto func_ptr = LoadSymbol("cusparseDcsrcolor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, fractionToColor, ncolors, coloring, - reordering, info); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseCcsrcolor( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const float *fractionToColor, int *ncolors, - int *coloring, int *reordering, const cusparseColorInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, const float *, int *, int *, int *, - const cusparseColorInfo_t); - static auto func_ptr = LoadSymbol("cusparseCcsrcolor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, fractionToColor, ncolors, coloring, - reordering, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrcolor( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const double *fractionToColor, int *ncolors, - int *coloring, int *reordering, const cusparseColorInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, const double *, int *, - int *, int *, const cusparseColorInfo_t); - static auto func_ptr = LoadSymbol("cusparseZcsrcolor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, fractionToColor, ncolors, coloring, - reordering, info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSnnz(cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const float *A, int lda, - int *nnzPerRowCol, int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, int, int *, int *); - 
static auto func_ptr = LoadSymbol("cusparseSnnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDnnz(cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const double *A, int lda, - int *nnzPerRowCol, int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, int, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDnnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCnnz(cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuComplex *A, int lda, - int *nnzPerRowCol, int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, int, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCnnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI -cusparseZnnz(cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *A, int lda, - int *nnzPerRowCol, int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, int, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZnnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseSnnz_compress( - cusparseHandle_t handle, int m, const cusparseMatDescr_t descr, - const float *csrSortedValA, const int *csrSortedRowPtrA, int *nnzPerRow, - int *nnzC, float tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cusparseMatDescr_t, const float *, - const int *, int *, int *, float); - static auto func_ptr = LoadSymbol("cusparseSnnz_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, descr, csrSortedValA, csrSortedRowPtrA, nnzPerRow, - nnzC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseDnnz_compress( - cusparseHandle_t handle, int m, const cusparseMatDescr_t descr, - const double *csrSortedValA, const int *csrSortedRowPtrA, int *nnzPerRow, - int *nnzC, double tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cusparseMatDescr_t, const double *, - const int *, int *, int *, double); - static auto func_ptr = LoadSymbol("cusparseDnnz_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, descr, csrSortedValA, csrSortedRowPtrA, nnzPerRow, - nnzC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseCnnz_compress( - cusparseHandle_t handle, int m, const cusparseMatDescr_t descr, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, int *nnzPerRow, - int *nnzC, cuComplex tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cusparseMatDescr_t, const cuComplex *, - const int *, int *, int *, cuComplex); - static auto func_ptr = LoadSymbol("cusparseCnnz_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, descr, csrSortedValA, csrSortedRowPtrA, nnzPerRow, - nnzC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseZnnz_compress( - cusparseHandle_t handle, int m, const cusparseMatDescr_t descr, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - int *nnzPerRow, int *nnzC, cuDoubleComplex tol) { - using 
FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cusparseMatDescr_t, const cuDoubleComplex *, - const int *, int *, int *, cuDoubleComplex); - static auto func_ptr = LoadSymbol("cusparseZnnz_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, descr, csrSortedValA, csrSortedRowPtrA, nnzPerRow, - nnzC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2csr_compress( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedColIndA, - const int *csrSortedRowPtrA, int nnzA, const int *nnzPerRow, - float *csrSortedValC, int *csrSortedColIndC, int *csrSortedRowPtrC, - float tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, int, const int *, float *, int *, int *, float); - static auto func_ptr = LoadSymbol("cusparseScsr2csr_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedColIndA, - csrSortedRowPtrA, nnzA, nnzPerRow, csrSortedValC, - csrSortedColIndC, csrSortedRowPtrC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2csr_compress( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedColIndA, - const int *csrSortedRowPtrA, int nnzA, const int *nnzPerRow, - double *csrSortedValC, int *csrSortedColIndC, int *csrSortedRowPtrC, - double tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, int, const int *, double *, int *, int *, - double); - static auto func_ptr = LoadSymbol("cusparseDcsr2csr_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedColIndA, - csrSortedRowPtrA, nnzA, nnzPerRow, csrSortedValC, - 
csrSortedColIndC, csrSortedRowPtrC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2csr_compress( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedColIndA, - const int *csrSortedRowPtrA, int nnzA, const int *nnzPerRow, - cuComplex *csrSortedValC, int *csrSortedColIndC, int *csrSortedRowPtrC, - cuComplex tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, int, const int *, cuComplex *, int *, int *, - cuComplex); - static auto func_ptr = LoadSymbol("cusparseCcsr2csr_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedColIndA, - csrSortedRowPtrA, nnzA, nnzPerRow, csrSortedValC, - csrSortedColIndC, csrSortedRowPtrC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2csr_compress( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedColIndA, - const int *csrSortedRowPtrA, int nnzA, const int *nnzPerRow, - cuDoubleComplex *csrSortedValC, int *csrSortedColIndC, - int *csrSortedRowPtrC, cuDoubleComplex tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, const int *, - cuDoubleComplex *, int *, int *, cuDoubleComplex); - static auto func_ptr = LoadSymbol("cusparseZcsr2csr_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedColIndA, - csrSortedRowPtrA, nnzA, nnzPerRow, csrSortedValC, - csrSortedColIndC, csrSortedRowPtrC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcoo2csr(cusparseHandle_t handle, - const int *cooRowInd, int nnz, - int m, int *csrSortedRowPtr, - cusparseIndexBase_t idxBase) { - using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const int *, int, int, int *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseXcoo2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, cooRowInd, nnz, m, csrSortedRowPtr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsr2coo(cusparseHandle_t handle, - const int *csrSortedRowPtr, - int nnz, int m, int *cooRowInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const int *, int, int, int *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseXcsr2coo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, csrSortedRowPtr, nnz, m, cooRowInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsr2bsrNnz( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, int blockDim, const cusparseMatDescr_t descrC, - int *bsrSortedRowPtrC, int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const int *, const int *, int, const cusparseMatDescr_t, int *, int *); - static auto func_ptr = LoadSymbol("cusparseXcsr2bsrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedRowPtrA, - csrSortedColIndA, blockDim, descrC, bsrSortedRowPtrC, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2bsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, float *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, 
const cusparseMatDescr_t, - const float *, const int *, const int *, int, const cusparseMatDescr_t, - float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseScsr2bsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, blockDim, descrC, bsrSortedValC, - bsrSortedRowPtrC, bsrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2bsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, double *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, const cusparseMatDescr_t, - double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDcsr2bsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, blockDim, descrC, bsrSortedValC, - bsrSortedRowPtrC, bsrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2bsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, cuComplex *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, - const cusparseMatDescr_t, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCcsr2bsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, blockDim, descrC, bsrSortedValC, - bsrSortedRowPtrC, bsrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2bsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, cuDoubleComplex *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, - const cusparseMatDescr_t, cuDoubleComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZcsr2bsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, blockDim, descrC, bsrSortedValC, - bsrSortedRowPtrC, bsrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, float *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, const cusparseMatDescr_t, - float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseSbsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, blockDim, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseDbsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, double *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, const cusparseMatDescr_t, - double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDbsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, blockDim, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, cuComplex *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, - const cusparseMatDescr_t, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCbsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, blockDim, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int 
blockDim, - const cusparseMatDescr_t descrC, cuDoubleComplex *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, - const cusparseMatDescr_t, cuDoubleComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZbsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, blockDim, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsc_bufferSize( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const float *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const float *, const int *, const int *, - int, int, int *); - static auto func_ptr = LoadSymbol("cusparseSgebsr2gebsc_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsc_bufferSize( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const double *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const double *, const int *, const int *, - int, int, int *); - static auto func_ptr = LoadSymbol("cusparseDgebsr2gebsc_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, - 
pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsc_bufferSize( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuComplex *, const int *, - const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseCgebsr2gebsc_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsc_bufferSize( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuDoubleComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuDoubleComplex *, const int *, - const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseZgebsr2gebsc_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsc_bufferSizeExt( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const float *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const float *, const int *, const int *, - int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSgebsr2gebsc_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, 
bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsc_bufferSizeExt( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const double *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const double *, const int *, const int *, - int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDgebsr2gebsc_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsc_bufferSizeExt( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuComplex *, const int *, - const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgebsr2gebsc_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsc_bufferSizeExt( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuDoubleComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuDoubleComplex *, const int *, - const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgebsr2gebsc_bufferSizeExt"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsc( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const float *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, float *bscVal, - int *bscRowInd, int *bscColPtr, cusparseAction_t copyValues, - cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const float *, const int *, const int *, - int, int, float *, int *, int *, cusparseAction_t, cusparseIndexBase_t, - void *); - static auto func_ptr = LoadSymbol("cusparseSgebsr2gebsc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, bscVal, bscRowInd, - bscColPtr, copyValues, idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsc( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const double *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - double *bscVal, int *bscRowInd, int *bscColPtr, cusparseAction_t copyValues, - cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const double *, const int *, const int *, - int, int, double *, int *, int *, cusparseAction_t, cusparseIndexBase_t, - void *); - static auto func_ptr = LoadSymbol("cusparseDgebsr2gebsc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, bscVal, bscRowInd, - bscColPtr, copyValues, idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsc( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const 
cuComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - cuComplex *bscVal, int *bscRowInd, int *bscColPtr, - cusparseAction_t copyValues, cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuComplex *, const int *, - const int *, int, int, cuComplex *, int *, int *, cusparseAction_t, - cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseCgebsr2gebsc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, bscVal, bscRowInd, - bscColPtr, copyValues, idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsc( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuDoubleComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - cuDoubleComplex *bscVal, int *bscRowInd, int *bscColPtr, - cusparseAction_t copyValues, cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuDoubleComplex *, const int *, - const int *, int, int, cuDoubleComplex *, int *, int *, cusparseAction_t, - cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseZgebsr2gebsc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, bscVal, bscRowInd, - bscColPtr, copyValues, idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXgebsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, int rowBlockDim, int colBlockDim, - const cusparseMatDescr_t descrC, int *csrSortedRowPtrC, - int *csrSortedColIndC) { - 
using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const int *, const int *, int, int, const cusparseMatDescr_t, int *, - int *); - static auto func_ptr = LoadSymbol("cusparseXgebsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, - csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDim, - int colBlockDim, const cusparseMatDescr_t descrC, float *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, int, - const cusparseMatDescr_t, float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseSgebsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDim, - int colBlockDim, const cusparseMatDescr_t descrC, double *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, int, - const cusparseMatDescr_t, 
double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDgebsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDim, - int colBlockDim, const cusparseMatDescr_t descrC, cuComplex *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, int, - const cusparseMatDescr_t, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCgebsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDim, - int colBlockDim, const cusparseMatDescr_t descrC, - cuDoubleComplex *csrSortedValC, int *csrSortedRowPtrC, - int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, int, - const cusparseMatDescr_t, cuDoubleComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZgebsr2csr"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseScsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseDcsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int 
*csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseCcsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseZcsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseScsr2gebsr_bufferSizeExt"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDcsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCcsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int 
rowBlockDim, - int colBlockDim, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZcsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsr2gebsrNnz( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrC, - int *bsrSortedRowPtrC, int rowBlockDim, int colBlockDim, - int *nnzTotalDevHostPtr, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const int *, const int *, const cusparseMatDescr_t, int *, int, int, - int *, void *); - static auto func_ptr = LoadSymbol("cusparseXcsr2gebsrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedRowPtrA, - csrSortedColIndA, descrC, bsrSortedRowPtrC, rowBlockDim, - colBlockDim, nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrC, float *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDim, - int colBlockDim, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, const cusparseMatDescr_t, - float *, int *, int *, 
int, int, void *); - static auto func_ptr = LoadSymbol("cusparseScsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDim, colBlockDim, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrC, double *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDim, - int colBlockDim, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, const cusparseMatDescr_t, - double *, int *, int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseDcsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDim, colBlockDim, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrC, cuComplex *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDim, - int colBlockDim, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, const cusparseMatDescr_t, - cuComplex *, int *, int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseCcsr2gebsr"); - 
if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDim, colBlockDim, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrC, cuDoubleComplex *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDim, - int colBlockDim, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - const cusparseMatDescr_t, cuDoubleComplex *, int *, int *, int, int, - void *); - static auto func_ptr = LoadSymbol("cusparseZcsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDim, colBlockDim, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const float *, const int *, const int *, int, - int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseSgebsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, 
bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const double *, const int *, const int *, int, - int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseDgebsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - int, int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseCgebsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsr_bufferSize( - 
cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, int, int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseZgebsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const float *, const int *, const int *, int, - int, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSgebsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int 
rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const double *, const int *, const int *, int, - int, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDgebsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - int, int, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgebsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const 
cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, int, int, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgebsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseXgebsr2gebsrNnz( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, int rowBlockDimA, int colBlockDimA, - const cusparseMatDescr_t descrC, int *bsrSortedRowPtrC, int rowBlockDimC, - int colBlockDimC, int *nnzTotalDevHostPtr, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const int *, const int *, int, int, - const cusparseMatDescr_t, int *, int, int, int *, void *); - static auto func_ptr = LoadSymbol("cusparseXgebsr2gebsrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDimA, colBlockDimA, descrC, - bsrSortedRowPtrC, rowBlockDimC, colBlockDimC, - nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, const cusparseMatDescr_t descrC, float *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDimC, - int colBlockDimC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const float *, const int *, const int 
*, int, - int, const cusparseMatDescr_t, float *, int *, int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgebsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDimC, colBlockDimC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, const cusparseMatDescr_t descrC, double *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDimC, - int colBlockDimC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const double *, const int *, const int *, int, - int, const cusparseMatDescr_t, double *, int *, int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseDgebsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDimC, colBlockDimC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, const cusparseMatDescr_t descrC, cuComplex *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDimC, - int colBlockDimC, void *pBuffer) { - using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - int, int, const cusparseMatDescr_t, cuComplex *, int *, int *, int, int, - void *); - static auto func_ptr = LoadSymbol("cusparseCgebsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDimC, colBlockDimC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, const cusparseMatDescr_t descrC, - cuDoubleComplex *bsrSortedValC, int *bsrSortedRowPtrC, - int *bsrSortedColIndC, int rowBlockDimC, int colBlockDimC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, int, int, const cusparseMatDescr_t, cuDoubleComplex *, int *, - int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseZgebsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDimC, colBlockDimC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCreateIdentityPermutation(cusparseHandle_t handle, int n, int *p) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, int *); - static auto func_ptr = - LoadSymbol("cusparseCreateIdentityPermutation"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, n, p); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcoosort_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, const int *cooRowsA, - const int *cooColsA, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const int *, const int *, size_t *); - static auto func_ptr = LoadSymbol("cusparseXcoosort_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, cooRowsA, cooColsA, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcoosortByRow(cusparseHandle_t handle, - int m, int n, int nnz, - int *cooRowsA, int *cooColsA, - int *P, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, int *, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseXcoosortByRow"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, cooRowsA, cooColsA, P, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcoosortByColumn(cusparseHandle_t handle, - int m, int n, int nnz, - int *cooRowsA, - int *cooColsA, int *P, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, int *, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseXcoosortByColumn"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, cooRowsA, cooColsA, P, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrsort_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, const int *csrRowPtrA, - const int *csrColIndA, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const int *, const int *, size_t *); - static auto func_ptr = LoadSymbol("cusparseXcsrsort_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrRowPtrA, 
csrColIndA, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrsort(cusparseHandle_t handle, int m, - int n, int nnz, - const cusparseMatDescr_t descrA, - const int *csrRowPtrA, - int *csrColIndA, int *P, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const int *, - int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseXcsrsort"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrRowPtrA, csrColIndA, P, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcscsort_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, const int *cscColPtrA, - const int *cscRowIndA, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const int *, const int *, size_t *); - static auto func_ptr = LoadSymbol("cusparseXcscsort_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, cscColPtrA, cscRowIndA, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcscsort(cusparseHandle_t handle, int m, - int n, int nnz, - const cusparseMatDescr_t descrA, - const int *cscColPtrA, - int *cscRowIndA, int *P, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const int *, - int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseXcscsort"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, cscColPtrA, cscRowIndA, P, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsru2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, float *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, float *, const int *, int *, - 
csru2csrInfo_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseScsru2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsru2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, double *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, double *, const int *, int *, - csru2csrInfo_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsru2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsru2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, cuComplex *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, cuComplex *, const int *, int *, - csru2csrInfo_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsru2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsru2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, cuDoubleComplex *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, cuDoubleComplex *, const int *, int *, - csru2csrInfo_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsru2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, 
csrColInd, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsru2csr( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, float *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, float *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsru2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsru2csr( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, double *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, double *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsru2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsru2csr( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, cuComplex *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsru2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsru2csr( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, 
cuDoubleComplex *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsru2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2csru( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, float *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, float *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsr2csru"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2csru( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, double *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, double *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsr2csru"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2csru( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, cuComplex *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, 
int, int, const cusparseMatDescr_t, cuComplex *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsr2csru"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2csru( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, cuDoubleComplex *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsr2csru"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - const float *threshold, const cusparseMatDescr_t descrC, - const float *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - size_t *); - static auto func_ptr = - LoadSymbol("cusparseSpruneDense2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - const double *threshold, const cusparseMatDescr_t descrC, - const double *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, size_t 
*pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - size_t *); - static auto func_ptr = - LoadSymbol("cusparseDpruneDense2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csrNnz( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - const float *threshold, const cusparseMatDescr_t descrC, int *csrRowPtrC, - int *nnzTotalDevHostPtr, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, const float *, - const cusparseMatDescr_t, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseSpruneDense2csrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrRowPtrC, - nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csrNnz( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - const double *threshold, const cusparseMatDescr_t descrC, - int *csrSortedRowPtrC, int *nnzTotalDevHostPtr, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, const double *, - const cusparseMatDescr_t, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseDpruneDense2csrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csr( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - const float *threshold, const cusparseMatDescr_t descrC, - float *csrSortedValC, const int 
*csrSortedRowPtrC, int *csrSortedColIndC, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, const float *, - const cusparseMatDescr_t, float *, const int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseSpruneDense2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csr( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - const double *threshold, const cusparseMatDescr_t descrC, - double *csrSortedValC, const int *csrSortedRowPtrC, int *csrSortedColIndC, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, const double *, - const cusparseMatDescr_t, double *, const int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseDpruneDense2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const float *threshold, const cusparseMatDescr_t descrC, - const float *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, const float *, const cusparseMatDescr_t, - const float *, const int *, const int *, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSpruneCsr2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *threshold, const cusparseMatDescr_t descrC, - const double *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, const cusparseMatDescr_t, - const double *, const int *, const int *, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDpruneCsr2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csrNnz( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const float *threshold, const cusparseMatDescr_t descrC, - int *csrSortedRowPtrC, int *nnzTotalDevHostPtr, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, const float *, const cusparseMatDescr_t, int *, - int *, void *); - static auto func_ptr = LoadSymbol("cusparseSpruneCsr2csrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, threshold, descrC, 
csrSortedRowPtrC, - nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrNnz( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *threshold, const cusparseMatDescr_t descrC, - int *csrSortedRowPtrC, int *nnzTotalDevHostPtr, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, const cusparseMatDescr_t, int *, - int *, void *); - static auto func_ptr = LoadSymbol("cusparseDpruneCsr2csrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, threshold, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csr( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const float *threshold, const cusparseMatDescr_t descrC, - float *csrSortedValC, const int *csrSortedRowPtrC, int *csrSortedColIndC, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, const float *, const cusparseMatDescr_t, - float *, const int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseSpruneCsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csr( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double 
*csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *threshold, const cusparseMatDescr_t descrC, - double *csrSortedValC, const int *csrSortedRowPtrC, int *csrSortedColIndC, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, const cusparseMatDescr_t, - double *, const int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseDpruneCsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, threshold, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csrByPercentage_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - float percentage, const cusparseMatDescr_t descrC, - const float *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, pruneInfo_t info, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, float, - const cusparseMatDescr_t, const float *, const int *, const int *, - pruneInfo_t, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSpruneDense2csrByPercentage_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csrByPercentage_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - float percentage, const cusparseMatDescr_t descrC, - const double *csrSortedValC, const int *csrSortedRowPtrC, - const int *csrSortedColIndC, pruneInfo_t info, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - 
cusparseHandle_t, int, int, const double *, int, float, - const cusparseMatDescr_t, const double *, const int *, const int *, - pruneInfo_t, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDpruneDense2csrByPercentage_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csrNnzByPercentage( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - float percentage, const cusparseMatDescr_t descrC, int *csrRowPtrC, - int *nnzTotalDevHostPtr, pruneInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, float, - const cusparseMatDescr_t, int *, int *, pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseSpruneDense2csrNnzByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrRowPtrC, - nnzTotalDevHostPtr, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csrNnzByPercentage( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - float percentage, const cusparseMatDescr_t descrC, int *csrRowPtrC, - int *nnzTotalDevHostPtr, pruneInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, float, - const cusparseMatDescr_t, int *, int *, pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseDpruneDense2csrNnzByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrRowPtrC, - nnzTotalDevHostPtr, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csrByPercentage( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - float percentage, const cusparseMatDescr_t descrC, 
float *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC, pruneInfo_t info, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, float, - const cusparseMatDescr_t, float *, const int *, int *, pruneInfo_t, - void *); - static auto func_ptr = - LoadSymbol("cusparseSpruneDense2csrByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csrByPercentage( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - float percentage, const cusparseMatDescr_t descrC, double *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC, pruneInfo_t info, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, float, - const cusparseMatDescr_t, double *, const int *, int *, pruneInfo_t, - void *); - static auto func_ptr = - LoadSymbol("cusparseDpruneDense2csrByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csrByPercentage_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, float percentage, - const cusparseMatDescr_t descrC, const float *csrSortedValC, - const int *csrSortedRowPtrC, const int *csrSortedColIndC, pruneInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, float, const cusparseMatDescr_t, const float *, - const int *, const int *, 
pruneInfo_t, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSpruneCsr2csrByPercentage_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrByPercentage_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, float percentage, - const cusparseMatDescr_t descrC, const double *csrSortedValC, - const int *csrSortedRowPtrC, const int *csrSortedColIndC, pruneInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, float, const cusparseMatDescr_t, const double *, - const int *, const int *, pruneInfo_t, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDpruneCsr2csrByPercentage_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csrNnzByPercentage( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, float percentage, - const cusparseMatDescr_t descrC, int *csrSortedRowPtrC, - int *nnzTotalDevHostPtr, pruneInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, float, const cusparseMatDescr_t, int *, int *, - 
pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseSpruneCsr2csrNnzByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, percentage, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrNnzByPercentage( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, float percentage, - const cusparseMatDescr_t descrC, int *csrSortedRowPtrC, - int *nnzTotalDevHostPtr, pruneInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, float, const cusparseMatDescr_t, int *, int *, - pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseDpruneCsr2csrNnzByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, percentage, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csrByPercentage( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, float percentage, - const cusparseMatDescr_t descrC, float *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC, pruneInfo_t info, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, float, const cusparseMatDescr_t, float *, - const int *, int *, pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseSpruneCsr2csrByPercentage"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrByPercentage( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, float percentage, - const cusparseMatDescr_t descrC, double *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC, pruneInfo_t info, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, float, const cusparseMatDescr_t, double *, - const int *, int *, pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseDpruneCsr2csrByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, percentage, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsr2cscEx2( - cusparseHandle_t handle, int m, int n, int nnz, const void *csrVal, - const int *csrRowPtr, const int *csrColInd, void *cscVal, int *cscColPtr, - int *cscRowInd, cudaDataType valType, cusparseAction_t copyValues, - cusparseIndexBase_t idxBase, cusparseCsr2CscAlg_t alg, void *buffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const void *, const int *, const int *, - void *, int *, int *, cudaDataType, cusparseAction_t, cusparseIndexBase_t, - cusparseCsr2CscAlg_t, void *); - static auto func_ptr = LoadSymbol("cusparseCsr2cscEx2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, cscVal, - cscColPtr, cscRowInd, valType, copyValues, idxBase, 
alg, - buffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsr2cscEx2_bufferSize( - cusparseHandle_t handle, int m, int n, int nnz, const void *csrVal, - const int *csrRowPtr, const int *csrColInd, void *cscVal, int *cscColPtr, - int *cscRowInd, cudaDataType valType, cusparseAction_t copyValues, - cusparseIndexBase_t idxBase, cusparseCsr2CscAlg_t alg, size_t *bufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const void *, const int *, const int *, - void *, int *, int *, cudaDataType, cusparseAction_t, cusparseIndexBase_t, - cusparseCsr2CscAlg_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCsr2cscEx2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, cscVal, - cscColPtr, cscRowInd, valType, copyValues, idxBase, alg, - bufferSize); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCreateSpVec(cusparseSpVecDescr_t *spVecDescr, int64_t size, int64_t nnz, - void *indices, void *values, cusparseIndexType_t idxType, - cusparseIndexBase_t idxBase, cudaDataType valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseSpVecDescr_t *, int64_t, int64_t, void *, void *, - cusparseIndexType_t, cusparseIndexBase_t, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCreateSpVec"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spVecDescr, size, nnz, indices, values, idxType, idxBase, - valueType); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateConstSpVec( - cusparseConstSpVecDescr_t *spVecDescr, int64_t size, int64_t nnz, - const void *indices, const void *values, cusparseIndexType_t idxType, - cusparseIndexBase_t idxBase, cudaDataType valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseConstSpVecDescr_t *, int64_t, int64_t, const void *, const void *, - cusparseIndexType_t, cusparseIndexBase_t, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCreateConstSpVec"); - 
if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spVecDescr, size, nnz, indices, values, idxType, idxBase, - valueType); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDestroySpVec(cusparseConstSpVecDescr_t spVecDescr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseConstSpVecDescr_t); - static auto func_ptr = LoadSymbol("cusparseDestroySpVec"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spVecDescr); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpVecGet(cusparseSpVecDescr_t spVecDescr, - int64_t *size, int64_t *nnz, - void **indices, void **values, - cusparseIndexType_t *idxType, - cusparseIndexBase_t *idxBase, - cudaDataType *valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseSpVecDescr_t, int64_t *, int64_t *, void **, void **, - cusparseIndexType_t *, cusparseIndexBase_t *, cudaDataType *); - static auto func_ptr = LoadSymbol("cusparseSpVecGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spVecDescr, size, nnz, indices, values, idxType, idxBase, - valueType); -} - -cusparseStatus_t CUSPARSEAPI cusparseConstSpVecGet( - cusparseConstSpVecDescr_t spVecDescr, int64_t *size, int64_t *nnz, - const void **indices, const void **values, cusparseIndexType_t *idxType, - cusparseIndexBase_t *idxBase, cudaDataType *valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseConstSpVecDescr_t, int64_t *, int64_t *, const void **, - const void **, cusparseIndexType_t *, cusparseIndexBase_t *, - cudaDataType *); - static auto func_ptr = LoadSymbol("cusparseConstSpVecGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spVecDescr, size, nnz, indices, values, idxType, idxBase, - valueType); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpVecGetIndexBase( - cusparseConstSpVecDescr_t spVecDescr, cusparseIndexBase_t *idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseConstSpVecDescr_t, - cusparseIndexBase_t *); - static auto func_ptr = 
LoadSymbol("cusparseSpVecGetIndexBase"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spVecDescr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpVecGetValues(cusparseSpVecDescr_t spVecDescr, void **values) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseSpVecDescr_t, void **); - static auto func_ptr = LoadSymbol("cusparseSpVecGetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spVecDescr, values); -} - -cusparseStatus_t CUSPARSEAPI cusparseConstSpVecGetValues( - cusparseConstSpVecDescr_t spVecDescr, const void **values) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseConstSpVecDescr_t, const void **); - static auto func_ptr = LoadSymbol("cusparseConstSpVecGetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spVecDescr, values); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpVecSetValues(cusparseSpVecDescr_t spVecDescr, void *values) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpVecDescr_t, void *); - static auto func_ptr = LoadSymbol("cusparseSpVecSetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spVecDescr, values); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCreateDnVec(cusparseDnVecDescr_t *dnVecDescr, int64_t size, - void *values, cudaDataType valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseDnVecDescr_t *, int64_t, void *, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCreateDnVec"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnVecDescr, size, values, valueType); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCreateConstDnVec(cusparseConstDnVecDescr_t *dnVecDescr, int64_t size, - const void *values, cudaDataType valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseConstDnVecDescr_t *, int64_t, const void *, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCreateConstDnVec"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(dnVecDescr, size, values, valueType); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDestroyDnVec(cusparseConstDnVecDescr_t dnVecDescr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseConstDnVecDescr_t); - static auto func_ptr = LoadSymbol("cusparseDestroyDnVec"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnVecDescr); -} - -cusparseStatus_t CUSPARSEAPI cusparseDnVecGet(cusparseDnVecDescr_t dnVecDescr, - int64_t *size, void **values, - cudaDataType *valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseDnVecDescr_t, int64_t *, void **, cudaDataType *); - static auto func_ptr = LoadSymbol("cusparseDnVecGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnVecDescr, size, values, valueType); -} - -cusparseStatus_t CUSPARSEAPI -cusparseConstDnVecGet(cusparseConstDnVecDescr_t dnVecDescr, int64_t *size, - const void **values, cudaDataType *valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseConstDnVecDescr_t, int64_t *, const void **, cudaDataType *); - static auto func_ptr = LoadSymbol("cusparseConstDnVecGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnVecDescr, size, values, valueType); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDnVecGetValues(cusparseDnVecDescr_t dnVecDescr, void **values) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseDnVecDescr_t, void **); - static auto func_ptr = LoadSymbol("cusparseDnVecGetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnVecDescr, values); -} - -cusparseStatus_t CUSPARSEAPI cusparseConstDnVecGetValues( - cusparseConstDnVecDescr_t dnVecDescr, const void **values) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseConstDnVecDescr_t, const void **); - static auto func_ptr = LoadSymbol("cusparseConstDnVecGetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnVecDescr, values); -} 
- -cusparseStatus_t CUSPARSEAPI -cusparseDnVecSetValues(cusparseDnVecDescr_t dnVecDescr, void *values) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseDnVecDescr_t, void *); - static auto func_ptr = LoadSymbol("cusparseDnVecSetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnVecDescr, values); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDestroySpMat(cusparseConstSpMatDescr_t spMatDescr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseConstSpMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseDestroySpMat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpMatGetFormat( - cusparseConstSpMatDescr_t spMatDescr, cusparseFormat_t *format) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseConstSpMatDescr_t, - cusparseFormat_t *); - static auto func_ptr = LoadSymbol("cusparseSpMatGetFormat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, format); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpMatGetIndexBase( - cusparseConstSpMatDescr_t spMatDescr, cusparseIndexBase_t *idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseConstSpMatDescr_t, - cusparseIndexBase_t *); - static auto func_ptr = LoadSymbol("cusparseSpMatGetIndexBase"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpMatGetValues(cusparseSpMatDescr_t spMatDescr, void **values) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMatDescr_t, void **); - static auto func_ptr = LoadSymbol("cusparseSpMatGetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, values); -} - -cusparseStatus_t CUSPARSEAPI cusparseConstSpMatGetValues( - cusparseConstSpMatDescr_t spMatDescr, const void **values) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseConstSpMatDescr_t, const void **); - 
static auto func_ptr = LoadSymbol("cusparseConstSpMatGetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, values); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpMatSetValues(cusparseSpMatDescr_t spMatDescr, void *values) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMatDescr_t, void *); - static auto func_ptr = LoadSymbol("cusparseSpMatSetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, values); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpMatGetSize(cusparseConstSpMatDescr_t spMatDescr, int64_t *rows, - int64_t *cols, int64_t *nnz) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseConstSpMatDescr_t, int64_t *, int64_t *, int64_t *); - static auto func_ptr = LoadSymbol("cusparseSpMatGetSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, nnz); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpMatGetStridedBatch( - cusparseConstSpMatDescr_t spMatDescr, int *batchCount) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseConstSpMatDescr_t, int *); - static auto func_ptr = LoadSymbol("cusparseSpMatGetStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, batchCount); -} - -cusparseStatus_t CUSPARSEAPI cusparseCooSetStridedBatch( - cusparseSpMatDescr_t spMatDescr, int batchCount, int64_t batchStride) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMatDescr_t, int, int64_t); - static auto func_ptr = LoadSymbol("cusparseCooSetStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, batchCount, batchStride); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsrSetStridedBatch( - cusparseSpMatDescr_t spMatDescr, int batchCount, int64_t offsetsBatchStride, - int64_t columnsValuesBatchStride) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMatDescr_t, int, - int64_t, int64_t); - static auto func_ptr = 
LoadSymbol("cusparseCsrSetStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, batchCount, offsetsBatchStride, - columnsValuesBatchStride); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpMatGetAttribute( - cusparseConstSpMatDescr_t spMatDescr, cusparseSpMatAttribute_t attribute, - void *data, size_t dataSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseConstSpMatDescr_t, cusparseSpMatAttribute_t, void *, size_t); - static auto func_ptr = LoadSymbol("cusparseSpMatGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, attribute, data, dataSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpMatSetAttribute( - cusparseSpMatDescr_t spMatDescr, cusparseSpMatAttribute_t attribute, - void *data, size_t dataSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseSpMatDescr_t, cusparseSpMatAttribute_t, void *, size_t); - static auto func_ptr = LoadSymbol("cusparseSpMatSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, attribute, data, dataSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsr( - cusparseSpMatDescr_t *spMatDescr, int64_t rows, int64_t cols, int64_t nnz, - void *csrRowOffsets, void *csrColInd, void *csrValues, - cusparseIndexType_t csrRowOffsetsType, cusparseIndexType_t csrColIndType, - cusparseIndexBase_t idxBase, cudaDataType valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseSpMatDescr_t *, int64_t, int64_t, int64_t, void *, void *, void *, - cusparseIndexType_t, cusparseIndexType_t, cusparseIndexBase_t, - cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCreateCsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, nnz, csrRowOffsets, csrColInd, - csrValues, csrRowOffsetsType, csrColIndType, idxBase, - valueType); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateConstCsr( - cusparseConstSpMatDescr_t *spMatDescr, int64_t 
rows, int64_t cols, - int64_t nnz, const void *csrRowOffsets, const void *csrColInd, - const void *csrValues, cusparseIndexType_t csrRowOffsetsType, - cusparseIndexType_t csrColIndType, cusparseIndexBase_t idxBase, - cudaDataType valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseConstSpMatDescr_t *, int64_t, int64_t, int64_t, const void *, - const void *, const void *, cusparseIndexType_t, cusparseIndexType_t, - cusparseIndexBase_t, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCreateConstCsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, nnz, csrRowOffsets, csrColInd, - csrValues, csrRowOffsetsType, csrColIndType, idxBase, - valueType); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsc( - cusparseSpMatDescr_t *spMatDescr, int64_t rows, int64_t cols, int64_t nnz, - void *cscColOffsets, void *cscRowInd, void *cscValues, - cusparseIndexType_t cscColOffsetsType, cusparseIndexType_t cscRowIndType, - cusparseIndexBase_t idxBase, cudaDataType valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseSpMatDescr_t *, int64_t, int64_t, int64_t, void *, void *, void *, - cusparseIndexType_t, cusparseIndexType_t, cusparseIndexBase_t, - cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCreateCsc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, nnz, cscColOffsets, cscRowInd, - cscValues, cscColOffsetsType, cscRowIndType, idxBase, - valueType); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateConstCsc( - cusparseConstSpMatDescr_t *spMatDescr, int64_t rows, int64_t cols, - int64_t nnz, const void *cscColOffsets, const void *cscRowInd, - const void *cscValues, cusparseIndexType_t cscColOffsetsType, - cusparseIndexType_t cscRowIndType, cusparseIndexBase_t idxBase, - cudaDataType valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseConstSpMatDescr_t *, int64_t, int64_t, int64_t, const void *, - const 
void *, const void *, cusparseIndexType_t, cusparseIndexType_t, - cusparseIndexBase_t, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCreateConstCsc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, nnz, cscColOffsets, cscRowInd, - cscValues, cscColOffsetsType, cscRowIndType, idxBase, - valueType); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsrGet( - cusparseSpMatDescr_t spMatDescr, int64_t *rows, int64_t *cols, int64_t *nnz, - void **csrRowOffsets, void **csrColInd, void **csrValues, - cusparseIndexType_t *csrRowOffsetsType, cusparseIndexType_t *csrColIndType, - cusparseIndexBase_t *idxBase, cudaDataType *valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseSpMatDescr_t, int64_t *, int64_t *, int64_t *, void **, void **, - void **, cusparseIndexType_t *, cusparseIndexType_t *, - cusparseIndexBase_t *, cudaDataType *); - static auto func_ptr = LoadSymbol("cusparseCsrGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, nnz, csrRowOffsets, csrColInd, - csrValues, csrRowOffsetsType, csrColIndType, idxBase, - valueType); -} - -cusparseStatus_t CUSPARSEAPI cusparseConstCsrGet( - cusparseConstSpMatDescr_t spMatDescr, int64_t *rows, int64_t *cols, - int64_t *nnz, const void **csrRowOffsets, const void **csrColInd, - const void **csrValues, cusparseIndexType_t *csrRowOffsetsType, - cusparseIndexType_t *csrColIndType, cusparseIndexBase_t *idxBase, - cudaDataType *valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseConstSpMatDescr_t, int64_t *, int64_t *, int64_t *, const void **, - const void **, const void **, cusparseIndexType_t *, - cusparseIndexType_t *, cusparseIndexBase_t *, cudaDataType *); - static auto func_ptr = LoadSymbol("cusparseConstCsrGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, nnz, csrRowOffsets, csrColInd, - csrValues, csrRowOffsetsType, csrColIndType, 
idxBase, - valueType); -} - -cusparseStatus_t CUSPARSEAPI cusparseCscGet( - cusparseSpMatDescr_t spMatDescr, int64_t *rows, int64_t *cols, int64_t *nnz, - void **cscColOffsets, void **cscRowInd, void **cscValues, - cusparseIndexType_t *cscColOffsetsType, cusparseIndexType_t *cscRowIndType, - cusparseIndexBase_t *idxBase, cudaDataType *valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseSpMatDescr_t, int64_t *, int64_t *, int64_t *, void **, void **, - void **, cusparseIndexType_t *, cusparseIndexType_t *, - cusparseIndexBase_t *, cudaDataType *); - static auto func_ptr = LoadSymbol("cusparseCscGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, nnz, cscColOffsets, cscRowInd, - cscValues, cscColOffsetsType, cscRowIndType, idxBase, - valueType); -} - -cusparseStatus_t CUSPARSEAPI cusparseConstCscGet( - cusparseConstSpMatDescr_t spMatDescr, int64_t *rows, int64_t *cols, - int64_t *nnz, const void **cscColOffsets, const void **cscRowInd, - const void **cscValues, cusparseIndexType_t *cscColOffsetsType, - cusparseIndexType_t *cscRowIndType, cusparseIndexBase_t *idxBase, - cudaDataType *valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseConstSpMatDescr_t, int64_t *, int64_t *, int64_t *, const void **, - const void **, const void **, cusparseIndexType_t *, - cusparseIndexType_t *, cusparseIndexBase_t *, cudaDataType *); - static auto func_ptr = LoadSymbol("cusparseConstCscGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, nnz, cscColOffsets, cscRowInd, - cscValues, cscColOffsetsType, cscRowIndType, idxBase, - valueType); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCsrSetPointers(cusparseSpMatDescr_t spMatDescr, void *csrRowOffsets, - void *csrColInd, void *csrValues) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMatDescr_t, void *, - void *, void *); - static auto func_ptr = LoadSymbol("cusparseCsrSetPointers"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, csrRowOffsets, csrColInd, csrValues); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCscSetPointers(cusparseSpMatDescr_t spMatDescr, void *cscColOffsets, - void *cscRowInd, void *cscValues) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMatDescr_t, void *, - void *, void *); - static auto func_ptr = LoadSymbol("cusparseCscSetPointers"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, cscColOffsets, cscRowInd, cscValues); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCoo(cusparseSpMatDescr_t *spMatDescr, - int64_t rows, int64_t cols, - int64_t nnz, void *cooRowInd, - void *cooColInd, void *cooValues, - cusparseIndexType_t cooIdxType, - cusparseIndexBase_t idxBase, - cudaDataType valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseSpMatDescr_t *, int64_t, int64_t, int64_t, void *, void *, void *, - cusparseIndexType_t, cusparseIndexBase_t, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCreateCoo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, nnz, cooRowInd, cooColInd, cooValues, - cooIdxType, idxBase, valueType); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateConstCoo( - cusparseConstSpMatDescr_t *spMatDescr, int64_t rows, int64_t cols, - int64_t nnz, const void *cooRowInd, const void *cooColInd, - const void *cooValues, cusparseIndexType_t cooIdxType, - cusparseIndexBase_t idxBase, cudaDataType valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseConstSpMatDescr_t *, int64_t, int64_t, int64_t, const void *, - const void *, const void *, cusparseIndexType_t, cusparseIndexBase_t, - cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCreateConstCoo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, nnz, cooRowInd, cooColInd, cooValues, - cooIdxType, idxBase, valueType); -} - -cusparseStatus_t 
CUSPARSEAPI cusparseCooGet( - cusparseSpMatDescr_t spMatDescr, int64_t *rows, int64_t *cols, int64_t *nnz, - void **cooRowInd, // COO row indices - void **cooColInd, // COO column indices - void **cooValues, // COO values - cusparseIndexType_t *idxType, cusparseIndexBase_t *idxBase, - cudaDataType *valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseSpMatDescr_t, int64_t *, int64_t *, int64_t *, - void **, // COO row indices - void **, // COO column indices - void **, // COO values - cusparseIndexType_t *, cusparseIndexBase_t *, cudaDataType *); - static auto func_ptr = LoadSymbol("cusparseCooGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, nnz, cooRowInd, cooColInd, cooValues, - idxType, idxBase, valueType); -} - -cusparseStatus_t CUSPARSEAPI -cusparseConstCooGet(cusparseConstSpMatDescr_t spMatDescr, int64_t *rows, - int64_t *cols, int64_t *nnz, - const void **cooRowInd, // COO row indices - const void **cooColInd, // COO column indices - const void **cooValues, // COO values - cusparseIndexType_t *idxType, cusparseIndexBase_t *idxBase, - cudaDataType *valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseConstSpMatDescr_t, int64_t *, int64_t *, int64_t *, - const void **, // COO row indices - const void **, // COO column indices - const void **, // COO values - cusparseIndexType_t *, cusparseIndexBase_t *, cudaDataType *); - static auto func_ptr = LoadSymbol("cusparseConstCooGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, nnz, cooRowInd, cooColInd, cooValues, - idxType, idxBase, valueType); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCooSetPointers(cusparseSpMatDescr_t spMatDescr, void *cooRows, - void *cooColumns, void *cooValues) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMatDescr_t, void *, - void *, void *); - static auto func_ptr = LoadSymbol("cusparseCooSetPointers"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(spMatDescr, cooRows, cooColumns, cooValues); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateBlockedEll( - cusparseSpMatDescr_t *spMatDescr, int64_t rows, int64_t cols, - int64_t ellBlockSize, int64_t ellCols, void *ellColInd, void *ellValue, - cusparseIndexType_t ellIdxType, cusparseIndexBase_t idxBase, - cudaDataType valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseSpMatDescr_t *, int64_t, int64_t, int64_t, int64_t, void *, - void *, cusparseIndexType_t, cusparseIndexBase_t, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCreateBlockedEll"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, ellBlockSize, ellCols, ellColInd, - ellValue, ellIdxType, idxBase, valueType); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateConstBlockedEll( - cusparseConstSpMatDescr_t *spMatDescr, int64_t rows, int64_t cols, - int64_t ellBlockSize, int64_t ellCols, const void *ellColInd, - const void *ellValue, cusparseIndexType_t ellIdxType, - cusparseIndexBase_t idxBase, cudaDataType valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseConstSpMatDescr_t *, int64_t, int64_t, int64_t, int64_t, - const void *, const void *, cusparseIndexType_t, cusparseIndexBase_t, - cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCreateConstBlockedEll"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, ellBlockSize, ellCols, ellColInd, - ellValue, ellIdxType, idxBase, valueType); -} - -cusparseStatus_t CUSPARSEAPI cusparseBlockedEllGet( - cusparseSpMatDescr_t spMatDescr, int64_t *rows, int64_t *cols, - int64_t *ellBlockSize, int64_t *ellCols, void **ellColInd, void **ellValue, - cusparseIndexType_t *ellIdxType, cusparseIndexBase_t *idxBase, - cudaDataType *valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseSpMatDescr_t, int64_t *, int64_t *, int64_t *, int64_t *, void **, - void **, 
cusparseIndexType_t *, cusparseIndexBase_t *, cudaDataType *); - static auto func_ptr = LoadSymbol("cusparseBlockedEllGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, ellBlockSize, ellCols, ellColInd, - ellValue, ellIdxType, idxBase, valueType); -} - -cusparseStatus_t CUSPARSEAPI cusparseConstBlockedEllGet( - cusparseConstSpMatDescr_t spMatDescr, int64_t *rows, int64_t *cols, - int64_t *ellBlockSize, int64_t *ellCols, const void **ellColInd, - const void **ellValue, cusparseIndexType_t *ellIdxType, - cusparseIndexBase_t *idxBase, cudaDataType *valueType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseConstSpMatDescr_t, int64_t *, int64_t *, int64_t *, int64_t *, - const void **, const void **, cusparseIndexType_t *, - cusparseIndexBase_t *, cudaDataType *); - static auto func_ptr = LoadSymbol("cusparseConstBlockedEllGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spMatDescr, rows, cols, ellBlockSize, ellCols, ellColInd, - ellValue, ellIdxType, idxBase, valueType); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateDnMat( - cusparseDnMatDescr_t *dnMatDescr, int64_t rows, int64_t cols, int64_t ld, - void *values, cudaDataType valueType, cusparseOrder_t order) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseDnMatDescr_t *, int64_t, int64_t, int64_t, void *, cudaDataType, - cusparseOrder_t); - static auto func_ptr = LoadSymbol("cusparseCreateDnMat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnMatDescr, rows, cols, ld, values, valueType, order); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCreateConstDnMat(cusparseConstDnMatDescr_t *dnMatDescr, int64_t rows, - int64_t cols, int64_t ld, const void *values, - cudaDataType valueType, cusparseOrder_t order) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseConstDnMatDescr_t *, int64_t, int64_t, int64_t, const void *, - cudaDataType, cusparseOrder_t); - static auto func_ptr = 
LoadSymbol("cusparseCreateConstDnMat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnMatDescr, rows, cols, ld, values, valueType, order); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDestroyDnMat(cusparseConstDnMatDescr_t dnMatDescr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseConstDnMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseDestroyDnMat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnMatDescr); -} - -cusparseStatus_t CUSPARSEAPI cusparseDnMatGet(cusparseDnMatDescr_t dnMatDescr, - int64_t *rows, int64_t *cols, - int64_t *ld, void **values, - cudaDataType *type, - cusparseOrder_t *order) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseDnMatDescr_t, int64_t *, int64_t *, int64_t *, void **, - cudaDataType *, cusparseOrder_t *); - static auto func_ptr = LoadSymbol("cusparseDnMatGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnMatDescr, rows, cols, ld, values, type, order); -} - -cusparseStatus_t CUSPARSEAPI -cusparseConstDnMatGet(cusparseConstDnMatDescr_t dnMatDescr, int64_t *rows, - int64_t *cols, int64_t *ld, const void **values, - cudaDataType *type, cusparseOrder_t *order) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseConstDnMatDescr_t, int64_t *, int64_t *, int64_t *, const void **, - cudaDataType *, cusparseOrder_t *); - static auto func_ptr = LoadSymbol("cusparseConstDnMatGet"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnMatDescr, rows, cols, ld, values, type, order); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDnMatGetValues(cusparseDnMatDescr_t dnMatDescr, void **values) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseDnMatDescr_t, void **); - static auto func_ptr = LoadSymbol("cusparseDnMatGetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnMatDescr, values); -} - -cusparseStatus_t CUSPARSEAPI cusparseConstDnMatGetValues( - cusparseConstDnMatDescr_t 
dnMatDescr, const void **values) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseConstDnMatDescr_t, const void **); - static auto func_ptr = LoadSymbol("cusparseConstDnMatGetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnMatDescr, values); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDnMatSetValues(cusparseDnMatDescr_t dnMatDescr, void *values) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseDnMatDescr_t, void *); - static auto func_ptr = LoadSymbol("cusparseDnMatSetValues"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnMatDescr, values); -} - -cusparseStatus_t CUSPARSEAPI cusparseDnMatSetStridedBatch( - cusparseDnMatDescr_t dnMatDescr, int batchCount, int64_t batchStride) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseDnMatDescr_t, int, int64_t); - static auto func_ptr = LoadSymbol("cusparseDnMatSetStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnMatDescr, batchCount, batchStride); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDnMatGetStridedBatch(cusparseConstDnMatDescr_t dnMatDescr, - int *batchCount, int64_t *batchStride) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseConstDnMatDescr_t, - int *, int64_t *); - static auto func_ptr = LoadSymbol("cusparseDnMatGetStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dnMatDescr, batchCount, batchStride); -} - -cusparseStatus_t CUSPARSEAPI cusparseAxpby(cusparseHandle_t handle, - const void *alpha, - cusparseConstSpVecDescr_t vecX, - const void *beta, - cusparseDnVecDescr_t vecY) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const void *, cusparseConstSpVecDescr_t, const void *, - cusparseDnVecDescr_t); - static auto func_ptr = LoadSymbol("cusparseAxpby"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alpha, vecX, beta, vecY); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseGather(cusparseHandle_t handle, - cusparseConstDnVecDescr_t vecY, - cusparseSpVecDescr_t vecX) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseConstDnVecDescr_t, cusparseSpVecDescr_t); - static auto func_ptr = LoadSymbol("cusparseGather"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, vecY, vecX); -} - -cusparseStatus_t CUSPARSEAPI cusparseScatter(cusparseHandle_t handle, - cusparseConstSpVecDescr_t vecX, - cusparseDnVecDescr_t vecY) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseConstSpVecDescr_t, cusparseDnVecDescr_t); - static auto func_ptr = LoadSymbol("cusparseScatter"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, vecX, vecY); -} - -cusparseStatus_t CUSPARSEAPI cusparseRot(cusparseHandle_t handle, - const void *c_coeff, - const void *s_coeff, - cusparseSpVecDescr_t vecX, - cusparseDnVecDescr_t vecY) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const void *, const void *, cusparseSpVecDescr_t, - cusparseDnVecDescr_t); - static auto func_ptr = LoadSymbol("cusparseRot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, c_coeff, s_coeff, vecX, vecY); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpVV_bufferSize( - cusparseHandle_t handle, cusparseOperation_t opX, - cusparseConstSpVecDescr_t vecX, cusparseConstDnVecDescr_t vecY, - const void *result, cudaDataType computeType, size_t *bufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseConstSpVecDescr_t, - cusparseConstDnVecDescr_t, const void *, cudaDataType, size_t *); - static auto func_ptr = LoadSymbol("cusparseSpVV_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opX, vecX, vecY, result, computeType, bufferSize); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpVV(cusparseHandle_t handle, cusparseOperation_t opX, - 
cusparseConstSpVecDescr_t vecX, cusparseConstDnVecDescr_t vecY, - void *result, cudaDataType computeType, void *externalBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseConstSpVecDescr_t, - cusparseConstDnVecDescr_t, void *, cudaDataType, void *); - static auto func_ptr = LoadSymbol("cusparseSpVV"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opX, vecX, vecY, result, computeType, externalBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSparseToDense_bufferSize( - cusparseHandle_t handle, cusparseConstSpMatDescr_t matA, - cusparseDnMatDescr_t matB, cusparseSparseToDenseAlg_t alg, - size_t *bufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseConstSpMatDescr_t, cusparseDnMatDescr_t, - cusparseSparseToDenseAlg_t, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSparseToDense_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, matA, matB, alg, bufferSize); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSparseToDense(cusparseHandle_t handle, cusparseConstSpMatDescr_t matA, - cusparseDnMatDescr_t matB, cusparseSparseToDenseAlg_t alg, - void *externalBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseConstSpMatDescr_t, cusparseDnMatDescr_t, - cusparseSparseToDenseAlg_t, void *); - static auto func_ptr = LoadSymbol("cusparseSparseToDense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, matA, matB, alg, externalBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDenseToSparse_bufferSize( - cusparseHandle_t handle, cusparseConstDnMatDescr_t matA, - cusparseSpMatDescr_t matB, cusparseDenseToSparseAlg_t alg, - size_t *bufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseConstDnMatDescr_t, cusparseSpMatDescr_t, - cusparseDenseToSparseAlg_t, size_t *); - static auto func_ptr = - 
LoadSymbol("cusparseDenseToSparse_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, matA, matB, alg, bufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDenseToSparse_analysis( - cusparseHandle_t handle, cusparseConstDnMatDescr_t matA, - cusparseSpMatDescr_t matB, cusparseDenseToSparseAlg_t alg, - void *externalBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseConstDnMatDescr_t, cusparseSpMatDescr_t, - cusparseDenseToSparseAlg_t, void *); - static auto func_ptr = LoadSymbol("cusparseDenseToSparse_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, matA, matB, alg, externalBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDenseToSparse_convert( - cusparseHandle_t handle, cusparseConstDnMatDescr_t matA, - cusparseSpMatDescr_t matB, cusparseDenseToSparseAlg_t alg, - void *externalBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseConstDnMatDescr_t, cusparseSpMatDescr_t, - cusparseDenseToSparseAlg_t, void *); - static auto func_ptr = LoadSymbol("cusparseDenseToSparse_convert"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, matA, matB, alg, externalBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpMV( - cusparseHandle_t handle, cusparseOperation_t opA, const void *alpha, - cusparseConstSpMatDescr_t matA, cusparseConstDnVecDescr_t vecX, - const void *beta, cusparseDnVecDescr_t vecY, cudaDataType computeType, - cusparseSpMVAlg_t alg, void *externalBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const void *, - cusparseConstSpMatDescr_t, cusparseConstDnVecDescr_t, const void *, - cusparseDnVecDescr_t, cudaDataType, cusparseSpMVAlg_t, void *); - static auto func_ptr = LoadSymbol("cusparseSpMV"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, alpha, matA, vecX, beta, vecY, computeType, alg, - 
externalBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpMV_bufferSize( - cusparseHandle_t handle, cusparseOperation_t opA, const void *alpha, - cusparseConstSpMatDescr_t matA, cusparseConstDnVecDescr_t vecX, - const void *beta, cusparseDnVecDescr_t vecY, cudaDataType computeType, - cusparseSpMVAlg_t alg, size_t *bufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const void *, - cusparseConstSpMatDescr_t, cusparseConstDnVecDescr_t, const void *, - cusparseDnVecDescr_t, cudaDataType, cusparseSpMVAlg_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSpMV_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, alpha, matA, vecX, beta, vecY, computeType, alg, - bufferSize); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpSV_createDescr(cusparseSpSVDescr_t *descr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpSVDescr_t *); - static auto func_ptr = LoadSymbol("cusparseSpSV_createDescr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descr); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpSV_destroyDescr(cusparseSpSVDescr_t descr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpSVDescr_t); - static auto func_ptr = LoadSymbol("cusparseSpSV_destroyDescr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descr); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpSV_bufferSize( - cusparseHandle_t handle, cusparseOperation_t opA, const void *alpha, - cusparseConstSpMatDescr_t matA, cusparseConstDnVecDescr_t vecX, - cusparseDnVecDescr_t vecY, cudaDataType computeType, cusparseSpSVAlg_t alg, - cusparseSpSVDescr_t spsvDescr, size_t *bufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const void *, - cusparseConstSpMatDescr_t, cusparseConstDnVecDescr_t, - cusparseDnVecDescr_t, cudaDataType, cusparseSpSVAlg_t, - cusparseSpSVDescr_t, size_t *); - static auto 
func_ptr = LoadSymbol("cusparseSpSV_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, alpha, matA, vecX, vecY, computeType, alg, - spsvDescr, bufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpSV_analysis( - cusparseHandle_t handle, cusparseOperation_t opA, const void *alpha, - cusparseConstSpMatDescr_t matA, cusparseConstDnVecDescr_t vecX, - cusparseDnVecDescr_t vecY, cudaDataType computeType, cusparseSpSVAlg_t alg, - cusparseSpSVDescr_t spsvDescr, void *externalBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const void *, - cusparseConstSpMatDescr_t, cusparseConstDnVecDescr_t, - cusparseDnVecDescr_t, cudaDataType, cusparseSpSVAlg_t, - cusparseSpSVDescr_t, void *); - static auto func_ptr = LoadSymbol("cusparseSpSV_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, alpha, matA, vecX, vecY, computeType, alg, - spsvDescr, externalBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpSV_solve( - cusparseHandle_t handle, cusparseOperation_t opA, const void *alpha, - cusparseConstSpMatDescr_t matA, cusparseConstDnVecDescr_t vecX, - cusparseDnVecDescr_t vecY, cudaDataType computeType, cusparseSpSVAlg_t alg, - cusparseSpSVDescr_t spsvDescr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const void *, - cusparseConstSpMatDescr_t, cusparseConstDnVecDescr_t, - cusparseDnVecDescr_t, cudaDataType, cusparseSpSVAlg_t, - cusparseSpSVDescr_t); - static auto func_ptr = LoadSymbol("cusparseSpSV_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, alpha, matA, vecX, vecY, computeType, alg, - spsvDescr); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpSM_createDescr(cusparseSpSMDescr_t *descr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpSMDescr_t *); - static auto func_ptr = LoadSymbol("cusparseSpSM_createDescr"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(descr); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpSM_destroyDescr(cusparseSpSMDescr_t descr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpSMDescr_t); - static auto func_ptr = LoadSymbol("cusparseSpSM_destroyDescr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descr); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpSM_bufferSize( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - const void *alpha, cusparseConstSpMatDescr_t matA, - cusparseConstDnMatDescr_t matB, cusparseDnMatDescr_t matC, - cudaDataType computeType, cusparseSpSMAlg_t alg, - cusparseSpSMDescr_t spsmDescr, size_t *bufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, - cusparseConstSpMatDescr_t, cusparseConstDnMatDescr_t, - cusparseDnMatDescr_t, cudaDataType, cusparseSpSMAlg_t, - cusparseSpSMDescr_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSpSM_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, alpha, matA, matB, matC, computeType, alg, - spsmDescr, bufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpSM_analysis( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - const void *alpha, cusparseConstSpMatDescr_t matA, - cusparseConstDnMatDescr_t matB, cusparseDnMatDescr_t matC, - cudaDataType computeType, cusparseSpSMAlg_t alg, - cusparseSpSMDescr_t spsmDescr, void *externalBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, - cusparseConstSpMatDescr_t, cusparseConstDnMatDescr_t, - cusparseDnMatDescr_t, cudaDataType, cusparseSpSMAlg_t, - cusparseSpSMDescr_t, void *); - static auto func_ptr = LoadSymbol("cusparseSpSM_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, alpha, matA, 
matB, matC, computeType, alg, - spsmDescr, externalBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpSM_solve( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - const void *alpha, cusparseConstSpMatDescr_t matA, - cusparseConstDnMatDescr_t matB, cusparseDnMatDescr_t matC, - cudaDataType computeType, cusparseSpSMAlg_t alg, - cusparseSpSMDescr_t spsmDescr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, - cusparseConstSpMatDescr_t, cusparseConstDnMatDescr_t, - cusparseDnMatDescr_t, cudaDataType, cusparseSpSMAlg_t, - cusparseSpSMDescr_t); - static auto func_ptr = LoadSymbol("cusparseSpSM_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, alpha, matA, matB, matC, computeType, alg, - spsmDescr); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpMM_bufferSize( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - const void *alpha, cusparseConstSpMatDescr_t matA, - cusparseConstDnMatDescr_t matB, const void *beta, cusparseDnMatDescr_t matC, - cudaDataType computeType, cusparseSpMMAlg_t alg, size_t *bufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, - cusparseConstSpMatDescr_t, cusparseConstDnMatDescr_t, const void *, - cusparseDnMatDescr_t, cudaDataType, cusparseSpMMAlg_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSpMM_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, - alg, bufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpMM_preprocess( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - const void *alpha, cusparseConstSpMatDescr_t matA, - cusparseConstDnMatDescr_t matB, const void *beta, cusparseDnMatDescr_t matC, - cudaDataType computeType, cusparseSpMMAlg_t alg, 
void *externalBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, - cusparseConstSpMatDescr_t, cusparseConstDnMatDescr_t, const void *, - cusparseDnMatDescr_t, cudaDataType, cusparseSpMMAlg_t, void *); - static auto func_ptr = LoadSymbol("cusparseSpMM_preprocess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, - alg, externalBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpMM( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - const void *alpha, cusparseConstSpMatDescr_t matA, - cusparseConstDnMatDescr_t matB, const void *beta, cusparseDnMatDescr_t matC, - cudaDataType computeType, cusparseSpMMAlg_t alg, void *externalBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, - cusparseConstSpMatDescr_t, cusparseConstDnMatDescr_t, const void *, - cusparseDnMatDescr_t, cudaDataType, cusparseSpMMAlg_t, void *); - static auto func_ptr = LoadSymbol("cusparseSpMM"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, - alg, externalBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpGEMM_createDescr(cusparseSpGEMMDescr_t *descr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpGEMMDescr_t *); - static auto func_ptr = LoadSymbol("cusparseSpGEMM_createDescr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descr); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpGEMM_destroyDescr(cusparseSpGEMMDescr_t descr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpGEMMDescr_t); - static auto func_ptr = LoadSymbol("cusparseSpGEMM_destroyDescr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descr); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseSpGEMM_workEstimation( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - const void *alpha, cusparseConstSpMatDescr_t matA, - cusparseConstSpMatDescr_t matB, const void *beta, cusparseSpMatDescr_t matC, - cudaDataType computeType, cusparseSpGEMMAlg_t alg, - cusparseSpGEMMDescr_t spgemmDescr, size_t *bufferSize1, - void *externalBuffer1) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, - cusparseConstSpMatDescr_t, cusparseConstSpMatDescr_t, const void *, - cusparseSpMatDescr_t, cudaDataType, cusparseSpGEMMAlg_t, - cusparseSpGEMMDescr_t, size_t *, void *); - static auto func_ptr = LoadSymbol("cusparseSpGEMM_workEstimation"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, - alg, spgemmDescr, bufferSize1, externalBuffer1); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpGEMM_getNumProducts( - cusparseSpGEMMDescr_t spgemmDescr, int64_t *num_prods) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseSpGEMMDescr_t, int64_t *); - static auto func_ptr = LoadSymbol("cusparseSpGEMM_getNumProducts"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(spgemmDescr, num_prods); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpGEMM_estimateMemory( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - const void *alpha, cusparseConstSpMatDescr_t matA, - cusparseConstSpMatDescr_t matB, const void *beta, cusparseSpMatDescr_t matC, - cudaDataType computeType, cusparseSpGEMMAlg_t alg, - cusparseSpGEMMDescr_t spgemmDescr, float chunk_fraction, - size_t *bufferSize3, void *externalBuffer3, size_t *bufferSize2) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, - cusparseConstSpMatDescr_t, cusparseConstSpMatDescr_t, const void *, - cusparseSpMatDescr_t, cudaDataType, 
cusparseSpGEMMAlg_t, - cusparseSpGEMMDescr_t, float, size_t *, void *, size_t *); - static auto func_ptr = LoadSymbol("cusparseSpGEMM_estimateMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, - alg, spgemmDescr, chunk_fraction, bufferSize3, - externalBuffer3, bufferSize2); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpGEMM_compute( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - const void *alpha, cusparseConstSpMatDescr_t matA, - cusparseConstSpMatDescr_t matB, const void *beta, cusparseSpMatDescr_t matC, - cudaDataType computeType, cusparseSpGEMMAlg_t alg, - cusparseSpGEMMDescr_t spgemmDescr, size_t *bufferSize2, - void *externalBuffer2) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, - cusparseConstSpMatDescr_t, cusparseConstSpMatDescr_t, const void *, - cusparseSpMatDescr_t, cudaDataType, cusparseSpGEMMAlg_t, - cusparseSpGEMMDescr_t, size_t *, void *); - static auto func_ptr = LoadSymbol("cusparseSpGEMM_compute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, - alg, spgemmDescr, bufferSize2, externalBuffer2); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpGEMM_copy( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - const void *alpha, cusparseConstSpMatDescr_t matA, - cusparseConstSpMatDescr_t matB, const void *beta, cusparseSpMatDescr_t matC, - cudaDataType computeType, cusparseSpGEMMAlg_t alg, - cusparseSpGEMMDescr_t spgemmDescr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, - cusparseConstSpMatDescr_t, cusparseConstSpMatDescr_t, const void *, - cusparseSpMatDescr_t, cudaDataType, cusparseSpGEMMAlg_t, - cusparseSpGEMMDescr_t); - static auto func_ptr = 
LoadSymbol("cusparseSpGEMM_copy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, - alg, spgemmDescr); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpGEMMreuse_workEstimation( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - cusparseConstSpMatDescr_t matA, cusparseConstSpMatDescr_t matB, - cusparseSpMatDescr_t matC, cusparseSpGEMMAlg_t alg, - cusparseSpGEMMDescr_t spgemmDescr, size_t *bufferSize1, - void *externalBuffer1) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, - cusparseConstSpMatDescr_t, cusparseConstSpMatDescr_t, - cusparseSpMatDescr_t, cusparseSpGEMMAlg_t, cusparseSpGEMMDescr_t, - size_t *, void *); - static auto func_ptr = - LoadSymbol("cusparseSpGEMMreuse_workEstimation"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, matA, matB, matC, alg, spgemmDescr, - bufferSize1, externalBuffer1); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpGEMMreuse_nnz( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - cusparseConstSpMatDescr_t matA, cusparseConstSpMatDescr_t matB, - cusparseSpMatDescr_t matC, cusparseSpGEMMAlg_t alg, - cusparseSpGEMMDescr_t spgemmDescr, size_t *bufferSize2, - void *externalBuffer2, size_t *bufferSize3, void *externalBuffer3, - size_t *bufferSize4, void *externalBuffer4) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, - cusparseConstSpMatDescr_t, cusparseConstSpMatDescr_t, - cusparseSpMatDescr_t, cusparseSpGEMMAlg_t, cusparseSpGEMMDescr_t, - size_t *, void *, size_t *, void *, size_t *, void *); - static auto func_ptr = LoadSymbol("cusparseSpGEMMreuse_nnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, matA, matB, matC, alg, spgemmDescr, - bufferSize2, externalBuffer2, bufferSize3, 
externalBuffer3, - bufferSize4, externalBuffer4); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpGEMMreuse_copy( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - cusparseConstSpMatDescr_t matA, cusparseConstSpMatDescr_t matB, - cusparseSpMatDescr_t matC, cusparseSpGEMMAlg_t alg, - cusparseSpGEMMDescr_t spgemmDescr, size_t *bufferSize5, - void *externalBuffer5) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, - cusparseConstSpMatDescr_t, cusparseConstSpMatDescr_t, - cusparseSpMatDescr_t, cusparseSpGEMMAlg_t, cusparseSpGEMMDescr_t, - size_t *, void *); - static auto func_ptr = LoadSymbol("cusparseSpGEMMreuse_copy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, matA, matB, matC, alg, spgemmDescr, - bufferSize5, externalBuffer5); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpGEMMreuse_compute( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - const void *alpha, cusparseConstSpMatDescr_t matA, - cusparseConstSpMatDescr_t matB, const void *beta, cusparseSpMatDescr_t matC, - cudaDataType computeType, cusparseSpGEMMAlg_t alg, - cusparseSpGEMMDescr_t spgemmDescr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, - cusparseConstSpMatDescr_t, cusparseConstSpMatDescr_t, const void *, - cusparseSpMatDescr_t, cudaDataType, cusparseSpGEMMAlg_t, - cusparseSpGEMMDescr_t); - static auto func_ptr = LoadSymbol("cusparseSpGEMMreuse_compute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, - alg, spgemmDescr); -} - -cusparseStatus_t CUSPARSEAPI cusparseSDDMM_bufferSize( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - const void *alpha, cusparseConstDnMatDescr_t matA, - cusparseConstDnMatDescr_t matB, const void *beta, 
cusparseSpMatDescr_t matC, - cudaDataType computeType, cusparseSDDMMAlg_t alg, size_t *bufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, - cusparseConstDnMatDescr_t, cusparseConstDnMatDescr_t, const void *, - cusparseSpMatDescr_t, cudaDataType, cusparseSDDMMAlg_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSDDMM_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, - alg, bufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSDDMM_preprocess( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - const void *alpha, cusparseConstDnMatDescr_t matA, - cusparseConstDnMatDescr_t matB, const void *beta, cusparseSpMatDescr_t matC, - cudaDataType computeType, cusparseSDDMMAlg_t alg, void *externalBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, - cusparseConstDnMatDescr_t, cusparseConstDnMatDescr_t, const void *, - cusparseSpMatDescr_t, cudaDataType, cusparseSDDMMAlg_t, void *); - static auto func_ptr = LoadSymbol("cusparseSDDMM_preprocess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, - alg, externalBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSDDMM( - cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, - const void *alpha, cusparseConstDnMatDescr_t matA, - cusparseConstDnMatDescr_t matB, const void *beta, cusparseSpMatDescr_t matC, - cudaDataType computeType, cusparseSDDMMAlg_t alg, void *externalBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, - cusparseConstDnMatDescr_t, cusparseConstDnMatDescr_t, const void *, - cusparseSpMatDescr_t, cudaDataType, 
cusparseSDDMMAlg_t, void *); - static auto func_ptr = LoadSymbol("cusparseSDDMM"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, - alg, externalBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpMMOp_createPlan( - cusparseHandle_t handle, cusparseSpMMOpPlan_t *plan, - cusparseOperation_t opA, cusparseOperation_t opB, - cusparseConstSpMatDescr_t matA, cusparseConstDnMatDescr_t matB, - cusparseDnMatDescr_t matC, cudaDataType computeType, - cusparseSpMMOpAlg_t alg, const void *addOperationNvvmBuffer, - size_t addOperationBufferSize, const void *mulOperationNvvmBuffer, - size_t mulOperationBufferSize, const void *epilogueNvvmBuffer, - size_t epilogueBufferSize, size_t *SpMMWorkspaceSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseSpMMOpPlan_t *, cusparseOperation_t, - cusparseOperation_t, cusparseConstSpMatDescr_t, cusparseConstDnMatDescr_t, - cusparseDnMatDescr_t, cudaDataType, cusparseSpMMOpAlg_t, const void *, - size_t, const void *, size_t, const void *, size_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSpMMOp_createPlan"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, plan, opA, opB, matA, matB, matC, computeType, alg, - addOperationNvvmBuffer, addOperationBufferSize, - mulOperationNvvmBuffer, mulOperationBufferSize, - epilogueNvvmBuffer, epilogueBufferSize, SpMMWorkspaceSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpMMOp(cusparseSpMMOpPlan_t plan, - void *externalBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMMOpPlan_t, void *); - static auto func_ptr = LoadSymbol("cusparseSpMMOp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan, externalBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSpMMOp_destroyPlan(cusparseSpMMOpPlan_t plan) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMMOpPlan_t); - static auto func_ptr = 
LoadSymbol("cusparseSpMMOp_destroyPlan"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(plan); -} - -} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cusparse_9_0.inc b/third_party/xla/third_party/tsl/tsl/cuda/cusparse_9_0.inc deleted file mode 100644 index 22d0105b1bea5a..00000000000000 --- a/third_party/xla/third_party/tsl/tsl/cuda/cusparse_9_0.inc +++ /dev/null @@ -1,7152 +0,0 @@ -// Auto-generated, do not edit. - -extern "C" { - -cusparseStatus_t CUSPARSEAPI cusparseCreate(cusparseHandle_t *handle) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t *); - static auto func_ptr = LoadSymbol("cusparseCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroy(cusparseHandle_t handle) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t); - static auto func_ptr = LoadSymbol("cusparseDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle); -} - -cusparseStatus_t CUSPARSEAPI cusparseGetVersion(cusparseHandle_t handle, - int *version) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int *); - static auto func_ptr = LoadSymbol("cusparseGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, version); -} - -cusparseStatus_t CUSPARSEAPI cusparseGetProperty(libraryPropertyType type, - int *value) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(libraryPropertyType, int *); - static auto func_ptr = LoadSymbol("cusparseGetProperty"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(type, value); -} - -cusparseStatus_t CUSPARSEAPI cusparseSetStream(cusparseHandle_t handle, - cudaStream_t streamId) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cusparseSetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, 
streamId); -} - -cusparseStatus_t CUSPARSEAPI cusparseGetStream(cusparseHandle_t handle, - cudaStream_t *streamId) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, cudaStream_t *); - static auto func_ptr = LoadSymbol("cusparseGetStream"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, streamId); -} - -cusparseStatus_t CUSPARSEAPI -cusparseGetPointerMode(cusparseHandle_t handle, cusparsePointerMode_t *mode) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, - cusparsePointerMode_t *); - static auto func_ptr = LoadSymbol("cusparseGetPointerMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSetPointerMode(cusparseHandle_t handle, cusparsePointerMode_t mode) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, cusparsePointerMode_t); - static auto func_ptr = LoadSymbol("cusparseSetPointerMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mode); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCreateMatDescr(cusparseMatDescr_t *descrA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t *); - static auto func_ptr = LoadSymbol("cusparseCreateMatDescr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDestroyMatDescr(cusparseMatDescr_t descrA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseDestroyMatDescr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCopyMatDescr(cusparseMatDescr_t dest, const cusparseMatDescr_t src) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t, - const cusparseMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseCopyMatDescr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(dest, src); -} - -cusparseStatus_t CUSPARSEAPI cusparseSetMatType(cusparseMatDescr_t descrA, - cusparseMatrixType_t type) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t, cusparseMatrixType_t); - static auto func_ptr = LoadSymbol("cusparseSetMatType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA, type); -} - -cusparseMatrixType_t CUSPARSEAPI -cusparseGetMatType(const cusparseMatDescr_t descrA) { - using FuncPtr = cusparseMatrixType_t(CUSPARSEAPI *)(const cusparseMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseGetMatType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSetMatFillMode(cusparseMatDescr_t descrA, cusparseFillMode_t fillMode) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t, cusparseFillMode_t); - static auto func_ptr = LoadSymbol("cusparseSetMatFillMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA, fillMode); -} - -cusparseFillMode_t CUSPARSEAPI -cusparseGetMatFillMode(const cusparseMatDescr_t descrA) { - using FuncPtr = cusparseFillMode_t(CUSPARSEAPI *)(const cusparseMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseGetMatFillMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSetMatDiagType(cusparseMatDescr_t descrA, cusparseDiagType_t diagType) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t, cusparseDiagType_t); - static auto func_ptr = LoadSymbol("cusparseSetMatDiagType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA, diagType); -} - -cusparseDiagType_t CUSPARSEAPI -cusparseGetMatDiagType(const cusparseMatDescr_t descrA) { - using FuncPtr = cusparseDiagType_t(CUSPARSEAPI *)(const cusparseMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseGetMatDiagType"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI cusparseSetMatIndexBase(cusparseMatDescr_t descrA, - cusparseIndexBase_t base) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSetMatIndexBase"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA, base); -} - -cusparseIndexBase_t CUSPARSEAPI -cusparseGetMatIndexBase(const cusparseMatDescr_t descrA) { - using FuncPtr = cusparseIndexBase_t(CUSPARSEAPI *)(const cusparseMatDescr_t); - static auto func_ptr = LoadSymbol("cusparseGetMatIndexBase"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(descrA); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCreateSolveAnalysisInfo(cusparseSolveAnalysisInfo_t *info) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseSolveAnalysisInfo_t *); - static auto func_ptr = LoadSymbol("cusparseCreateSolveAnalysisInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDestroySolveAnalysisInfo(cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSolveAnalysisInfo_t); - static auto func_ptr = - LoadSymbol("cusparseDestroySolveAnalysisInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseGetLevelInfo(cusparseHandle_t handle, cusparseSolveAnalysisInfo_t info, - int *nlevels, int **levelPtr, int **levelInd) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseSolveAnalysisInfo_t, int *, int **, int **); - static auto func_ptr = LoadSymbol("cusparseGetLevelInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, nlevels, levelPtr, levelInd); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsrsv2Info(csrsv2Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI 
*)(csrsv2Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateCsrsv2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyCsrsv2Info(csrsv2Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrsv2Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyCsrsv2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsric02Info(csric02Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csric02Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateCsric02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyCsric02Info(csric02Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csric02Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyCsric02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateBsric02Info(bsric02Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsric02Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateBsric02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyBsric02Info(bsric02Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsric02Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyBsric02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsrilu02Info(csrilu02Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrilu02Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateCsrilu02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyCsrilu02Info(csrilu02Info_t info) 
{ - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrilu02Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyCsrilu02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateBsrilu02Info(bsrilu02Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrilu02Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateBsrilu02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyBsrilu02Info(bsrilu02Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrilu02Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyBsrilu02Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateBsrsv2Info(bsrsv2Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrsv2Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateBsrsv2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyBsrsv2Info(bsrsv2Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrsv2Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyBsrsv2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateBsrsm2Info(bsrsm2Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrsm2Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateBsrsm2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyBsrsm2Info(bsrsm2Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrsm2Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyBsrsm2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseCreateHybMat(cusparseHybMat_t *hybA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHybMat_t *); - static auto func_ptr = LoadSymbol("cusparseCreateHybMat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hybA); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyHybMat(cusparseHybMat_t hybA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHybMat_t); - static auto func_ptr = LoadSymbol("cusparseDestroyHybMat"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hybA); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsru2csrInfo(csru2csrInfo_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csru2csrInfo_t *); - static auto func_ptr = LoadSymbol("cusparseCreateCsru2csrInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyCsru2csrInfo(csru2csrInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csru2csrInfo_t); - static auto func_ptr = LoadSymbol("cusparseDestroyCsru2csrInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCreateColorInfo(cusparseColorInfo_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseColorInfo_t *); - static auto func_ptr = LoadSymbol("cusparseCreateColorInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDestroyColorInfo(cusparseColorInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseColorInfo_t); - static auto func_ptr = LoadSymbol("cusparseDestroyColorInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseSetColorAlgs(cusparseColorInfo_t info, - cusparseColorAlg_t alg) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseColorInfo_t, cusparseColorAlg_t); - static auto func_ptr = 
LoadSymbol("cusparseSetColorAlgs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, alg); -} - -cusparseStatus_t CUSPARSEAPI cusparseGetColorAlgs(cusparseColorInfo_t info, - cusparseColorAlg_t *alg) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseColorInfo_t, - cusparseColorAlg_t *); - static auto func_ptr = LoadSymbol("cusparseGetColorAlgs"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info, alg); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreatePruneInfo(pruneInfo_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(pruneInfo_t *); - static auto func_ptr = LoadSymbol("cusparseCreatePruneInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyPruneInfo(pruneInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(pruneInfo_t); - static auto func_ptr = LoadSymbol("cusparseDestroyPruneInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseSaxpyi(cusparseHandle_t handle, int nnz, - const float *alpha, - const float *xVal, const int *xInd, - float *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const float *, const float *, const int *, float *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSaxpyi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, alpha, xVal, xInd, y, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseDaxpyi(cusparseHandle_t handle, int nnz, - const double *alpha, - const double *xVal, const int *xInd, - double *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, const double *, const int *, - double *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseDaxpyi"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(handle, nnz, alpha, xVal, xInd, y, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseCaxpyi(cusparseHandle_t handle, int nnz, - const cuComplex *alpha, - const cuComplex *xVal, - const int *xInd, cuComplex *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const cuComplex *, const int *, - cuComplex *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseCaxpyi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, alpha, xVal, xInd, y, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseZaxpyi(cusparseHandle_t handle, int nnz, - const cuDoubleComplex *alpha, - const cuDoubleComplex *xVal, - const int *xInd, cuDoubleComplex *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, const cuDoubleComplex *, - const int *, cuDoubleComplex *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseZaxpyi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, alpha, xVal, xInd, y, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseSdoti(cusparseHandle_t handle, int nnz, - const float *xVal, const int *xInd, - const float *y, - float *resultDevHostPtr, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const float *, const int *, const float *, float *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSdoti"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, resultDevHostPtr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseDdoti(cusparseHandle_t handle, int nnz, - const double *xVal, const int *xInd, - const double *y, - double *resultDevHostPtr, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, const int *, 
const double *, - double *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseDdoti"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, resultDevHostPtr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseCdoti(cusparseHandle_t handle, int nnz, - const cuComplex *xVal, - const int *xInd, const cuComplex *y, - cuComplex *resultDevHostPtr, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const int *, const cuComplex *, - cuComplex *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseCdoti"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, resultDevHostPtr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseZdoti(cusparseHandle_t handle, int nnz, - const cuDoubleComplex *xVal, - const int *xInd, - const cuDoubleComplex *y, - cuDoubleComplex *resultDevHostPtr, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, const int *, - const cuDoubleComplex *, cuDoubleComplex *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseZdoti"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, resultDevHostPtr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseCdotci(cusparseHandle_t handle, int nnz, - const cuComplex *xVal, - const int *xInd, const cuComplex *y, - cuComplex *resultDevHostPtr, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const int *, const cuComplex *, - cuComplex *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseCdotci"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, resultDevHostPtr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseZdotci(cusparseHandle_t 
handle, int nnz, - const cuDoubleComplex *xVal, - const int *xInd, - const cuDoubleComplex *y, - cuDoubleComplex *resultDevHostPtr, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, const int *, - const cuDoubleComplex *, cuDoubleComplex *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseZdotci"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, resultDevHostPtr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgthr(cusparseHandle_t handle, int nnz, - const float *y, float *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const float *, float *, const int *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSgthr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgthr(cusparseHandle_t handle, int nnz, - const double *y, double *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, double *, const int *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseDgthr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgthr(cusparseHandle_t handle, int nnz, - const cuComplex *y, cuComplex *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, cuComplex *, const int *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseCgthr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseZgthr(cusparseHandle_t handle, int nnz, - const cuDoubleComplex *y, - cuDoubleComplex *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, cuDoubleComplex *, - const int *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseZgthr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgthrz(cusparseHandle_t handle, int nnz, - float *y, float *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, float *, float *, - const int *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSgthrz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgthrz(cusparseHandle_t handle, int nnz, - double *y, double *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, double *, double *, - const int *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseDgthrz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgthrz(cusparseHandle_t handle, int nnz, - cuComplex *y, cuComplex *xVal, - const int *xInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cuComplex *, cuComplex *, const int *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseCgthrz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgthrz(cusparseHandle_t handle, int nnz, - cuDoubleComplex *y, - cuDoubleComplex *xVal, - const int *xInd, - 
cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, cuDoubleComplex *, cuDoubleComplex *, const int *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseZgthrz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, y, xVal, xInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseSsctr(cusparseHandle_t handle, int nnz, - const float *xVal, const int *xInd, - float *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, - const float *, const int *, - float *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSsctr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseDsctr(cusparseHandle_t handle, int nnz, - const double *xVal, const int *xInd, - double *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, const int *, double *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseDsctr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsctr(cusparseHandle_t handle, int nnz, - const cuComplex *xVal, - const int *xInd, cuComplex *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const int *, cuComplex *, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseCsctr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseZsctr(cusparseHandle_t handle, int nnz, - const cuDoubleComplex *xVal, - const int *xInd, cuDoubleComplex *y, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - 
cusparseHandle_t, int, const cuDoubleComplex *, const int *, - cuDoubleComplex *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseZsctr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseSroti(cusparseHandle_t handle, int nnz, - float *xVal, const int *xInd, - float *y, const float *c, - const float *s, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, float *, const int *, float *, const float *, - const float *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseSroti"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, c, s, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseDroti(cusparseHandle_t handle, int nnz, - double *xVal, const int *xInd, - double *y, const double *c, - const double *s, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, double *, const int *, double *, const double *, - const double *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseDroti"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, nnz, xVal, xInd, y, c, s, idxBase); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSgemvi(cusparseHandle_t handle, cusparseOperation_t transA, int m, - int n, const float *alpha, /* host or device pointer */ - const float *A, int lda, int nnz, const float *xVal, - const int *xInd, const float *beta, /* host or device pointer */ - float *y, cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const float *, - const float *, int, int, const float *, const int *, const float *, - float *, cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseSgemvi"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(handle, transA, m, n, alpha, A, lda, nnz, xVal, xInd, beta, y, - idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, - int m, int n, int nnz, int *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseSgemvi_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDgemvi(cusparseHandle_t handle, cusparseOperation_t transA, int m, - int n, const double *alpha, /* host or device pointer */ - const double *A, int lda, int nnz, const double *xVal, - const int *xInd, const double *beta, /* host or device pointer */ - double *y, cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const double *, - const double *, int, int, const double *, const int *, const double *, - double *, cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseDgemvi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, A, lda, nnz, xVal, xInd, beta, y, - idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, - int m, int n, int nnz, int *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseDgemvi_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgemvi( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, 
int nnz, const cuComplex *xVal, - const int *xInd, const cuComplex *beta, /* host or device pointer */ - cuComplex *y, cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cuComplex *, - const cuComplex *, int, int, const cuComplex *, const int *, - const cuComplex *, cuComplex *, cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseCgemvi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, A, lda, nnz, xVal, xInd, beta, y, - idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, - int m, int n, int nnz, int *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseCgemvi_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgemvi( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, int nnz, const cuDoubleComplex *xVal, - const int *xInd, const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *y, cusparseIndexBase_t idxBase, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, int, const cuDoubleComplex *, const int *, - const cuDoubleComplex *, cuDoubleComplex *, cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseZgemvi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, A, lda, nnz, xVal, xInd, beta, y, - idxBase, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI 
-cusparseZgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, - int m, int n, int nnz, int *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseZgemvi_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrmv( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int nnz, - const float *alpha, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const float *x, const float *beta, float *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - const float *, const float *, float *); - static auto func_ptr = LoadSymbol("cusparseScsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDcsrmv(cusparseHandle_t handle, cusparseOperation_t transA, int m, - int n, int nnz, const double *alpha, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *x, const double *beta, double *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - const double *, const double *, double *); - static auto func_ptr = LoadSymbol("cusparseDcsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, x, beta, y); -} - 
-cusparseStatus_t CUSPARSEAPI -cusparseCcsrmv(cusparseHandle_t handle, cusparseOperation_t transA, int m, - int n, int nnz, const cuComplex *alpha, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cuComplex *x, const cuComplex *beta, cuComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, const cuComplex *, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - const cuComplex *, const cuComplex *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCcsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrmv( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int nnz, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *x, - const cuDoubleComplex *beta, cuDoubleComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, - const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZcsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsrmvEx_bufferSize( - cusparseHandle_t handle, cusparseAlgMode_t alg, cusparseOperation_t transA, - int m, int n, int nnz, const void *alpha, cudaDataType alphatype, - const cusparseMatDescr_t descrA, const void *csrValA, - cudaDataType 
csrValAtype, const int *csrRowPtrA, const int *csrColIndA, - const void *x, cudaDataType xtype, const void *beta, cudaDataType betatype, - void *y, cudaDataType ytype, cudaDataType executiontype, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseAlgMode_t, cusparseOperation_t, int, int, int, - const void *, cudaDataType, const cusparseMatDescr_t, const void *, - cudaDataType, const int *, const int *, const void *, cudaDataType, - const void *, cudaDataType, void *, cudaDataType, cudaDataType, size_t *); - static auto func_ptr = LoadSymbol("cusparseCsrmvEx_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alg, transA, m, n, nnz, alpha, alphatype, descrA, - csrValA, csrValAtype, csrRowPtrA, csrColIndA, x, xtype, beta, - betatype, y, ytype, executiontype, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsrmvEx( - cusparseHandle_t handle, cusparseAlgMode_t alg, cusparseOperation_t transA, - int m, int n, int nnz, const void *alpha, cudaDataType alphatype, - const cusparseMatDescr_t descrA, const void *csrValA, - cudaDataType csrValAtype, const int *csrRowPtrA, const int *csrColIndA, - const void *x, cudaDataType xtype, const void *beta, cudaDataType betatype, - void *y, cudaDataType ytype, cudaDataType executiontype, void *buffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseAlgMode_t, cusparseOperation_t, int, int, int, - const void *, cudaDataType, const cusparseMatDescr_t, const void *, - cudaDataType, const int *, const int *, const void *, cudaDataType, - const void *, cudaDataType, void *, cudaDataType, cudaDataType, void *); - static auto func_ptr = LoadSymbol("cusparseCsrmvEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, alg, transA, m, n, nnz, alpha, alphatype, descrA, - csrValA, csrValAtype, csrRowPtrA, csrColIndA, x, xtype, beta, - betatype, y, ytype, executiontype, buffer); 
-} - -cusparseStatus_t CUSPARSEAPI cusparseScsrmv_mp( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int nnz, - const float *alpha, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const float *x, const float *beta, float *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - const float *, const float *, float *); - static auto func_ptr = LoadSymbol("cusparseScsrmv_mp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDcsrmv_mp(cusparseHandle_t handle, cusparseOperation_t transA, int m, - int n, int nnz, const double *alpha, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *x, const double *beta, double *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - const double *, const double *, double *); - static auto func_ptr = LoadSymbol("cusparseDcsrmv_mp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrmv_mp( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int nnz, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuComplex *x, const cuComplex *beta, - cuComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - 
cusparseHandle_t, cusparseOperation_t, int, int, int, const cuComplex *, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - const cuComplex *, const cuComplex *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCcsrmv_mp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrmv_mp( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int nnz, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *x, - const cuDoubleComplex *beta, cuDoubleComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, - const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZcsrmv_mp"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseShybmv( - cusparseHandle_t handle, cusparseOperation_t transA, const float *alpha, - const cusparseMatDescr_t descrA, const cusparseHybMat_t hybA, - const float *x, const float *beta, float *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const float *, - const cusparseMatDescr_t, const cusparseHybMat_t, const float *, - const float *, float *); - static auto func_ptr = LoadSymbol("cusparseShybmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, alpha, descrA, hybA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseDhybmv( - 
cusparseHandle_t handle, cusparseOperation_t transA, const double *alpha, - const cusparseMatDescr_t descrA, const cusparseHybMat_t hybA, - const double *x, const double *beta, double *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const double *, - const cusparseMatDescr_t, const cusparseHybMat_t, const double *, - const double *, double *); - static auto func_ptr = LoadSymbol("cusparseDhybmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, alpha, descrA, hybA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseChybmv( - cusparseHandle_t handle, cusparseOperation_t transA, const cuComplex *alpha, - const cusparseMatDescr_t descrA, const cusparseHybMat_t hybA, - const cuComplex *x, const cuComplex *beta, cuComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cuComplex *, - const cusparseMatDescr_t, const cusparseHybMat_t, const cuComplex *, - const cuComplex *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseChybmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, alpha, descrA, hybA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI -cusparseZhybmv(cusparseHandle_t handle, cusparseOperation_t transA, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, const cuDoubleComplex *x, - const cuDoubleComplex *beta, cuDoubleComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cuDoubleComplex *, - const cusparseMatDescr_t, const cusparseHybMat_t, const cuDoubleComplex *, - const cuDoubleComplex *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZhybmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, alpha, descrA, hybA, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrmv( - cusparseHandle_t handle, 
cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nb, int nnzb, const float *alpha, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const float *x, const float *beta, float *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - const float *, const cusparseMatDescr_t, const float *, const int *, - const int *, int, const float *, const float *, float *); - static auto func_ptr = LoadSymbol("cusparseSbsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim, - x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrmv( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nb, int nnzb, const double *alpha, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const double *x, const double *beta, double *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - const double *, const cusparseMatDescr_t, const double *, const int *, - const int *, int, const double *, const double *, double *); - static auto func_ptr = LoadSymbol("cusparseDbsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim, - x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCbsrmv(cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nb, int nnzb, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *bsrSortedValA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, 
int blockDim, const cuComplex *x, - const cuComplex *beta, cuComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, int, const cuComplex *, const cuComplex *, - cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCbsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim, - x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrmv( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nb, int nnzb, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *bsrSortedValA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, int blockDim, const cuDoubleComplex *x, - const cuDoubleComplex *beta, cuDoubleComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZbsrmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim, - x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSbsrxmv(cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int sizeOfMask, int mb, int nb, - int nnzb, const float *alpha, const cusparseMatDescr_t descrA, - const float *bsrSortedValA, const int *bsrSortedMaskPtrA, - const int *bsrSortedRowPtrA, const int *bsrSortedEndPtrA, - const int *bsrSortedColIndA, int blockDim, const float *x, 
- const float *beta, float *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - int, const float *, const cusparseMatDescr_t, const float *, const int *, - const int *, const int *, const int *, int, const float *, const float *, - float *); - static auto func_ptr = LoadSymbol("cusparseSbsrxmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, sizeOfMask, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedMaskPtrA, bsrSortedRowPtrA, - bsrSortedEndPtrA, bsrSortedColIndA, blockDim, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDbsrxmv(cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int sizeOfMask, int mb, int nb, - int nnzb, const double *alpha, const cusparseMatDescr_t descrA, - const double *bsrSortedValA, const int *bsrSortedMaskPtrA, - const int *bsrSortedRowPtrA, const int *bsrSortedEndPtrA, - const int *bsrSortedColIndA, int blockDim, const double *x, - const double *beta, double *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - int, const double *, const cusparseMatDescr_t, const double *, - const int *, const int *, const int *, const int *, int, const double *, - const double *, double *); - static auto func_ptr = LoadSymbol("cusparseDbsrxmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, sizeOfMask, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedMaskPtrA, bsrSortedRowPtrA, - bsrSortedEndPtrA, bsrSortedColIndA, blockDim, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrxmv( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int sizeOfMask, int mb, int nb, int nnzb, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *bsrSortedValA, const int *bsrSortedMaskPtrA, - const int 
*bsrSortedRowPtrA, const int *bsrSortedEndPtrA, - const int *bsrSortedColIndA, int blockDim, const cuComplex *x, - const cuComplex *beta, cuComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - int, const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, const int *, const int *, int, - const cuComplex *, const cuComplex *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCbsrxmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, sizeOfMask, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedMaskPtrA, bsrSortedRowPtrA, - bsrSortedEndPtrA, bsrSortedColIndA, blockDim, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrxmv( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int sizeOfMask, int mb, int nb, int nnzb, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *bsrSortedValA, const int *bsrSortedMaskPtrA, - const int *bsrSortedRowPtrA, const int *bsrSortedEndPtrA, - const int *bsrSortedColIndA, int blockDim, const cuDoubleComplex *x, - const cuDoubleComplex *beta, cuDoubleComplex *y) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, - int, const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, const int *, - const int *, int, const cuDoubleComplex *, const cuDoubleComplex *, - cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZbsrxmv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, sizeOfMask, mb, nb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedMaskPtrA, bsrSortedRowPtrA, - bsrSortedEndPtrA, bsrSortedColIndA, blockDim, x, beta, y); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsrsv_analysisEx( - cusparseHandle_t handle, 
cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const void *csrSortedValA, - cudaDataType csrSortedValAtype, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - cudaDataType executiontype) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const void *, cudaDataType, const int *, const int *, - cusparseSolveAnalysisInfo_t, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCsrsv_analysisEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedValAtype, csrSortedRowPtrA, csrSortedColIndA, info, - executiontype); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsv_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseScsrsv_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsv_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = 
LoadSymbol("cusparseDcsrsv_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsv_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseCcsrsv_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsv_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseZcsrsv_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsrsv_solveEx( - cusparseHandle_t handle, cusparseOperation_t transA, int m, - const void *alpha, cudaDataType alphatype, const cusparseMatDescr_t descrA, - const void *csrSortedValA, cudaDataType csrSortedValAtype, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - 
cusparseSolveAnalysisInfo_t info, const void *f, cudaDataType ftype, - void *x, cudaDataType xtype, cudaDataType executiontype) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const void *, cudaDataType, - const cusparseMatDescr_t, const void *, cudaDataType, const int *, - const int *, cusparseSolveAnalysisInfo_t, const void *, cudaDataType, - void *, cudaDataType, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCsrsv_solveEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, alpha, alphatype, descrA, csrSortedValA, - csrSortedValAtype, csrSortedRowPtrA, csrSortedColIndA, info, - f, ftype, x, xtype, executiontype); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsv_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, - const float *alpha, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - const float *f, float *x) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - cusparseSolveAnalysisInfo_t, const float *, float *); - static auto func_ptr = LoadSymbol("cusparseScsrsv_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsv_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, - const double *alpha, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - const double *f, double *x) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const double *, - const cusparseMatDescr_t, const 
double *, const int *, const int *, - cusparseSolveAnalysisInfo_t, const double *, double *); - static auto func_ptr = LoadSymbol("cusparseDcsrsv_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsv_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - const cuComplex *f, cuComplex *x) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cuComplex *, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - cusparseSolveAnalysisInfo_t, const cuComplex *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCcsrsv_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsv_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - const cuDoubleComplex *f, cuDoubleComplex *x) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cuDoubleComplex *, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, cusparseSolveAnalysisInfo_t, const cuDoubleComplex *, - cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZcsrsv_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, 
csrSortedColIndA, info, f, x); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrsv2_zeroPivot(cusparseHandle_t handle, - csrsv2Info_t info, - int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, csrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXcsrsv2_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsv2_bufferSize( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, csrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseScsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsv2_bufferSize( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, csrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDcsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsv2_bufferSize( - cusparseHandle_t handle, cusparseOperation_t transA, int 
m, int nnz, - const cusparseMatDescr_t descrA, cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, csrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCcsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsv2_bufferSize( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, csrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZcsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, csrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseScsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, csrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, csrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, 
const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, csrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsv2_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, csrsv2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsv2_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, csrsv2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsv2_analysis( - cusparseHandle_t handle, 
cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, csrsv2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsv2_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, csrsv2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, csrsv2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsv2_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const float *alpha, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrsv2Info_t info, const float *f, float *x, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const float *, - 
const cusparseMatDescr_t, const float *, const int *, const int *, - csrsv2Info_t, const float *, float *, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsv2_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const double *alpha, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrsv2Info_t info, const double *f, double *x, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - csrsv2Info_t, const double *, double *, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsv2_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrsv2Info_t info, const cuComplex *f, - cuComplex *x, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cuComplex *, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - csrsv2Info_t, const cuComplex *, cuComplex *, cusparseSolvePolicy_t, - void *); - static auto func_ptr = 
LoadSymbol("cusparseCcsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsv2_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrsv2Info_t info, const cuDoubleComplex *f, - cuDoubleComplex *x, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, csrsv2Info_t, const cuDoubleComplex *, cuDoubleComplex *, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, f, x, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXbsrsv2_zeroPivot(cusparseHandle_t handle, - bsrsv2Info_t info, - int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, bsrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXbsrsv2_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsv2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, 
cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, float *, const int *, const int *, int, - bsrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseSbsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsv2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, double *, const int *, const int *, int, - bsrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDbsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsv2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, cuComplex *, const int *, const int *, int, - bsrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCbsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - 
bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsv2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, cuDoubleComplex *, const int *, const int *, - int, bsrsv2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZbsrsv2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockSize, - bsrsv2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, float *, const int *, const int *, int, - bsrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSbsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockSize, info, - pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedValA, - const int 
*bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockSize, - bsrsv2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, double *, const int *, const int *, int, - bsrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDbsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockSize, info, - pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockSize, - bsrsv2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, cuComplex *, const int *, const int *, int, - bsrsv2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCbsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockSize, info, - pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsv2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockSize, - bsrsv2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, cuDoubleComplex *, const int *, const int *, - int, bsrsv2Info_t, size_t 
*); - static auto func_ptr = LoadSymbol("cusparseZbsrsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockSize, info, - pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsv2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, const float *, const int *, const int *, int, - bsrsv2Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsv2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, const double *, const int *, const int *, int, - bsrsv2Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, 
policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsv2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - int, bsrsv2Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsv2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, int, bsrsv2Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsrsv2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, policy, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsv2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, const float *alpha, - const 
cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, const float *f, float *x, cusparseSolvePolicy_t policy, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const float *, const cusparseMatDescr_t, const float *, const int *, - const int *, int, bsrsv2Info_t, const float *, float *, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, alpha, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, f, x, - policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsv2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, const double *alpha, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, const double *f, double *x, cusparseSolvePolicy_t policy, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const double *, const cusparseMatDescr_t, const double *, const int *, - const int *, int, bsrsv2Info_t, const double *, double *, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, alpha, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, f, x, - policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsv2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, const cuComplex *alpha, - const cusparseMatDescr_t descrA, const cuComplex 
*bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, const cuComplex *f, cuComplex *x, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, int, bsrsv2Info_t, const cuComplex *, - cuComplex *, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, alpha, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, f, x, - policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsv2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, int mb, int nnzb, const cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - bsrsv2Info_t info, const cuDoubleComplex *f, cuDoubleComplex *x, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, - const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, bsrsv2Info_t, - const cuDoubleComplex *, cuDoubleComplex *, cusparseSolvePolicy_t, - void *); - static auto func_ptr = LoadSymbol("cusparseZbsrsv2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, mb, nnzb, alpha, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, f, x, - policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseShybsv_analysis(cusparseHandle_t handle, cusparseOperation_t transA, - const cusparseMatDescr_t descrA, cusparseHybMat_t hybA, - 
cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cusparseMatDescr_t, - cusparseHybMat_t, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseShybsv_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, descrA, hybA, info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDhybsv_analysis(cusparseHandle_t handle, cusparseOperation_t transA, - const cusparseMatDescr_t descrA, cusparseHybMat_t hybA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cusparseMatDescr_t, - cusparseHybMat_t, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseDhybsv_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, descrA, hybA, info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseChybsv_analysis(cusparseHandle_t handle, cusparseOperation_t transA, - const cusparseMatDescr_t descrA, cusparseHybMat_t hybA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cusparseMatDescr_t, - cusparseHybMat_t, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseChybsv_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, descrA, hybA, info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseZhybsv_analysis(cusparseHandle_t handle, cusparseOperation_t transA, - const cusparseMatDescr_t descrA, cusparseHybMat_t hybA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cusparseMatDescr_t, - cusparseHybMat_t, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseZhybsv_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, descrA, 
hybA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseShybsv_solve( - cusparseHandle_t handle, cusparseOperation_t trans, const float *alpha, - const cusparseMatDescr_t descra, const cusparseHybMat_t hybA, - cusparseSolveAnalysisInfo_t info, const float *f, float *x) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const float *, - const cusparseMatDescr_t, const cusparseHybMat_t, - cusparseSolveAnalysisInfo_t, const float *, float *); - static auto func_ptr = LoadSymbol("cusparseShybsv_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, alpha, descra, hybA, info, f, x); -} - -cusparseStatus_t CUSPARSEAPI cusparseChybsv_solve( - cusparseHandle_t handle, cusparseOperation_t trans, const cuComplex *alpha, - const cusparseMatDescr_t descra, const cusparseHybMat_t hybA, - cusparseSolveAnalysisInfo_t info, const cuComplex *f, cuComplex *x) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cuComplex *, - const cusparseMatDescr_t, const cusparseHybMat_t, - cusparseSolveAnalysisInfo_t, const cuComplex *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseChybsv_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, alpha, descra, hybA, info, f, x); -} - -cusparseStatus_t CUSPARSEAPI cusparseDhybsv_solve( - cusparseHandle_t handle, cusparseOperation_t trans, const double *alpha, - const cusparseMatDescr_t descra, const cusparseHybMat_t hybA, - cusparseSolveAnalysisInfo_t info, const double *f, double *x) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const double *, - const cusparseMatDescr_t, const cusparseHybMat_t, - cusparseSolveAnalysisInfo_t, const double *, double *); - static auto func_ptr = LoadSymbol("cusparseDhybsv_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, alpha, descra, hybA, info, f, x); 
-} - -cusparseStatus_t CUSPARSEAPI cusparseZhybsv_solve( - cusparseHandle_t handle, cusparseOperation_t trans, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descra, - const cusparseHybMat_t hybA, cusparseSolveAnalysisInfo_t info, - const cuDoubleComplex *f, cuDoubleComplex *x) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, const cuDoubleComplex *, - const cusparseMatDescr_t, const cusparseHybMat_t, - cusparseSolveAnalysisInfo_t, const cuDoubleComplex *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZhybsv_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, alpha, descra, hybA, info, f, x); -} - -cusparseStatus_t CUSPARSEAPI -cusparseScsrmm(cusparseHandle_t handle, cusparseOperation_t transA, int m, - int n, int k, int nnz, const float *alpha, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const float *B, int ldb, const float *beta, float *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - const float *, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cusparseScsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, k, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrmm( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int k, - int nnz, const double *alpha, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const double *B, int ldb, const double *beta, - double *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, 
int, int, int, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - const double *, int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cusparseDcsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, k, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrmm( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int k, - int nnz, const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuComplex *B, int ldb, - const cuComplex *beta, cuComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int, - const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, const cuComplex *, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCcsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, k, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrmm( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int k, - int nnz, const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *B, int ldb, - const cuDoubleComplex *beta, cuDoubleComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, int, int, - const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static 
auto func_ptr = LoadSymbol("cusparseZcsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, k, nnz, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI -cusparseScsrmm2(cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, int nnz, - const float *alpha, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const float *B, int ldb, - const float *beta, float *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - int, const float *, const cusparseMatDescr_t, const float *, const int *, - const int *, const float *, int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cusparseScsrmm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDcsrmm2(cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, int nnz, - const double *alpha, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const double *B, int ldb, - const double *beta, double *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - int, const double *, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, int, const double *, double *, - int); - static auto func_ptr = LoadSymbol("cusparseDcsrmm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, nnz, alpha, descrA, - csrSortedValA, 
csrSortedRowPtrA, csrSortedColIndA, B, ldb, - beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCcsrmm2(cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, int nnz, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuComplex *B, int ldb, - const cuComplex *beta, cuComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - int, const cuComplex *, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, const cuComplex *, int, const cuComplex *, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCcsrmm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrmm2( - cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, int nnz, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *B, int ldb, - const cuDoubleComplex *beta, cuDoubleComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - int, const cuDoubleComplex *, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cusparseZcsrmm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, nnz, alpha, descrA, - csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, - 
beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrmm( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int kb, int nnzb, const float *alpha, const cusparseMatDescr_t descrA, - const float *bsrSortedValA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, const int blockSize, const float *B, - const int ldb, const float *beta, float *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - const int, const float *, const int, const float *, float *, int); - static auto func_ptr = LoadSymbol("cusparseSbsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, kb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockSize, - B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrmm( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int kb, int nnzb, const double *alpha, const cusparseMatDescr_t descrA, - const double *bsrSortedValA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, const int blockSize, const double *B, - const int ldb, const double *beta, double *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - const int, const double *, const int, const double *, double *, int); - static auto func_ptr = LoadSymbol("cusparseDbsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, kb, nnzb, alpha, descrA, - bsrSortedValA, 
bsrSortedRowPtrA, bsrSortedColIndA, blockSize, - B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrmm( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int kb, int nnzb, const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *bsrSortedValA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, const int blockSize, const cuComplex *B, - const int ldb, const cuComplex *beta, cuComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, int, const cuComplex *, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - const int, const cuComplex *, const int, const cuComplex *, cuComplex *, - int); - static auto func_ptr = LoadSymbol("cusparseCbsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, kb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockSize, - B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrmm( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int kb, int nnzb, const cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, - const int blockSize, const cuDoubleComplex *B, const int ldb, - const cuDoubleComplex *beta, cuDoubleComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, const int, const cuDoubleComplex *, const int, - const cuDoubleComplex *, cuDoubleComplex *, int); - static auto 
func_ptr = LoadSymbol("cusparseZbsrmm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, kb, nnzb, alpha, descrA, - bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockSize, - B, ldb, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgemmi( - cusparseHandle_t handle, int m, int n, int k, int nnz, - const float *alpha, /* host or device pointer */ - const float *A, int lda, const float *cscValB, const int *cscColPtrB, - const int *cscRowIndB, const float *beta, /* host or device pointer */ - float *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, int, const float *, const float *, int, - const float *, const int *, const int *, const float *, float *, int); - static auto func_ptr = LoadSymbol("cusparseSgemmi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, - cscRowIndB, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgemmi( - cusparseHandle_t handle, int m, int n, int k, int nnz, - const double *alpha, /* host or device pointer */ - const double *A, int lda, const double *cscValB, const int *cscColPtrB, - const int *cscRowIndB, const double *beta, /* host or device pointer */ - double *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, int, const double *, const double *, int, - const double *, const int *, const int *, const double *, double *, int); - static auto func_ptr = LoadSymbol("cusparseDgemmi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, - cscRowIndB, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCgemmi(cusparseHandle_t handle, int m, int n, int k, int nnz, - const cuComplex *alpha, /* host or device pointer */ - const cuComplex *A, int lda, const cuComplex *cscValB, - const int *cscColPtrB, const int 
*cscRowIndB, - const cuComplex *beta, /* host or device pointer */ - cuComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, int, const cuComplex *, - const cuComplex *, int, const cuComplex *, const int *, const int *, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCgemmi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, - cscRowIndB, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgemmi( - cusparseHandle_t handle, int m, int n, int k, int nnz, - const cuDoubleComplex *alpha, /* host or device pointer */ - const cuDoubleComplex *A, int lda, const cuDoubleComplex *cscValB, - const int *cscColPtrB, const int *cscRowIndB, - const cuDoubleComplex *beta, /* host or device pointer */ - cuDoubleComplex *C, int ldc) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, int, const cuDoubleComplex *, const int *, - const int *, const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cusparseZgemmi"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, - cscRowIndB, beta, C, ldc); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsm_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseScsrsm_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, 
transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsm_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseDcsrsm_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrsm_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseCcsrsm_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsm_analysis( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int nnz, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, 
const int *, - cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseZcsrsm_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, nnz, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrsm_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, - const float *alpha, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - const float *F, int ldf, float *X, int ldx) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - cusparseSolveAnalysisInfo_t, const float *, int, float *, int); - static auto func_ptr = LoadSymbol("cusparseScsrsm_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, F, ldf, X, ldx); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrsm_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, - const double *alpha, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - const double *F, int ldf, double *X, int ldx) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - cusparseSolveAnalysisInfo_t, const double *, int, double *, int); - static auto func_ptr = LoadSymbol("cusparseDcsrsm_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, F, ldf, X, ldx); -} - -cusparseStatus_t 
CUSPARSEAPI cusparseCcsrsm_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, - const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - const cuComplex *F, int ldf, cuComplex *X, int ldx) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cuComplex *, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - cusparseSolveAnalysisInfo_t, const cuComplex *, int, cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCcsrsm_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, F, ldf, X, ldx); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrsm_solve( - cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, - const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseSolveAnalysisInfo_t info, - const cuDoubleComplex *F, int ldf, cuDoubleComplex *X, int ldx) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, cusparseSolveAnalysisInfo_t, const cuDoubleComplex *, int, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cusparseZcsrsm_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, m, n, alpha, descrA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, info, F, ldf, X, ldx); -} - -cusparseStatus_t CUSPARSEAPI cusparseXbsrsm2_zeroPivot(cusparseHandle_t handle, - bsrsm2Info_t info, - int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, bsrsm2Info_t, int 
*); - static auto func_ptr = LoadSymbol("cusparseXbsrsm2_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsm2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, int, bsrsm2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseSbsrsm2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsm2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, int, bsrsm2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDbsrsm2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseCbsrsm2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, int, bsrsm2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCbsrsm2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsm2_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrsm2Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZbsrsm2_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsm2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int nnzb, const 
cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, int, bsrsm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSbsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsm2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, int, bsrsm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDbsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsm2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI 
*)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, int, bsrsm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCbsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsm2_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrsm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZbsrsm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transB, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsm2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, const float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - const float *, const int *, const 
int *, int, bsrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsm2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, const double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, bsrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsm2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, const cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, bsrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = 
LoadSymbol("cusparseCbsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsm2_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cusparseMatDescr_t descrA, - const cuDoubleComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, bsrsm2Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsrsm2_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrsm2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const float *alpha, const cusparseMatDescr_t descrA, - const float *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info, - const float *F, int ldf, float *X, int ldx, cusparseSolvePolicy_t policy, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, int, - bsrsm2Info_t, const float *, int, float *, int, cusparseSolvePolicy_t, - 
void *); - static auto func_ptr = LoadSymbol("cusparseSbsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, alpha, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, F, ldf, X, ldx, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrsm2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const double *alpha, const cusparseMatDescr_t descrA, - const double *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info, - const double *F, int ldf, double *X, int ldx, cusparseSolvePolicy_t policy, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, int, - bsrsm2Info_t, const double *, int, double *, int, cusparseSolvePolicy_t, - void *); - static auto func_ptr = LoadSymbol("cusparseDbsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, alpha, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, F, ldf, X, ldx, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrsm2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cuComplex *alpha, const cusparseMatDescr_t descrA, - const cuComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info, - const cuComplex *F, int ldf, cuComplex *X, int ldx, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - 
cusparseOperation_t, int, int, int, const cuComplex *, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - int, bsrsm2Info_t, const cuComplex *, int, cuComplex *, int, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, alpha, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, F, ldf, X, ldx, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrsm2_solve( - cusparseHandle_t handle, cusparseDirection_t dirA, - cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n, - int nnzb, const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, - const cuDoubleComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info, - const cuDoubleComplex *F, int ldf, cuDoubleComplex *X, int ldx, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, - cusparseOperation_t, int, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, int, bsrsm2Info_t, const cuDoubleComplex *, int, - cuDoubleComplex *, int, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsrsm2_solve"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, alpha, descrA, - bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize, - info, F, ldf, X, ldx, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsrilu0Ex( - cusparseHandle_t handle, cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, void *csrSortedValA_ValM, - cudaDataType csrSortedValA_ValMtype, - /* matrix A values are updated inplace - to be the preconditioner M values */ - const int 
*csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info, cudaDataType executiontype) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - void *, cudaDataType, const int *, const int *, - cusparseSolveAnalysisInfo_t, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCsrilu0Ex"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedValA_ValMtype, csrSortedRowPtrA, csrSortedColIndA, - info, executiontype); -} - -cusparseStatus_t CUSPARSEAPI -cusparseScsrilu0(cusparseHandle_t handle, cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, float *csrSortedValA_ValM, - /* matrix A values are updated inplace - to be the preconditioner M values */ - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - float *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseScsrilu0"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDcsrilu0(cusparseHandle_t handle, cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, double *csrSortedValA_ValM, - /* matrix A values are updated inplace - to be the preconditioner M values */ - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - double *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseDcsrilu0"); - if (!func_ptr) return GetSymbolNotFoundError(); 
- return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCcsrilu0(cusparseHandle_t handle, cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, cuComplex *csrSortedValA_ValM, - /* matrix A values are updated inplace - to be the preconditioner M values */ - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseCcsrilu0"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu0( - cusparseHandle_t handle, cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, cuDoubleComplex *csrSortedValA_ValM, - /* matrix A values are updated inplace - to be the preconditioner M values */ - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseZcsrilu0"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrilu02_numericBoost( - cusparseHandle_t handle, csrilu02Info_t info, int enable_boost, double *tol, - float *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, csrilu02Info_t, int, double *, float *); - static auto func_ptr = 
LoadSymbol("cusparseScsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02_numericBoost( - cusparseHandle_t handle, csrilu02Info_t info, int enable_boost, double *tol, - double *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, csrilu02Info_t, int, double *, double *); - static auto func_ptr = LoadSymbol("cusparseDcsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02_numericBoost( - cusparseHandle_t handle, csrilu02Info_t info, int enable_boost, double *tol, - cuComplex *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, csrilu02Info_t, int, double *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCcsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02_numericBoost( - cusparseHandle_t handle, csrilu02Info_t info, int enable_boost, double *tol, - cuDoubleComplex *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, csrilu02Info_t, int, double *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZcsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrilu02_zeroPivot( - cusparseHandle_t handle, csrilu02Info_t info, int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, csrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXcsrilu02_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseScsrilu02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseScsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDcsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCcsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, 
pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZcsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrilu02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedVal, const int *csrSortedRowPtr, const int *csrSortedColInd, - csrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseScsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, 
descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrilu02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = 
LoadSymbol("cusparseScsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csrilu02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - 
using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, csrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrilu02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedValA_valM, - /* matrix A values are updated inplace - to be the preconditioner M values */ - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - csrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedValA_valM, - /* matrix A values are updated inplace - to be the preconditioner M values */ - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - csrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - 
csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedValA_valM, - /* matrix A values are updated inplace - to be the preconditioner M values */ - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - csrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedValA_valM, - /* matrix A values are updated inplace - to be the preconditioner M values */ - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - csrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02_numericBoost( - cusparseHandle_t handle, bsrilu02Info_t info, int enable_boost, double *tol, - float *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, bsrilu02Info_t, int, double *, float *); - static auto func_ptr = 
LoadSymbol("cusparseSbsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02_numericBoost( - cusparseHandle_t handle, bsrilu02Info_t info, int enable_boost, double *tol, - double *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, bsrilu02Info_t, int, double *, double *); - static auto func_ptr = LoadSymbol("cusparseDbsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02_numericBoost( - cusparseHandle_t handle, bsrilu02Info_t info, int enable_boost, double *tol, - cuComplex *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, bsrilu02Info_t, int, double *, cuComplex *); - static auto func_ptr = LoadSymbol("cusparseCbsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02_numericBoost( - cusparseHandle_t handle, bsrilu02Info_t info, int enable_boost, double *tol, - cuDoubleComplex *boost_val) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, bsrilu02Info_t, int, double *, cuDoubleComplex *); - static auto func_ptr = LoadSymbol("cusparseZbsrilu02_numericBoost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, enable_boost, tol, boost_val); -} - -cusparseStatus_t CUSPARSEAPI cusparseXbsrilu02_zeroPivot( - cusparseHandle_t handle, bsrilu02Info_t info, int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, bsrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXbsrilu02_zeroPivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseSbsrilu02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseSbsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDbsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, 
bsrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCbsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrilu02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZbsrilu02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSbsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t 
descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDbsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsrilu02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCbsrilu02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsrilu02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrilu02Info_t, - size_t *); - static auto func_ptr = LoadSymbol("cusparseZbsrilu02_bufferSizeExt"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, 
const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsrilu02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descra, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = 
LoadSymbol("cusparseSbsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descra, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descra, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descra, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descra, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descra, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descra, cuDoubleComplex *bsrSortedVal, - const int 
*bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsrilu02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsrilu02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descra, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseScsric0(cusparseHandle_t handle, cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, float *csrSortedValA_ValM, - /* matrix A values are updated inplace - to be the preconditioner M values */ - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - float *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseScsric0"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDcsric0(cusparseHandle_t handle, cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, double *csrSortedValA_ValM, - /* matrix A values are updated inplace - to be the preconditioner M values */ - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - double *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseDcsric0"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCcsric0(cusparseHandle_t handle, cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, cuComplex *csrSortedValA_ValM, - /* matrix A values are updated inplace - to be the preconditioner M values */ - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseCcsric0"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsric0( - cusparseHandle_t handle, cusparseOperation_t trans, int m, - const cusparseMatDescr_t descrA, cuDoubleComplex *csrSortedValA_ValM, - /* matrix A values are updated inplace - to be the preconditioner M values */ - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - cusparseSolveAnalysisInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, cusparseSolveAnalysisInfo_t); - static auto func_ptr = LoadSymbol("cusparseZcsric0"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, trans, m, descrA, csrSortedValA_ValM, - csrSortedRowPtrA, csrSortedColIndA, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsric02_zeroPivot(cusparseHandle_t handle, - csric02Info_t info, - int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, csric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXcsric02_zeroPivot"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsric02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseScsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsric02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDcsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsric02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCcsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsric02_bufferSize( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZcsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsric02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedVal, const int *csrSortedRowPtr, const int *csrSortedColInd, - csric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseScsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsric02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsric02_bufferSizeExt"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsric02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsric02_bufferSizeExt( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, csric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsric02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, csric02Info_t, 
cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsric02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsric02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsric02_analysis( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, csric02Info_t info, - 
cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, csric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsric02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - float *csrSortedValA_valM, - /* matrix A values are updated inplace - to be the preconditioner M values */ - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - csric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsric02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - double *csrSortedValA_valM, - /* matrix A values are updated inplace - to be the preconditioner M values */ - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - csric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, 
csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsric02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuComplex *csrSortedValA_valM, - /* matrix A values are updated inplace - to be the preconditioner M values */ - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - csric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsric02( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - cuDoubleComplex *csrSortedValA_valM, - /* matrix A values are updated inplace - to be the preconditioner M values */ - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - csric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, - csrSortedColIndA, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXbsric02_zeroPivot(cusparseHandle_t handle, - bsric02Info_t info, - int *position) { - using FuncPtr = - cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, bsric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseXbsric02_zeroPivot"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, info, position); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsric02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseSbsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsric02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseDbsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsric02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, 
int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseCbsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsric02_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsric02Info_t, int *); - static auto func_ptr = LoadSymbol("cusparseZbsric02_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsric02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float *, const int *, const int *, int, bsric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseSbsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsric02_bufferSizeExt( - cusparseHandle_t handle, 
cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDbsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsric02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsric02Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCbsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsric02_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, - bsric02Info_t info, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsric02Info_t, - size_t *); - static auto func_ptr = 
LoadSymbol("cusparseZbsric02_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockSize, info, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsric02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pInputBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pInputBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsric02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pInputBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pInputBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsric02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int 
nnzb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pInputBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pInputBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsric02_analysis( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pInputBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsric02_analysis"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pInputBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsric02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, float *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - float 
*, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseSbsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsric02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, double *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - double *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseDbsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsric02( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuComplex *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseCbsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsric02( - cusparseHandle_t handle, cusparseDirection_t 
dirA, int mb, int nnzb, - const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, - const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, - bsric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, const int *, int, bsric02Info_t, - cusparseSolvePolicy_t, void *); - static auto func_ptr = LoadSymbol("cusparseZbsric02"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, blockDim, info, policy, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv(cusparseHandle_t handle, int m, - int n, const float *dl, - const float *d, const float *du, - float *B, int ldb) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, int, - const float *, const float *, - const float *, float *, int); - static auto func_ptr = LoadSymbol("cusparseSgtsv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsv(cusparseHandle_t handle, int m, - int n, const double *dl, - const double *d, const double *du, - double *B, int ldb) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, double *, int); - static auto func_ptr = LoadSymbol("cusparseDgtsv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv(cusparseHandle_t handle, int m, - int n, const cuComplex *dl, - const cuComplex *d, - const cuComplex *du, cuComplex *B, - int ldb) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, cuComplex *, int); - static auto 
func_ptr = LoadSymbol("cusparseCgtsv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv(cusparseHandle_t handle, int m, - int n, const cuDoubleComplex *dl, - const cuDoubleComplex *d, - const cuDoubleComplex *du, - cuDoubleComplex *B, int ldb) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cusparseZgtsv"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const float *dl, const float *d, - const float *du, const float *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const float *, const float *, - const float *, int, size_t *); - static auto func_ptr = LoadSymbol("cusparseSgtsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsv2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const double *dl, const double *d, - const double *du, const double *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, const double *, int, size_t *); - static auto func_ptr = LoadSymbol("cusparseDgtsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const cuComplex *dl, - const cuComplex *d, const cuComplex *du, const 
cuComplex *B, int ldb, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, const cuComplex *, int, size_t *); - static auto func_ptr = LoadSymbol("cusparseCgtsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, - const cuDoubleComplex *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, const cuDoubleComplex *, - int, size_t *); - static auto func_ptr = LoadSymbol("cusparseZgtsv2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2(cusparseHandle_t handle, int m, - int n, const float *dl, - const float *d, const float *du, - float *B, int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const float *, const float *, - float *, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgtsv2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsv2(cusparseHandle_t handle, int m, - int n, const double *dl, - const double *d, const double *du, - double *B, int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, double *, int, void *); - static auto func_ptr = LoadSymbol("cusparseDgtsv2"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2(cusparseHandle_t handle, int m, - int n, const cuComplex *dl, - const cuComplex *d, - const cuComplex *du, cuComplex *B, - int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, cuComplex *, int, void *); - static auto func_ptr = LoadSymbol("cusparseCgtsv2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2(cusparseHandle_t handle, int m, - int n, const cuDoubleComplex *dl, - const cuDoubleComplex *d, - const cuDoubleComplex *du, - cuDoubleComplex *B, int ldb, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *, int, - void *); - static auto func_ptr = LoadSymbol("cusparseZgtsv2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSgtsv_nopivot(cusparseHandle_t handle, int m, int n, const float *dl, - const float *d, const float *du, float *B, int ldb) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, int, - const float *, const float *, - const float *, float *, int); - static auto func_ptr = LoadSymbol("cusparseSgtsv_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDgtsv_nopivot(cusparseHandle_t handle, int m, int n, const double *dl, - const double *d, const double *du, double *B, int ldb) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, double *, int); 
- static auto func_ptr = LoadSymbol("cusparseDgtsv_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv_nopivot( - cusparseHandle_t handle, int m, int n, const cuComplex *dl, - const cuComplex *d, const cuComplex *du, cuComplex *B, int ldb) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCgtsv_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb); -} - -cusparseStatus_t CUSPARSEAPI -cusparseZgtsv_nopivot(cusparseHandle_t handle, int m, int n, - const cuDoubleComplex *dl, const cuDoubleComplex *d, - const cuDoubleComplex *du, cuDoubleComplex *B, int ldb) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cusparseZgtsv_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2_nopivot_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const float *dl, const float *d, - const float *du, const float *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const float *, const float *, - const float *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSgtsv2_nopivot_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsv2_nopivot_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const double *dl, const double *d, - const double *du, const double *B, 
int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, const double *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDgtsv2_nopivot_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2_nopivot_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const cuComplex *dl, - const cuComplex *d, const cuComplex *du, const cuComplex *B, int ldb, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, const cuComplex *, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgtsv2_nopivot_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2_nopivot_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, - const cuDoubleComplex *B, int ldb, size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, const cuDoubleComplex *, - int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgtsv2_nopivot_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2_nopivot( - cusparseHandle_t handle, int m, int n, const float *dl, const float *d, - const float *du, float *B, int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const float *, const float *, - float 
*, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgtsv2_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsv2_nopivot( - cusparseHandle_t handle, int m, int n, const double *dl, const double *d, - const double *du, double *B, int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const double *, - const double *, double *, int, void *); - static auto func_ptr = LoadSymbol("cusparseDgtsv2_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2_nopivot( - cusparseHandle_t handle, int m, int n, const cuComplex *dl, - const cuComplex *d, const cuComplex *du, cuComplex *B, int ldb, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, - const cuComplex *, cuComplex *, int, void *); - static auto func_ptr = LoadSymbol("cusparseCgtsv2_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2_nopivot( - cusparseHandle_t handle, int m, int n, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, cuDoubleComplex *B, - int ldb, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *, int, - void *); - static auto func_ptr = LoadSymbol("cusparseZgtsv2_nopivot"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsvStridedBatch( - cusparseHandle_t handle, int m, const float *dl, const float *d, - const float 
*du, float *x, int batchCount, int batchStride) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const float *, const float *, const float *, - float *, int, int); - static auto func_ptr = LoadSymbol("cusparseSgtsvStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsvStridedBatch( - cusparseHandle_t handle, int m, const double *dl, const double *d, - const double *du, double *x, int batchCount, int batchStride) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, const double *, const double *, - double *, int, int); - static auto func_ptr = LoadSymbol("cusparseDgtsvStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsvStridedBatch( - cusparseHandle_t handle, int m, const cuComplex *dl, const cuComplex *d, - const cuComplex *du, cuComplex *x, int batchCount, int batchStride) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const cuComplex *, - const cuComplex *, cuComplex *, int, int); - static auto func_ptr = LoadSymbol("cusparseCgtsvStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsvStridedBatch( - cusparseHandle_t handle, int m, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, cuDoubleComplex *x, - int batchCount, int batchStride) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, const cuDoubleComplex *, - const cuDoubleComplex *, cuDoubleComplex *, int, int); - static auto func_ptr = LoadSymbol("cusparseZgtsvStridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2StridedBatch_bufferSizeExt( - cusparseHandle_t handle, int m, const float *dl, const float *d, - const float *du, const float *x, int batchCount, int batchStride, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const float *, const float *, const float *, - const float *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSgtsv2StridedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, - bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgtsv2StridedBatch_bufferSizeExt( - cusparseHandle_t handle, int m, const double *dl, const double *d, - const double *du, const double *x, int batchCount, int batchStride, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, const double *, const double *, - const double *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDgtsv2StridedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, - bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2StridedBatch_bufferSizeExt( - cusparseHandle_t handle, int m, const cuComplex *dl, const cuComplex *d, - const cuComplex *du, const cuComplex *x, int batchCount, int batchStride, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const cuComplex *, - const cuComplex *, const cuComplex *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgtsv2StridedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, - bufferSizeInBytes); -} - 
-cusparseStatus_t CUSPARSEAPI cusparseZgtsv2StridedBatch_bufferSizeExt( - cusparseHandle_t handle, int m, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, - const cuDoubleComplex *x, int batchCount, int batchStride, - size_t *bufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, const cuDoubleComplex *, - const cuDoubleComplex *, const cuDoubleComplex *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgtsv2StridedBatch_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, - bufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgtsv2StridedBatch( - cusparseHandle_t handle, int m, const float *dl, const float *d, - const float *du, float *x, int batchCount, int batchStride, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const float *, const float *, const float *, - float *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgtsv2StridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseDgtsv2StridedBatch(cusparseHandle_t handle, int m, const double *dl, - const double *d, const double *du, double *x, - int batchCount, int batchStride, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const double *, const double *, const double *, - double *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseDgtsv2StridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgtsv2StridedBatch( - cusparseHandle_t handle, int m, const cuComplex *dl, const cuComplex *d, - const cuComplex *du, cuComplex *x, int 
batchCount, int batchStride, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuComplex *, const cuComplex *, - const cuComplex *, cuComplex *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseCgtsv2StridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgtsv2StridedBatch( - cusparseHandle_t handle, int m, const cuDoubleComplex *dl, - const cuDoubleComplex *d, const cuDoubleComplex *du, cuDoubleComplex *x, - int batchCount, int batchStride, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cuDoubleComplex *, const cuDoubleComplex *, - const cuDoubleComplex *, cuDoubleComplex *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseZgtsv2StridedBatch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseXcsrgemmNnz(cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, - const cusparseMatDescr_t descrA, const int nnzA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, const int nnzB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrC, int *csrSortedRowPtrC, - int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - const cusparseMatDescr_t, const int, const int *, const int *, - const cusparseMatDescr_t, const int, const int *, const int *, - const cusparseMatDescr_t, int *, int *); - static auto func_ptr = LoadSymbol("cusparseXcsrgemmNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, descrA, nnzA, - 
csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedRowPtrB, csrSortedColIndB, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrgemm( - cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, - const cusparseMatDescr_t descrA, const int nnzA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, const int nnzB, const float *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrC, float *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - const cusparseMatDescr_t, const int, const float *, const int *, - const int *, const cusparseMatDescr_t, const int, const float *, - const int *, const int *, const cusparseMatDescr_t, float *, const int *, - int *); - static auto func_ptr = LoadSymbol("cusparseScsrgemm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrgemm( - cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, - const cusparseMatDescr_t descrA, int nnzA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, int nnzB, const double *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrC, double *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - 
cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - const cusparseMatDescr_t, int, const double *, const int *, const int *, - const cusparseMatDescr_t, int, const double *, const int *, const int *, - const cusparseMatDescr_t, double *, const int *, int *); - static auto func_ptr = LoadSymbol("cusparseDcsrgemm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrgemm( - cusparseHandle_t handle, cusparseOperation_t transA, - cusparseOperation_t transB, int m, int n, int k, - const cusparseMatDescr_t descrA, int nnzA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, int nnzB, const cuComplex *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrC, cuComplex *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - const cusparseMatDescr_t, int, const cuComplex *, const int *, - const int *, const cusparseMatDescr_t, int, const cuComplex *, - const int *, const int *, const cusparseMatDescr_t, cuComplex *, - const int *, int *); - static auto func_ptr = LoadSymbol("cusparseCcsrgemm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrgemm( - cusparseHandle_t handle, cusparseOperation_t transA, - 
cusparseOperation_t transB, int m, int n, int k, - const cusparseMatDescr_t descrA, int nnzA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, - const cuDoubleComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - cuDoubleComplex *csrSortedValC, const int *csrSortedRowPtrC, - int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, int, int, int, - const cusparseMatDescr_t, int, const cuDoubleComplex *, const int *, - const int *, const cusparseMatDescr_t, int, const cuDoubleComplex *, - const int *, const int *, const cusparseMatDescr_t, cuDoubleComplex *, - const int *, int *); - static auto func_ptr = LoadSymbol("cusparseZcsrgemm"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, transA, transB, m, n, k, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseCreateCsrgemm2Info(csrgemm2Info_t *info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrgemm2Info_t *); - static auto func_ptr = LoadSymbol("cusparseCreateCsrgemm2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDestroyCsrgemm2Info(csrgemm2Info_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrgemm2Info_t); - static auto func_ptr = LoadSymbol("cusparseDestroyCsrgemm2Info"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(info); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrgemm2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int k, const float *alpha, - const cusparseMatDescr_t descrA, int nnzA, const int *csrSortedRowPtrA, - const int 
*csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, const float *beta, - const cusparseMatDescr_t descrD, int nnzD, const int *csrSortedRowPtrD, - const int *csrSortedColIndD, csrgemm2Info_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const float *, const cusparseMatDescr_t, - int, const int *, const int *, const cusparseMatDescr_t, int, const int *, - const int *, const float *, const cusparseMatDescr_t, int, const int *, - const int *, csrgemm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseScsrgemm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, - csrSortedColIndB, beta, descrD, nnzD, csrSortedRowPtrD, - csrSortedColIndD, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrgemm2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int k, const double *alpha, - const cusparseMatDescr_t descrA, int nnzA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const double *beta, const cusparseMatDescr_t descrD, int nnzD, - const int *csrSortedRowPtrD, const int *csrSortedColIndD, - csrgemm2Info_t info, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const double *, const cusparseMatDescr_t, - int, const int *, const int *, const cusparseMatDescr_t, int, const int *, - const int *, const double *, const cusparseMatDescr_t, int, const int *, - const int *, csrgemm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseDcsrgemm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, 
csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, - csrSortedColIndB, beta, descrD, nnzD, csrSortedRowPtrD, - csrSortedColIndD, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrgemm2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int k, const cuComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cuComplex *beta, const cusparseMatDescr_t descrD, int nnzD, - const int *csrSortedRowPtrD, const int *csrSortedColIndD, - csrgemm2Info_t info, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuComplex *, - const cusparseMatDescr_t, int, const int *, const int *, - const cusparseMatDescr_t, int, const int *, const int *, - const cuComplex *, const cusparseMatDescr_t, int, const int *, - const int *, csrgemm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsrgemm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, - csrSortedColIndB, beta, descrD, nnzD, csrSortedRowPtrD, - csrSortedColIndD, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrgemm2_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int k, const cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cuDoubleComplex *beta, const cusparseMatDescr_t descrD, int nnzD, - const int *csrSortedRowPtrD, const int *csrSortedColIndD, - csrgemm2Info_t info, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, 
int, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, int, const int *, const int *, - const cusparseMatDescr_t, int, const int *, const int *, - const cuDoubleComplex *, const cusparseMatDescr_t, int, const int *, - const int *, csrgemm2Info_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsrgemm2_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, - csrSortedColIndB, beta, descrD, nnzD, csrSortedRowPtrD, - csrSortedColIndD, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrgemm2Nnz( - cusparseHandle_t handle, int m, int n, int k, - const cusparseMatDescr_t descrA, int nnzA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrD, int nnzD, const int *csrSortedRowPtrD, - const int *csrSortedColIndD, const cusparseMatDescr_t descrC, - int *csrSortedRowPtrC, int *nnzTotalDevHostPtr, const csrgemm2Info_t info, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, int, - const int *, const int *, const cusparseMatDescr_t, int, const int *, - const int *, const cusparseMatDescr_t, int, const int *, const int *, - const cusparseMatDescr_t, int *, int *, const csrgemm2Info_t, void *); - static auto func_ptr = LoadSymbol("cusparseXcsrgemm2Nnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, descrA, nnzA, csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, - csrSortedColIndB, descrD, nnzD, csrSortedRowPtrD, - csrSortedColIndD, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrgemm2( - cusparseHandle_t handle, int m, int n, int k, const float *alpha, - 
const cusparseMatDescr_t descrA, int nnzA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, int nnzB, const float *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, const float *beta, - const cusparseMatDescr_t descrD, int nnzD, const float *csrSortedValD, - const int *csrSortedRowPtrD, const int *csrSortedColIndD, - const cusparseMatDescr_t descrC, float *csrSortedValC, - const int *csrSortedRowPtrC, int *csrSortedColIndC, - const csrgemm2Info_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const float *, const cusparseMatDescr_t, - int, const float *, const int *, const int *, const cusparseMatDescr_t, - int, const float *, const int *, const int *, const float *, - const cusparseMatDescr_t, int, const float *, const int *, const int *, - const cusparseMatDescr_t, float *, const int *, int *, - const csrgemm2Info_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsrgemm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, beta, - descrD, nnzD, csrSortedValD, csrSortedRowPtrD, - csrSortedColIndD, descrC, csrSortedValC, csrSortedRowPtrC, - csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrgemm2( - cusparseHandle_t handle, int m, int n, int k, const double *alpha, - const cusparseMatDescr_t descrA, int nnzA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, int nnzB, const double *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const double *beta, const cusparseMatDescr_t descrD, int nnzD, - const double *csrSortedValD, const int *csrSortedRowPtrD, - const int *csrSortedColIndD, const 
cusparseMatDescr_t descrC, - double *csrSortedValC, const int *csrSortedRowPtrC, int *csrSortedColIndC, - const csrgemm2Info_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const double *, const cusparseMatDescr_t, - int, const double *, const int *, const int *, const cusparseMatDescr_t, - int, const double *, const int *, const int *, const double *, - const cusparseMatDescr_t, int, const double *, const int *, const int *, - const cusparseMatDescr_t, double *, const int *, int *, - const csrgemm2Info_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsrgemm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, beta, - descrD, nnzD, csrSortedValD, csrSortedRowPtrD, - csrSortedColIndD, descrC, csrSortedValC, csrSortedRowPtrC, - csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrgemm2( - cusparseHandle_t handle, int m, int n, int k, const cuComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, int nnzB, const cuComplex *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cuComplex *beta, const cusparseMatDescr_t descrD, int nnzD, - const cuComplex *csrSortedValD, const int *csrSortedRowPtrD, - const int *csrSortedColIndD, const cusparseMatDescr_t descrC, - cuComplex *csrSortedValC, const int *csrSortedRowPtrC, - int *csrSortedColIndC, const csrgemm2Info_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuComplex *, - const cusparseMatDescr_t, int, const cuComplex *, const int *, - const int *, const cusparseMatDescr_t, int, const cuComplex *, - const int *, const 
int *, const cuComplex *, const cusparseMatDescr_t, - int, const cuComplex *, const int *, const int *, - const cusparseMatDescr_t, cuComplex *, const int *, int *, - const csrgemm2Info_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsrgemm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, beta, - descrD, nnzD, csrSortedValD, csrSortedRowPtrD, - csrSortedColIndD, descrC, csrSortedValC, csrSortedRowPtrC, - csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrgemm2( - cusparseHandle_t handle, int m, int n, int k, const cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrB, int nnzB, - const cuDoubleComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cuDoubleComplex *beta, - const cusparseMatDescr_t descrD, int nnzD, - const cuDoubleComplex *csrSortedValD, const int *csrSortedRowPtrD, - const int *csrSortedColIndD, const cusparseMatDescr_t descrC, - cuDoubleComplex *csrSortedValC, const int *csrSortedRowPtrC, - int *csrSortedColIndC, const csrgemm2Info_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, int, const cuDoubleComplex *, const int *, - const int *, const cusparseMatDescr_t, int, const cuDoubleComplex *, - const int *, const int *, const cuDoubleComplex *, - const cusparseMatDescr_t, int, const cuDoubleComplex *, const int *, - const int *, const cusparseMatDescr_t, cuDoubleComplex *, const int *, - int *, const csrgemm2Info_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsrgemm2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, m, n, k, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, beta, - descrD, nnzD, csrSortedValD, csrSortedRowPtrD, - csrSortedColIndD, descrC, csrSortedValC, csrSortedRowPtrC, - csrSortedColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrgeamNnz( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - int nnzA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrB, int nnzB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - int *csrSortedRowPtrC, int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, int, const int *, - const int *, const cusparseMatDescr_t, int, const int *, const int *, - const cusparseMatDescr_t, int *, int *); - static auto func_ptr = LoadSymbol("cusparseXcsrgeamNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, nnzA, csrSortedRowPtrA, - csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, - csrSortedColIndB, descrC, csrSortedRowPtrC, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrgeam( - cusparseHandle_t handle, int m, int n, const float *alpha, - const cusparseMatDescr_t descrA, int nnzA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *beta, - const cusparseMatDescr_t descrB, int nnzB, const float *csrSortedValB, - const int *csrSortedRowPtrB, const int *csrSortedColIndB, - const cusparseMatDescr_t descrC, float *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, const cusparseMatDescr_t, int, - const float *, const int *, const int *, const float *, - const cusparseMatDescr_t, int, const float *, const int *, 
const int *, - const cusparseMatDescr_t, float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseScsrgeam"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrgeam( - cusparseHandle_t handle, int m, int n, const double *alpha, - const cusparseMatDescr_t descrA, int nnzA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const double *beta, const cusparseMatDescr_t descrB, int nnzB, - const double *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - double *csrSortedValC, int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, const cusparseMatDescr_t, int, - const double *, const int *, const int *, const double *, - const cusparseMatDescr_t, int, const double *, const int *, const int *, - const cusparseMatDescr_t, double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDcsrgeam"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrgeam( - cusparseHandle_t handle, int m, int n, const cuComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cuComplex *beta, const cusparseMatDescr_t descrB, int nnzB, - const cuComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int 
*csrSortedColIndB, const cusparseMatDescr_t descrC, - cuComplex *csrSortedValC, int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuComplex *, const cusparseMatDescr_t, - int, const cuComplex *, const int *, const int *, const cuComplex *, - const cusparseMatDescr_t, int, const cuComplex *, const int *, - const int *, const cusparseMatDescr_t, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCcsrgeam"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrgeam( - cusparseHandle_t handle, int m, int n, const cuDoubleComplex *alpha, - const cusparseMatDescr_t descrA, int nnzA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cuDoubleComplex *beta, - const cusparseMatDescr_t descrB, int nnzB, - const cuDoubleComplex *csrSortedValB, const int *csrSortedRowPtrB, - const int *csrSortedColIndB, const cusparseMatDescr_t descrC, - cuDoubleComplex *csrSortedValC, int *csrSortedRowPtrC, - int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cuDoubleComplex *, - const cusparseMatDescr_t, int, const cuDoubleComplex *, const int *, - const int *, const cuDoubleComplex *, const cusparseMatDescr_t, int, - const cuDoubleComplex *, const int *, const int *, - const cusparseMatDescr_t, cuDoubleComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZcsrgeam"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, - csrSortedValB, csrSortedRowPtrB, 
csrSortedColIndB, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsrcolor( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const float *fractionToColor, int *ncolors, - int *coloring, int *reordering, const cusparseColorInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, const float *, int *, int *, int *, - const cusparseColorInfo_t); - static auto func_ptr = LoadSymbol("cusparseScsrcolor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, fractionToColor, ncolors, coloring, - reordering, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsrcolor( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const double *fractionToColor, int *ncolors, - int *coloring, int *reordering, const cusparseColorInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, int *, int *, int *, - const cusparseColorInfo_t); - static auto func_ptr = LoadSymbol("cusparseDcsrcolor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, fractionToColor, ncolors, coloring, - reordering, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsrcolor( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const float *fractionToColor, int *ncolors, - int *coloring, int 
*reordering, const cusparseColorInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, const float *, int *, int *, int *, - const cusparseColorInfo_t); - static auto func_ptr = LoadSymbol("cusparseCcsrcolor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, fractionToColor, ncolors, coloring, - reordering, info); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsrcolor( - cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const double *fractionToColor, int *ncolors, - int *coloring, int *reordering, const cusparseColorInfo_t info) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, const double *, int *, - int *, int *, const cusparseColorInfo_t); - static auto func_ptr = LoadSymbol("cusparseZcsrcolor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, fractionToColor, ncolors, coloring, - reordering, info); -} - -cusparseStatus_t CUSPARSEAPI -cusparseSnnz(cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const float *A, int lda, - int *nnzPerRowCol, int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, int, int *, int *); - static auto func_ptr = LoadSymbol("cusparseSnnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI 
-cusparseDnnz(cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const double *A, int lda, - int *nnzPerRowCol, int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, int, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDnnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCnnz(cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuComplex *A, int lda, - int *nnzPerRowCol, int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, int, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCnnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI -cusparseZnnz(cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *A, int lda, - int *nnzPerRowCol, int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, int, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZnnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI cusparseSnnz_compress( - cusparseHandle_t handle, int m, const cusparseMatDescr_t descr, - const float *csrValA, const int *csrRowPtrA, int *nnzPerRow, int *nnzC, - float tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI 
*)( - cusparseHandle_t, int, const cusparseMatDescr_t, const float *, - const int *, int *, int *, float); - static auto func_ptr = LoadSymbol("cusparseSnnz_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, descr, csrValA, csrRowPtrA, nnzPerRow, nnzC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseDnnz_compress( - cusparseHandle_t handle, int m, const cusparseMatDescr_t descr, - const double *csrValA, const int *csrRowPtrA, int *nnzPerRow, int *nnzC, - double tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cusparseMatDescr_t, const double *, - const int *, int *, int *, double); - static auto func_ptr = LoadSymbol("cusparseDnnz_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, descr, csrValA, csrRowPtrA, nnzPerRow, nnzC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseCnnz_compress( - cusparseHandle_t handle, int m, const cusparseMatDescr_t descr, - const cuComplex *csrValA, const int *csrRowPtrA, int *nnzPerRow, int *nnzC, - cuComplex tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cusparseMatDescr_t, const cuComplex *, - const int *, int *, int *, cuComplex); - static auto func_ptr = LoadSymbol("cusparseCnnz_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, descr, csrValA, csrRowPtrA, nnzPerRow, nnzC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseZnnz_compress( - cusparseHandle_t handle, int m, const cusparseMatDescr_t descr, - const cuDoubleComplex *csrValA, const int *csrRowPtrA, int *nnzPerRow, - int *nnzC, cuDoubleComplex tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, const cusparseMatDescr_t, const cuDoubleComplex *, - const int *, int *, int *, cuDoubleComplex); - static auto func_ptr = LoadSymbol("cusparseZnnz_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, descr, 
csrValA, csrRowPtrA, nnzPerRow, nnzC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2csr_compress( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descra, - const float *csrValA, const int *csrColIndA, const int *csrRowPtrA, - int nnzA, const int *nnzPerRow, float *csrValC, int *csrColIndC, - int *csrRowPtrC, float tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, int, const int *, float *, int *, int *, float); - static auto func_ptr = LoadSymbol("cusparseScsr2csr_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descra, csrValA, csrColIndA, csrRowPtrA, nnzA, - nnzPerRow, csrValC, csrColIndC, csrRowPtrC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2csr_compress( - cusparseHandle_t handle, - int m, // number of rows - int n, const cusparseMatDescr_t descra, - const double *csrValA, // csr values array-the elements which are below a - // certain tolerance will be remvoed - const int *csrColIndA, - const int *csrRowPtrA, // corresponding input noncompressed row pointer - int nnzA, const int *nnzPerRow, double *csrValC, int *csrColIndC, - int *csrRowPtrC, double tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, int, const int *, double *, int *, int *, - double); - static auto func_ptr = LoadSymbol("cusparseDcsr2csr_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descra, csrValA, csrColIndA, csrRowPtrA, nnzA, - nnzPerRow, csrValC, csrColIndC, csrRowPtrC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2csr_compress( - cusparseHandle_t handle, - int m, // number of rows - int n, const cusparseMatDescr_t descra, - const cuComplex *csrValA, // csr values array-the elements which are below - // a certain tolerance will be remvoed - 
const int *csrColIndA, - const int *csrRowPtrA, // corresponding input noncompressed row pointer - int nnzA, const int *nnzPerRow, cuComplex *csrValC, int *csrColIndC, - int *csrRowPtrC, cuComplex tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, int, const int *, cuComplex *, int *, int *, - cuComplex); - static auto func_ptr = LoadSymbol("cusparseCcsr2csr_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descra, csrValA, csrColIndA, csrRowPtrA, nnzA, - nnzPerRow, csrValC, csrColIndC, csrRowPtrC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2csr_compress( - cusparseHandle_t handle, - int m, // number of rows - int n, const cusparseMatDescr_t descra, - const cuDoubleComplex - *csrValA, // csr values array-the elements which are below a certain - // tolerance will be remvoed - const int *csrColIndA, - const int *csrRowPtrA, // corresponding input noncompressed row pointer - int nnzA, const int *nnzPerRow, cuDoubleComplex *csrValC, int *csrColIndC, - int *csrRowPtrC, cuDoubleComplex tol) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, const int *, - cuDoubleComplex *, int *, int *, cuDoubleComplex); - static auto func_ptr = LoadSymbol("cusparseZcsr2csr_compress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descra, csrValA, csrColIndA, csrRowPtrA, nnzA, - nnzPerRow, csrValC, csrColIndC, csrRowPtrC, tol); -} - -cusparseStatus_t CUSPARSEAPI cusparseSdense2csr( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *A, int lda, const int *nnzPerRow, float *csrSortedValA, - int *csrSortedRowPtrA, int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const 
cusparseMatDescr_t, const float *, int, - const int *, float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseSdense2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA); -} - -cusparseStatus_t CUSPARSEAPI cusparseDdense2csr( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *A, int lda, const int *nnzPerRow, double *csrSortedValA, - int *csrSortedRowPtrA, int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, int, - const int *, double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDdense2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA); -} - -cusparseStatus_t CUSPARSEAPI cusparseCdense2csr( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *A, int lda, const int *nnzPerRow, cuComplex *csrSortedValA, - int *csrSortedRowPtrA, int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - int, const int *, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCdense2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA); -} - -cusparseStatus_t CUSPARSEAPI cusparseZdense2csr( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuDoubleComplex *A, int lda, const int *nnzPerRow, - cuDoubleComplex *csrSortedValA, int *csrSortedRowPtrA, - int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex 
*, int, const int *, cuDoubleComplex *, int *, - int *); - static auto func_ptr = LoadSymbol("cusparseZdense2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, csrSortedValA, - csrSortedRowPtrA, csrSortedColIndA); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, float *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, float *, int); - static auto func_ptr = LoadSymbol("cusparseScsr2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, double *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, double *, int); - static auto func_ptr = LoadSymbol("cusparseDcsr2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cuComplex *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCcsr2dense"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cuDoubleComplex *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cusparseZcsr2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseSdense2csc( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *A, int lda, const int *nnzPerCol, float *cscSortedValA, - int *cscSortedRowIndA, int *cscSortedColPtrA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, int, - const int *, float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseSdense2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerCol, cscSortedValA, - cscSortedRowIndA, cscSortedColPtrA); -} - -cusparseStatus_t CUSPARSEAPI cusparseDdense2csc( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *A, int lda, const int *nnzPerCol, double *cscSortedValA, - int *cscSortedRowIndA, int *cscSortedColPtrA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, int, - const int *, double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDdense2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, 
m, n, descrA, A, lda, nnzPerCol, cscSortedValA, - cscSortedRowIndA, cscSortedColPtrA); -} - -cusparseStatus_t CUSPARSEAPI cusparseCdense2csc( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *A, int lda, const int *nnzPerCol, cuComplex *cscSortedValA, - int *cscSortedRowIndA, int *cscSortedColPtrA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - int, const int *, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCdense2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerCol, cscSortedValA, - cscSortedRowIndA, cscSortedColPtrA); -} - -cusparseStatus_t CUSPARSEAPI cusparseZdense2csc( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuDoubleComplex *A, int lda, const int *nnzPerCol, - cuDoubleComplex *cscSortedValA, int *cscSortedRowIndA, - int *cscSortedColPtrA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, int, const int *, cuDoubleComplex *, int *, - int *); - static auto func_ptr = LoadSymbol("cusparseZdense2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerCol, cscSortedValA, - cscSortedRowIndA, cscSortedColPtrA); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsc2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, float *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, float *, int); - static auto func_ptr = LoadSymbol("cusparseScsc2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, 
cscSortedRowIndA, - cscSortedColPtrA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsc2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, double *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, double *, int); - static auto func_ptr = LoadSymbol("cusparseDcsc2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsc2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, cuComplex *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseCcsc2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsc2dense( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuDoubleComplex *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, cuDoubleComplex *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, cuDoubleComplex *, - int); - static auto func_ptr = LoadSymbol("cusparseZcsc2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseXcoo2csr(cusparseHandle_t handle, - const int *cooRowInd, int nnz, - int m, int *csrSortedRowPtr, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const int *, int, int, int *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseXcoo2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, cooRowInd, nnz, m, csrSortedRowPtr, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsr2coo(cusparseHandle_t handle, - const int *csrSortedRowPtr, - int nnz, int m, int *cooRowInd, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const int *, int, int, int *, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseXcsr2coo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, csrSortedRowPtr, nnz, m, cooRowInd, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseCsr2cscEx( - cusparseHandle_t handle, int m, int n, int nnz, const void *csrSortedVal, - cudaDataType csrSortedValtype, const int *csrSortedRowPtr, - const int *csrSortedColInd, void *cscSortedVal, - cudaDataType cscSortedValtype, int *cscSortedRowInd, int *cscSortedColPtr, - cusparseAction_t copyValues, cusparseIndexBase_t idxBase, - cudaDataType executiontype) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const void *, cudaDataType, const int *, - const int *, void *, cudaDataType, int *, int *, cusparseAction_t, - cusparseIndexBase_t, cudaDataType); - static auto func_ptr = LoadSymbol("cusparseCsr2cscEx"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrSortedVal, csrSortedValtype, - csrSortedRowPtr, csrSortedColInd, cscSortedVal, - cscSortedValtype, cscSortedRowInd, cscSortedColPtr, - copyValues, idxBase, executiontype); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2csc( - cusparseHandle_t handle, int m, int n, int nnz, const float 
*csrSortedVal, - const int *csrSortedRowPtr, const int *csrSortedColInd, float *cscSortedVal, - int *cscSortedRowInd, int *cscSortedColPtr, cusparseAction_t copyValues, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const float *, const int *, const int *, - float *, int *, int *, cusparseAction_t, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseScsr2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, cscSortedVal, cscSortedRowInd, - cscSortedColPtr, copyValues, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2csc( - cusparseHandle_t handle, int m, int n, int nnz, const double *csrSortedVal, - const int *csrSortedRowPtr, const int *csrSortedColInd, - double *cscSortedVal, int *cscSortedRowInd, int *cscSortedColPtr, - cusparseAction_t copyValues, cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const double *, const int *, const int *, - double *, int *, int *, cusparseAction_t, cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseDcsr2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, cscSortedVal, cscSortedRowInd, - cscSortedColPtr, copyValues, idxBase); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCcsr2csc(cusparseHandle_t handle, int m, int n, int nnz, - const cuComplex *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, cuComplex *cscSortedVal, - int *cscSortedRowInd, int *cscSortedColPtr, - cusparseAction_t copyValues, cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuComplex *, const int *, - const int *, cuComplex *, int *, int *, cusparseAction_t, - cusparseIndexBase_t); - static auto func_ptr = 
LoadSymbol("cusparseCcsr2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, cscSortedVal, cscSortedRowInd, - cscSortedColPtr, copyValues, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2csc( - cusparseHandle_t handle, int m, int n, int nnz, - const cuDoubleComplex *csrSortedVal, const int *csrSortedRowPtr, - const int *csrSortedColInd, cuDoubleComplex *cscSortedVal, - int *cscSortedRowInd, int *cscSortedColPtr, cusparseAction_t copyValues, - cusparseIndexBase_t idxBase) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuDoubleComplex *, const int *, - const int *, cuDoubleComplex *, int *, int *, cusparseAction_t, - cusparseIndexBase_t); - static auto func_ptr = LoadSymbol("cusparseZcsr2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrSortedVal, csrSortedRowPtr, - csrSortedColInd, cscSortedVal, cscSortedRowInd, - cscSortedColPtr, copyValues, idxBase); -} - -cusparseStatus_t CUSPARSEAPI cusparseSdense2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *A, int lda, const int *nnzPerRow, cusparseHybMat_t hybA, - int userEllWidth, cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, int, - const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseSdense2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, hybA, userEllWidth, - partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseDdense2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *A, int lda, const int *nnzPerRow, cusparseHybMat_t hybA, - int userEllWidth, cusparseHybPartition_t partitionType) { - using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, int, - const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseDdense2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, hybA, userEllWidth, - partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseCdense2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *A, int lda, const int *nnzPerRow, cusparseHybMat_t hybA, - int userEllWidth, cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - int, const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseCdense2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, hybA, userEllWidth, - partitionType); -} - -cusparseStatus_t CUSPARSEAPI -cusparseZdense2hyb(cusparseHandle_t handle, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *A, - int lda, const int *nnzPerRow, cusparseHybMat_t hybA, - int userEllWidth, cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, int, const int *, cusparseHybMat_t, int, - cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseZdense2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, A, lda, nnzPerRow, hybA, userEllWidth, - partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseShyb2dense(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - float *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const 
cusparseHybMat_t, - float *, int); - static auto func_ptr = LoadSymbol("cusparseShyb2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseDhyb2dense(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - double *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - double *, int); - static auto func_ptr = LoadSymbol("cusparseDhyb2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseChyb2dense(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - cuComplex *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - cuComplex *, int); - static auto func_ptr = LoadSymbol("cusparseChyb2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseZhyb2dense(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - cuDoubleComplex *A, int lda) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - cuDoubleComplex *, int); - static auto func_ptr = LoadSymbol("cusparseZhyb2dense"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, A, lda); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseHybMat_t hybA, int userEllWidth, - cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, 
const cusparseMatDescr_t, const float *, - const int *, const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseScsr2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, hybA, userEllWidth, partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseHybMat_t hybA, int userEllWidth, - cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseDcsr2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, hybA, userEllWidth, partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseHybMat_t hybA, int userEllWidth, - cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseCcsr2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, hybA, userEllWidth, partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuDoubleComplex *csrSortedValA, 
const int *csrSortedRowPtrA, - const int *csrSortedColIndA, cusparseHybMat_t hybA, int userEllWidth, - cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, cusparseHybMat_t, int, - cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseZcsr2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, hybA, userEllWidth, partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseShyb2csr(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - float *csrSortedValA, - int *csrSortedRowPtrA, - int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseShyb2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA); -} - -cusparseStatus_t CUSPARSEAPI cusparseDhyb2csr(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - double *csrSortedValA, - int *csrSortedRowPtrA, - int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDhyb2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA); -} - -cusparseStatus_t CUSPARSEAPI cusparseChyb2csr(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - cuComplex *csrSortedValA, - int *csrSortedRowPtrA, - int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( 
- cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseChyb2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA); -} - -cusparseStatus_t CUSPARSEAPI cusparseZhyb2csr(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - cuDoubleComplex *csrSortedValA, - int *csrSortedRowPtrA, - int *csrSortedColIndA) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - cuDoubleComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZhyb2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsc2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const float *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, cusparseHybMat_t hybA, int userEllWidth, - cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseScsc2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, hybA, userEllWidth, partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsc2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const double *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, cusparseHybMat_t hybA, int userEllWidth, - cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, 
int, const cusparseMatDescr_t, const double *, - const int *, const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseDcsc2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, hybA, userEllWidth, partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsc2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuComplex *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, cusparseHybMat_t hybA, int userEllWidth, - cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, - const int *, const int *, cusparseHybMat_t, int, cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseCcsc2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, hybA, userEllWidth, partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsc2hyb( - cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, - const cuDoubleComplex *cscSortedValA, const int *cscSortedRowIndA, - const int *cscSortedColPtrA, cusparseHybMat_t hybA, int userEllWidth, - cusparseHybPartition_t partitionType) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, cusparseHybMat_t, int, - cusparseHybPartition_t); - static auto func_ptr = LoadSymbol("cusparseZcsc2hyb"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, descrA, cscSortedValA, cscSortedRowIndA, - cscSortedColPtrA, hybA, userEllWidth, partitionType); -} - -cusparseStatus_t CUSPARSEAPI cusparseShyb2csc(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, 
- float *cscSortedVal, - int *cscSortedRowInd, - int *cscSortedColPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseShyb2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, cscSortedVal, cscSortedRowInd, - cscSortedColPtr); -} - -cusparseStatus_t CUSPARSEAPI cusparseDhyb2csc(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - double *cscSortedVal, - int *cscSortedRowInd, - int *cscSortedColPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDhyb2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, cscSortedVal, cscSortedRowInd, - cscSortedColPtr); -} - -cusparseStatus_t CUSPARSEAPI cusparseChyb2csc(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - cuComplex *cscSortedVal, - int *cscSortedRowInd, - int *cscSortedColPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseChyb2csc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, cscSortedVal, cscSortedRowInd, - cscSortedColPtr); -} - -cusparseStatus_t CUSPARSEAPI cusparseZhyb2csc(cusparseHandle_t handle, - const cusparseMatDescr_t descrA, - const cusparseHybMat_t hybA, - cuDoubleComplex *cscSortedVal, - int *cscSortedRowInd, - int *cscSortedColPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, const cusparseMatDescr_t, const cusparseHybMat_t, - cuDoubleComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZhyb2csc"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, descrA, hybA, cscSortedVal, cscSortedRowInd, - cscSortedColPtr); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsr2bsrNnz( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, int blockDim, const cusparseMatDescr_t descrC, - int *bsrSortedRowPtrC, int *nnzTotalDevHostPtr) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const int *, const int *, int, const cusparseMatDescr_t, int *, int *); - static auto func_ptr = LoadSymbol("cusparseXcsr2bsrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedRowPtrA, - csrSortedColIndA, blockDim, descrC, bsrSortedRowPtrC, - nnzTotalDevHostPtr); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2bsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, float *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, const cusparseMatDescr_t, - float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseScsr2bsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, blockDim, descrC, bsrSortedValC, - bsrSortedRowPtrC, bsrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2bsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const 
int *csrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, double *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, const cusparseMatDescr_t, - double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDcsr2bsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, blockDim, descrC, bsrSortedValC, - bsrSortedRowPtrC, bsrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2bsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, cuComplex *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, - const cusparseMatDescr_t, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCcsr2bsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, blockDim, descrC, bsrSortedValC, - bsrSortedRowPtrC, bsrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2bsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, cuDoubleComplex *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, 
cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, - const cusparseMatDescr_t, cuDoubleComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZcsr2bsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, blockDim, descrC, bsrSortedValC, - bsrSortedRowPtrC, bsrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseSbsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, float *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, const cusparseMatDescr_t, - float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseSbsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, blockDim, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseDbsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, double *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, const cusparseMatDescr_t, - double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDbsr2csr"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, blockDim, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseCbsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, cuComplex *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, - const cusparseMatDescr_t, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCbsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, blockDim, descrC, csrSortedValC, - csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseZbsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, - const cusparseMatDescr_t descrC, cuDoubleComplex *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, - const cusparseMatDescr_t, cuDoubleComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZbsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, blockDim, descrC, csrSortedValC, - 
csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsc_bufferSize( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const float *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const float *, const int *, const int *, - int, int, int *); - static auto func_ptr = LoadSymbol("cusparseSgebsr2gebsc_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsc_bufferSize( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const double *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const double *, const int *, const int *, - int, int, int *); - static auto func_ptr = LoadSymbol("cusparseDgebsr2gebsc_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsc_bufferSize( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuComplex *, const int *, - const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseCgebsr2gebsc_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, 
bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsc_bufferSize( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuDoubleComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuDoubleComplex *, const int *, - const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseZgebsr2gebsc_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsc_bufferSizeExt( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const float *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const float *, const int *, const int *, - int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSgebsr2gebsc_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsc_bufferSizeExt( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const double *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const double *, const int *, const int *, - int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDgebsr2gebsc_bufferSizeExt"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsc_bufferSizeExt( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuComplex *, const int *, - const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgebsr2gebsc_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsc_bufferSizeExt( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuDoubleComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuDoubleComplex *, const int *, - const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgebsr2gebsc_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsc( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const float *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, float *bscVal, - int *bscRowInd, int *bscColPtr, cusparseAction_t copyValues, - cusparseIndexBase_t baseIdx, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const float *, 
const int *, const int *, - int, int, float *, int *, int *, cusparseAction_t, cusparseIndexBase_t, - void *); - static auto func_ptr = LoadSymbol("cusparseSgebsr2gebsc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, bscVal, bscRowInd, - bscColPtr, copyValues, baseIdx, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsc( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const double *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - double *bscVal, int *bscRowInd, int *bscColPtr, cusparseAction_t copyValues, - cusparseIndexBase_t baseIdx, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const double *, const int *, const int *, - int, int, double *, int *, int *, cusparseAction_t, cusparseIndexBase_t, - void *); - static auto func_ptr = LoadSymbol("cusparseDgebsr2gebsc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, bscVal, bscRowInd, - bscColPtr, copyValues, baseIdx, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsc( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - cuComplex *bscVal, int *bscRowInd, int *bscColPtr, - cusparseAction_t copyValues, cusparseIndexBase_t baseIdx, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuComplex *, const int *, - const int *, int, int, cuComplex *, int *, int *, cusparseAction_t, - cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseCgebsr2gebsc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, 
bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, bscVal, bscRowInd, - bscColPtr, copyValues, baseIdx, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsc( - cusparseHandle_t handle, int mb, int nb, int nnzb, - const cuDoubleComplex *bsrSortedVal, const int *bsrSortedRowPtr, - const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, - cuDoubleComplex *bscVal, int *bscRowInd, int *bscColPtr, - cusparseAction_t copyValues, cusparseIndexBase_t baseIdx, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cuDoubleComplex *, const int *, - const int *, int, int, cuDoubleComplex *, int *, int *, cusparseAction_t, - cusparseIndexBase_t, void *); - static auto func_ptr = LoadSymbol("cusparseZgebsr2gebsc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, - bsrSortedColInd, rowBlockDim, colBlockDim, bscVal, bscRowInd, - bscColPtr, copyValues, baseIdx, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXgebsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, int rowBlockDim, int colBlockDim, - const cusparseMatDescr_t descrC, int *csrSortedRowPtrC, - int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const int *, const int *, int, int, const cusparseMatDescr_t, int *, - int *); - static auto func_ptr = LoadSymbol("cusparseXgebsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, - csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const 
cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDim, - int colBlockDim, const cusparseMatDescr_t descrC, float *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, int, - const cusparseMatDescr_t, float *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseSgebsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDim, - int colBlockDim, const cusparseMatDescr_t descrC, double *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, int, - const cusparseMatDescr_t, double *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseDgebsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDim, - int colBlockDim, 
const cusparseMatDescr_t descrC, cuComplex *csrSortedValC, - int *csrSortedRowPtrC, int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, int, - const cusparseMatDescr_t, cuComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseCgebsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2csr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDim, - int colBlockDim, const cusparseMatDescr_t descrC, - cuDoubleComplex *csrSortedValC, int *csrSortedRowPtrC, - int *csrSortedColIndC) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, int, - const cusparseMatDescr_t, cuDoubleComplex *, int *, int *); - static auto func_ptr = LoadSymbol("cusparseZgebsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, - csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, 
cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseScsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseDcsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseCcsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseZcsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseZcsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseScsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, 
const int *, const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDcsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCcsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, - int colBlockDim, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZcsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, rowBlockDim, colBlockDim, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsr2gebsrNnz( - cusparseHandle_t handle, cusparseDirection_t dirA, 
int m, int n, - const cusparseMatDescr_t descrA, const int *csrSortedRowPtrA, - const int *csrSortedColIndA, const cusparseMatDescr_t descrC, - int *bsrSortedRowPtrC, int rowBlockDim, int colBlockDim, - int *nnzTotalDevHostPtr, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const int *, const int *, const cusparseMatDescr_t, int *, int, int, - int *, void *); - static auto func_ptr = LoadSymbol("cusparseXcsr2gebsrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedRowPtrA, - csrSortedColIndA, descrC, bsrSortedRowPtrC, rowBlockDim, - colBlockDim, nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const float *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrC, float *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDim, - int colBlockDim, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const float *, const int *, const int *, const cusparseMatDescr_t, - float *, int *, int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseScsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDim, colBlockDim, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const double *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrC, double *bsrSortedValC, - 
int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDim, - int colBlockDim, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const double *, const int *, const int *, const cusparseMatDescr_t, - double *, int *, int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseDcsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDim, colBlockDim, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrC, cuComplex *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDim, - int colBlockDim, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuComplex *, const int *, const int *, const cusparseMatDescr_t, - cuComplex *, int *, int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseCcsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDim, colBlockDim, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, - const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, - const int *csrSortedRowPtrA, const int *csrSortedColIndA, - const cusparseMatDescr_t descrC, cuDoubleComplex *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDim, 
- int colBlockDim, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, - const cuDoubleComplex *, const int *, const int *, - const cusparseMatDescr_t, cuDoubleComplex *, int *, int *, int, int, - void *); - static auto func_ptr = LoadSymbol("cusparseZcsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, - csrSortedColIndA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDim, colBlockDim, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const float *, const int *, const int *, int, - int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseSgebsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, 
const double *, const int *, const int *, int, - int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseDgebsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - int, int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseCgebsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsr_bufferSize( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, - int *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, int, int, int, int, int *); - static auto func_ptr = LoadSymbol("cusparseZgebsr2gebsr_bufferSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const float *, const int *, const int *, int, - int, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSgebsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const double *, const int *, const int *, int, - int, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDgebsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseCgebsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - int, int, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseCgebsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsr_bufferSizeExt( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, int rowBlockDimC, int colBlockDimC, size_t *pBufferSize) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, int, int, int, int, size_t *); - static auto func_ptr = - LoadSymbol("cusparseZgebsr2gebsr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSize); -} - -cusparseStatus_t CUSPARSEAPI cusparseXgebsr2gebsrNnz( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const int *bsrSortedRowPtrA, - const int *bsrSortedColIndA, 
int rowBlockDimA, int colBlockDimA, - const cusparseMatDescr_t descrC, int *bsrSortedRowPtrC, int rowBlockDimC, - int colBlockDimC, int *nnzTotalDevHostPtr, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const int *, const int *, int, int, - const cusparseMatDescr_t, int *, int, int, int *, void *); - static auto func_ptr = LoadSymbol("cusparseXgebsr2gebsrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedRowPtrA, - bsrSortedColIndA, rowBlockDimA, colBlockDimA, descrC, - bsrSortedRowPtrC, rowBlockDimC, colBlockDimC, - nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const float *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, const cusparseMatDescr_t descrC, float *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDimC, - int colBlockDimC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const float *, const int *, const int *, int, - int, const cusparseMatDescr_t, float *, int *, int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseSgebsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDimC, colBlockDimC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const double *bsrSortedValA, - const int 
*bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, const cusparseMatDescr_t descrC, double *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDimC, - int colBlockDimC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const double *, const int *, const int *, int, - int, const cusparseMatDescr_t, double *, int *, int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseDgebsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDimC, colBlockDimC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsr( - cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, const cusparseMatDescr_t descrC, cuComplex *bsrSortedValC, - int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDimC, - int colBlockDimC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuComplex *, const int *, const int *, - int, int, const cusparseMatDescr_t, cuComplex *, int *, int *, int, int, - void *); - static auto func_ptr = LoadSymbol("cusparseCgebsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDimC, colBlockDimC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsr( - 
cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, - const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, - const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, - int colBlockDimA, const cusparseMatDescr_t descrC, - cuDoubleComplex *bsrSortedValC, int *bsrSortedRowPtrC, - int *bsrSortedColIndC, int rowBlockDimC, int colBlockDimC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, cusparseDirection_t, int, int, int, - const cusparseMatDescr_t, const cuDoubleComplex *, const int *, - const int *, int, int, const cusparseMatDescr_t, cuDoubleComplex *, int *, - int *, int, int, void *); - static auto func_ptr = LoadSymbol("cusparseZgebsr2gebsr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, - bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, - colBlockDimA, descrC, bsrSortedValC, bsrSortedRowPtrC, - bsrSortedColIndC, rowBlockDimC, colBlockDimC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI -cusparseCreateIdentityPermutation(cusparseHandle_t handle, int n, int *p) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, int *); - static auto func_ptr = - LoadSymbol("cusparseCreateIdentityPermutation"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, n, p); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcoosort_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, const int *cooRowsA, - const int *cooColsA, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const int *, const int *, size_t *); - static auto func_ptr = LoadSymbol("cusparseXcoosort_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, cooRowsA, cooColsA, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcoosortByRow(cusparseHandle_t handle, - int m, 
int n, int nnz, - int *cooRowsA, int *cooColsA, - int *P, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, int *, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseXcoosortByRow"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, cooRowsA, cooColsA, P, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcoosortByColumn(cusparseHandle_t handle, - int m, int n, int nnz, - int *cooRowsA, - int *cooColsA, int *P, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, int *, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseXcoosortByColumn"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, cooRowsA, cooColsA, P, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrsort_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, const int *csrRowPtrA, - const int *csrColIndA, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const int *, const int *, size_t *); - static auto func_ptr = LoadSymbol("cusparseXcsrsort_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrRowPtrA, csrColIndA, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcsrsort(cusparseHandle_t handle, int m, - int n, int nnz, - const cusparseMatDescr_t descrA, - const int *csrRowPtrA, - int *csrColIndA, int *P, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const int *, - int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseXcsrsort"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrRowPtrA, csrColIndA, P, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcscsort_bufferSizeExt( - 
cusparseHandle_t handle, int m, int n, int nnz, const int *cscColPtrA, - const int *cscRowIndA, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const int *, const int *, size_t *); - static auto func_ptr = LoadSymbol("cusparseXcscsort_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, cscColPtrA, cscRowIndA, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseXcscsort(cusparseHandle_t handle, int m, - int n, int nnz, - const cusparseMatDescr_t descrA, - const int *cscColPtrA, - int *cscRowIndA, int *P, - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const int *, - int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseXcscsort"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, cscColPtrA, cscRowIndA, P, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsru2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, float *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, float *, const int *, int *, - csru2csrInfo_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseScsru2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsru2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, double *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, double *, const int *, int *, - csru2csrInfo_t, size_t *); - static auto func_ptr = 
LoadSymbol("cusparseDcsru2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsru2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, cuComplex *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, cuComplex *, const int *, int *, - csru2csrInfo_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseCcsru2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsru2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnz, cuDoubleComplex *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, cuDoubleComplex *, const int *, int *, - csru2csrInfo_t, size_t *); - static auto func_ptr = LoadSymbol("cusparseZcsru2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsru2csr( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, float *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, float *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsru2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); 
-} - -cusparseStatus_t CUSPARSEAPI cusparseDcsru2csr( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, double *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, double *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsru2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsru2csr( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, cuComplex *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsru2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsru2csr( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, cuDoubleComplex *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, - cuDoubleComplex *, const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsru2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseScsr2csru( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, float *csrVal, const int 
*csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, float *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseScsr2csru"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDcsr2csru( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, double *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, double *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseDcsr2csru"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseCcsr2csru( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, cuComplex *csrVal, const int *csrRowPtr, - int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, cuComplex *, - const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseCcsr2csru"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseZcsr2csru( - cusparseHandle_t handle, int m, int n, int nnz, - const cusparseMatDescr_t descrA, cuDoubleComplex *csrVal, - const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const 
cusparseMatDescr_t, - cuDoubleComplex *, const int *, int *, csru2csrInfo_t, void *); - static auto func_ptr = LoadSymbol("cusparseZcsr2csru"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - const float *threshold, const cusparseMatDescr_t descrC, - const float *csrValC, const int *csrRowPtrC, const int *csrColIndC, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, const float *, - const cusparseMatDescr_t, const float *, const int *, const int *, - size_t *); - static auto func_ptr = - LoadSymbol("cusparseSpruneDense2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrValC, csrRowPtrC, - csrColIndC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - const double *threshold, const cusparseMatDescr_t descrC, - const double *csrValC, const int *csrRowPtrC, const int *csrColIndC, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, const double *, - const cusparseMatDescr_t, const double *, const int *, const int *, - size_t *); - static auto func_ptr = - LoadSymbol("cusparseDpruneDense2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrValC, csrRowPtrC, - csrColIndC, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csrNnz( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - const float *threshold, const cusparseMatDescr_t descrC, int *csrRowPtrC, - int 
*nnzTotalDevHostPtr, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, const float *, - const cusparseMatDescr_t, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseSpruneDense2csrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrRowPtrC, - nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csrNnz( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - const double *threshold, const cusparseMatDescr_t descrC, int *csrRowPtrC, - int *nnzTotalDevHostPtr, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, const double *, - const cusparseMatDescr_t, int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseDpruneDense2csrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrRowPtrC, - nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csr( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - const float *threshold, const cusparseMatDescr_t descrC, float *csrValC, - const int *csrRowPtrC, int *csrColIndC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, const float *, - const cusparseMatDescr_t, float *, const int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseSpruneDense2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrValC, csrRowPtrC, - csrColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csr( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - const double *threshold, const cusparseMatDescr_t descrC, double *csrValC, - const int *csrRowPtrC, int *csrColIndC, void *pBuffer) { - using 
FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, const double *, - const cusparseMatDescr_t, double *, const int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseDpruneDense2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, threshold, descrC, csrValC, csrRowPtrC, - csrColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrValA, - const int *csrRowPtrA, const int *csrColIndA, const float *threshold, - const cusparseMatDescr_t descrC, const float *csrValC, - const int *csrRowPtrC, const int *csrColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, const float *, const cusparseMatDescr_t, - const float *, const int *, const int *, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSpruneCsr2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrValA, csrRowPtrA, csrColIndA, - threshold, descrC, csrValC, csrRowPtrC, csrColIndC, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csr_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrValA, - const int *csrRowPtrA, const int *csrColIndA, const double *threshold, - const cusparseMatDescr_t descrC, const double *csrValC, - const int *csrRowPtrC, const int *csrColIndC, size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, const cusparseMatDescr_t, - const double *, const int *, const int *, size_t *); - static auto func_ptr = - 
LoadSymbol("cusparseDpruneCsr2csr_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrValA, csrRowPtrA, csrColIndA, - threshold, descrC, csrValC, csrRowPtrC, csrColIndC, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csrNnz( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrValA, - const int *csrRowPtrA, const int *csrColIndA, const float *threshold, - const cusparseMatDescr_t descrC, int *csrRowPtrC, - int *nnzTotalDevHostPtr, /* can be on host or device */ - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, const float *, const cusparseMatDescr_t, int *, - int *, void *); - static auto func_ptr = LoadSymbol("cusparseSpruneCsr2csrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrValA, csrRowPtrA, csrColIndA, - threshold, descrC, csrRowPtrC, nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrNnz( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrValA, - const int *csrRowPtrA, const int *csrColIndA, const double *threshold, - const cusparseMatDescr_t descrC, int *csrRowPtrC, - int *nnzTotalDevHostPtr, /* can be on host or device */ - void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, const cusparseMatDescr_t, int *, - int *, void *); - static auto func_ptr = LoadSymbol("cusparseDpruneCsr2csrNnz"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrValA, csrRowPtrA, csrColIndA, - threshold, descrC, csrRowPtrC, nnzTotalDevHostPtr, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI 
cusparseSpruneCsr2csr( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrValA, - const int *csrRowPtrA, const int *csrColIndA, const float *threshold, - const cusparseMatDescr_t descrC, float *csrValC, const int *csrRowPtrC, - int *csrColIndC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, const float *, const cusparseMatDescr_t, - float *, const int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseSpruneCsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrValA, csrRowPtrA, csrColIndA, - threshold, descrC, csrValC, csrRowPtrC, csrColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csr( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrValA, - const int *csrRowPtrA, const int *csrColIndA, const double *threshold, - const cusparseMatDescr_t descrC, double *csrValC, const int *csrRowPtrC, - int *csrColIndC, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, const double *, const cusparseMatDescr_t, - double *, const int *, int *, void *); - static auto func_ptr = LoadSymbol("cusparseDpruneCsr2csr"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrValA, csrRowPtrA, csrColIndA, - threshold, descrC, csrValC, csrRowPtrC, csrColIndC, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csrByPercentage_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - float percentage, /* between 0 to 100 */ - const cusparseMatDescr_t descrC, const float *csrValC, - const int *csrRowPtrC, const int *csrColIndC, pruneInfo_t info, - size_t *pBufferSizeInBytes) { 
- using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, float, - const cusparseMatDescr_t, const float *, const int *, const int *, - pruneInfo_t, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSpruneDense2csrByPercentage_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrValC, csrRowPtrC, - csrColIndC, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csrByPercentage_bufferSizeExt( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - float percentage, /* between 0 to 100 */ - const cusparseMatDescr_t descrC, const double *csrValC, - const int *csrRowPtrC, const int *csrColIndC, pruneInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, float, - const cusparseMatDescr_t, const double *, const int *, const int *, - pruneInfo_t, size_t *); - static auto func_ptr = - LoadSymbol("cusparseDpruneDense2csrByPercentage_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrValC, csrRowPtrC, - csrColIndC, info, pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csrNnzByPercentage( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - float percentage, /* between 0 to 100 */ - const cusparseMatDescr_t descrC, int *csrRowPtrC, - int *nnzTotalDevHostPtr, /* can be on host or device */ - pruneInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, float, - const cusparseMatDescr_t, int *, int *, pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseSpruneDense2csrNnzByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrRowPtrC, - 
nnzTotalDevHostPtr, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csrNnzByPercentage( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - float percentage, /* between 0 to 100 */ - const cusparseMatDescr_t descrC, int *csrRowPtrC, - int *nnzTotalDevHostPtr, /* can be on host or device */ - pruneInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, float, - const cusparseMatDescr_t, int *, int *, pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseDpruneDense2csrNnzByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrRowPtrC, - nnzTotalDevHostPtr, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csrByPercentage( - cusparseHandle_t handle, int m, int n, const float *A, int lda, - float percentage, /* between 0 to 100 */ - const cusparseMatDescr_t descrC, float *csrValC, const int *csrRowPtrC, - int *csrColIndC, pruneInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const float *, int, float, - const cusparseMatDescr_t, float *, const int *, int *, pruneInfo_t, - void *); - static auto func_ptr = - LoadSymbol("cusparseSpruneDense2csrByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrValC, csrRowPtrC, - csrColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csrByPercentage( - cusparseHandle_t handle, int m, int n, const double *A, int lda, - float percentage, /* between 0 to 100 */ - const cusparseMatDescr_t descrC, double *csrValC, const int *csrRowPtrC, - int *csrColIndC, pruneInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, const double *, int, float, - const cusparseMatDescr_t, double *, const int *, int *, 
pruneInfo_t, - void *); - static auto func_ptr = - LoadSymbol("cusparseDpruneDense2csrByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, A, lda, percentage, descrC, csrValC, csrRowPtrC, - csrColIndC, info, pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csrByPercentage_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrValA, - const int *csrRowPtrA, const int *csrColIndA, - float percentage, /* between 0 to 100 */ - const cusparseMatDescr_t descrC, const float *csrValC, - const int *csrRowPtrC, const int *csrColIndC, pruneInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, float, const cusparseMatDescr_t, const float *, - const int *, const int *, pruneInfo_t, size_t *); - static auto func_ptr = - LoadSymbol("cusparseSpruneCsr2csrByPercentage_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrValA, csrRowPtrA, csrColIndA, - percentage, descrC, csrValC, csrRowPtrC, csrColIndC, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrByPercentage_bufferSizeExt( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrValA, - const int *csrRowPtrA, const int *csrColIndA, - float percentage, /* between 0 to 100 */ - const cusparseMatDescr_t descrC, const double *csrValC, - const int *csrRowPtrC, const int *csrColIndC, pruneInfo_t info, - size_t *pBufferSizeInBytes) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, float, const cusparseMatDescr_t, const double *, - const int *, const int *, pruneInfo_t, size_t *); - static auto func_ptr = - 
LoadSymbol("cusparseDpruneCsr2csrByPercentage_bufferSizeExt"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrValA, csrRowPtrA, csrColIndA, - percentage, descrC, csrValC, csrRowPtrC, csrColIndC, info, - pBufferSizeInBytes); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csrNnzByPercentage( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrValA, - const int *csrRowPtrA, const int *csrColIndA, - float percentage, /* between 0 to 100 */ - const cusparseMatDescr_t descrC, int *csrRowPtrC, - int *nnzTotalDevHostPtr, /* can be on host or device */ - pruneInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, float, const cusparseMatDescr_t, int *, int *, - pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseSpruneCsr2csrNnzByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrValA, csrRowPtrA, csrColIndA, - percentage, descrC, csrRowPtrC, nnzTotalDevHostPtr, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrNnzByPercentage( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrValA, - const int *csrRowPtrA, const int *csrColIndA, - float percentage, /* between 0 to 100 */ - const cusparseMatDescr_t descrC, int *csrRowPtrC, - int *nnzTotalDevHostPtr, /* can be on host or device */ - pruneInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, float, const cusparseMatDescr_t, int *, int *, - pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseDpruneCsr2csrNnzByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(handle, m, n, nnzA, descrA, csrValA, csrRowPtrA, csrColIndA, - percentage, descrC, csrRowPtrC, nnzTotalDevHostPtr, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csrByPercentage( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const float *csrValA, - const int *csrRowPtrA, const int *csrColIndA, - float percentage, /* between 0 to 100 */ - const cusparseMatDescr_t descrC, float *csrValC, const int *csrRowPtrC, - int *csrColIndC, pruneInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, - const int *, const int *, float, const cusparseMatDescr_t, float *, - const int *, int *, pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseSpruneCsr2csrByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrValA, csrRowPtrA, csrColIndA, - percentage, descrC, csrValC, csrRowPtrC, csrColIndC, info, - pBuffer); -} - -cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrByPercentage( - cusparseHandle_t handle, int m, int n, int nnzA, - const cusparseMatDescr_t descrA, const double *csrValA, - const int *csrRowPtrA, const int *csrColIndA, - float percentage, /* between 0 to 100 */ - const cusparseMatDescr_t descrC, double *csrValC, const int *csrRowPtrC, - int *csrColIndC, pruneInfo_t info, void *pBuffer) { - using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( - cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, - const int *, const int *, float, const cusparseMatDescr_t, double *, - const int *, int *, pruneInfo_t, void *); - static auto func_ptr = - LoadSymbol("cusparseDpruneCsr2csrByPercentage"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, m, n, nnzA, descrA, csrValA, csrRowPtrA, csrColIndA, - percentage, descrC, csrValC, csrRowPtrC, csrColIndC, info, - pBuffer); -} - -} // extern "C" 
diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cusparse_stub.cc b/third_party/xla/third_party/tsl/tsl/cuda/cusparse_stub.cc index a138c014248904..16141e51e2613b 100644 --- a/third_party/xla/third_party/tsl/tsl/cuda/cusparse_stub.cc +++ b/third_party/xla/third_party/tsl/tsl/cuda/cusparse_stub.cc @@ -26,8 +26,7 @@ void* GetDsoHandle() { return nullptr; #else static auto handle = []() -> void* { - auto handle_or = - tsl::internal::DsoLoader::GetCusparseDsoHandle(); + auto handle_or = tsl::internal::DsoLoader::GetCusparseDsoHandle(); if (!handle_or.ok()) return nullptr; return handle_or.value(); }(); @@ -35,32 +34,40 @@ void* GetDsoHandle() { #endif } -template -T LoadSymbol(const char* symbol_name) { +void* LoadSymbol(const char* symbol_name) { void* symbol = nullptr; if (auto handle = GetDsoHandle()) { tsl::Env::Default() ->GetSymbolFromLibrary(handle, symbol_name, &symbol) .IgnoreError(); } - return reinterpret_cast(symbol); + return symbol; } -cusparseStatus_t GetSymbolNotFoundError() { +const char* kSymbols[] = { +#include "tsl/cuda/cusparse.inc" +}; + +constexpr size_t kNumSymbols = sizeof(kSymbols) / sizeof(const char*); + +} // namespace + +extern "C" { + +static cusparseStatus_t GetSymbolNotFoundError() { return CUSPARSE_STATUS_INTERNAL_ERROR; } -} // namespace -#if CUDA_VERSION < 10000 -#include "tsl/cuda/cusparse_9_0.inc" -#elif CUDA_VERSION < 10010 -#include "tsl/cuda/cusparse_10_0.inc" -#elif CUDA_VERSION < 10020 -#include "tsl/cuda/cusparse_10_1.inc" -#elif CUDA_VERSION < 11000 -#include "tsl/cuda/cusparse_10_2.inc" -#elif CUDA_VERSION < 12000 -#include "tsl/cuda/cusparse_11_0.inc" -#else -#include "tsl/cuda/cusparse_12_0.inc" -#endif +extern void* _cusparse_tramp_table[]; + +void _cusparse_tramp_resolve(int i) { + CHECK_LE(0, i); + CHECK_LT(i, kNumSymbols); + void* p = LoadSymbol(kSymbols[i]); + if (!p) { + p = reinterpret_cast(&GetSymbolNotFoundError); + } + _cusparse_tramp_table[i] = p; +} + +} // extern "C" diff --git 
a/third_party/xla/third_party/tsl/tsl/cuda/stub.bzl b/third_party/xla/third_party/tsl/tsl/cuda/stub.bzl new file mode 100644 index 00000000000000..0dbfad09658083 --- /dev/null +++ b/third_party/xla/third_party/tsl/tsl/cuda/stub.bzl @@ -0,0 +1,26 @@ +"""Macros to generate CUDA library stubs from a list of symbols.""" + +def cuda_stub(name, srcs): + """Generates a CUDA stub from a list of symbols. + + Generates two files: + * library.inc, which contains a list of symbols suitable for inclusion by + C++, and + * library.tramp.S, which contains assembly-language trampolines for each + symbol. + """ + native.genrule( + name = "{}_stub_gen".format(name), + srcs = srcs, + tools = ["//third_party/implib_so:make_stub"], + outs = [ + "{}.inc".format(name), + "{}.tramp.S".format(name), + ], + tags = ["gpu"], + cmd = select({ + "//tsl:linux_aarch64": "$(location //third_party/implib_so:make_stub) $< --outdir $(RULEDIR) --target aarch64", + "//tsl:linux_x86_64": "$(location //third_party/implib_so:make_stub) $< --outdir $(RULEDIR) --target x86_64", + "//conditions:default": "NOT_IMPLEMENTED_FOR_THIS_PLATFORM_OR_ARCHITECTURE", + }), + ) diff --git a/third_party/xla/third_party/tsl/workspace2.bzl b/third_party/xla/third_party/tsl/workspace2.bzl index ac600c35390300..f0c95504df6757 100644 --- a/third_party/xla/third_party/tsl/workspace2.bzl +++ b/third_party/xla/third_party/tsl/workspace2.bzl @@ -2,42 +2,43 @@ # Import third party config rules. 
load("@bazel_skylib//lib:versions.bzl", "versions") -load("//third_party/gpus:cuda_configure.bzl", "cuda_configure") -load("//third_party/gpus:rocm_configure.bzl", "rocm_configure") -load("//third_party/tensorrt:tensorrt_configure.bzl", "tensorrt_configure") -load("//third_party/nccl:nccl_configure.bzl", "nccl_configure") -load("//third_party/git:git_configure.bzl", "git_configure") -load("//third_party/py:python_configure.bzl", "python_configure") -load("//third_party/systemlibs:syslibs_configure.bzl", "syslibs_configure") -load("//tools/toolchains:cpus/aarch64/aarch64_compiler_configure.bzl", "aarch64_compiler_configure") -load("//tools/toolchains:cpus/arm/arm_compiler_configure.bzl", "arm_compiler_configure") -load("//tools/toolchains/embedded/arm-linux:arm_linux_toolchain_configure.bzl", "arm_linux_toolchain_configure") + +# Import external repository rules. +load("@bazel_tools//tools/build_defs/repo:java.bzl", "java_import_external") +load("@io_bazel_rules_closure//closure:defs.bzl", "filegroup_external") +load("@tf_runtime//:dependencies.bzl", "tfrt_dependencies") load("//third_party:repo.bzl", "tf_http_archive", "tf_mirror_urls") -load("//third_party/clang_toolchain:cc_configure_clang.bzl", "cc_download_clang_toolchain") -load("//tools/def_file_filter:def_file_filter_configure.bzl", "def_file_filter_configure") -load("//third_party/llvm:setup.bzl", "llvm_setup") # Import third party repository rules. See go/tfbr-thirdparty. 
load("//third_party/absl:workspace.bzl", absl = "repo") load("//third_party/benchmark:workspace.bzl", benchmark = "repo") +load("//third_party/clang_toolchain:cc_configure_clang.bzl", "cc_download_clang_toolchain") load("//third_party/eigen3:workspace.bzl", eigen3 = "repo") load("//third_party/farmhash:workspace.bzl", farmhash = "repo") load("//third_party/gemmlowp:workspace.bzl", gemmlowp = "repo") +load("//third_party/git:git_configure.bzl", "git_configure") +load("//third_party/gpus:cuda_configure.bzl", "cuda_configure") +load("//third_party/gpus:rocm_configure.bzl", "rocm_configure") load("//third_party/hwloc:workspace.bzl", hwloc = "repo") +load("//third_party/implib_so:workspace.bzl", implib_so = "repo") load("//third_party/jpeg:workspace.bzl", jpeg = "repo") +load("//third_party/llvm:setup.bzl", "llvm_setup") load("//third_party/nasm:workspace.bzl", nasm = "repo") +load("//third_party/nccl:nccl_configure.bzl", "nccl_configure") +load("//third_party/py:python_configure.bzl", "python_configure") load("//third_party/py/ml_dtypes:workspace.bzl", ml_dtypes = "repo") load("//third_party/pybind11_abseil:workspace.bzl", pybind11_abseil = "repo") load("//third_party/pybind11_bazel:workspace.bzl", pybind11_bazel = "repo") +load("//third_party/systemlibs:syslibs_configure.bzl", "syslibs_configure") +load("//third_party/tensorrt:tensorrt_configure.bzl", "tensorrt_configure") load("//third_party/tensorrt:workspace.bzl", tensorrt = "repo") - -# Import external repository rules. 
-load("@bazel_tools//tools/build_defs/repo:java.bzl", "java_import_external") -load("@io_bazel_rules_closure//closure:defs.bzl", "filegroup_external") -load("@tf_runtime//:dependencies.bzl", "tfrt_dependencies") -load("//tools/toolchains/remote_config:configs.bzl", "initialize_rbe_configs") -load("//tools/toolchains/remote:configure.bzl", "remote_execution_configure") +load("//tools/def_file_filter:def_file_filter_configure.bzl", "def_file_filter_configure") +load("//tools/toolchains:cpus/aarch64/aarch64_compiler_configure.bzl", "aarch64_compiler_configure") +load("//tools/toolchains:cpus/arm/arm_compiler_configure.bzl", "arm_compiler_configure") load("//tools/toolchains/clang6:repo.bzl", "clang6_configure") +load("//tools/toolchains/embedded/arm-linux:arm_linux_toolchain_configure.bzl", "arm_linux_toolchain_configure") +load("//tools/toolchains/remote:configure.bzl", "remote_execution_configure") +load("//tools/toolchains/remote_config:configs.bzl", "initialize_rbe_configs") def _initialize_third_party(): """ Load third party repositories. See above load() statements. """ @@ -47,6 +48,7 @@ def _initialize_third_party(): farmhash() gemmlowp() hwloc() + implib_so() jpeg() ml_dtypes() nasm() From 04c49a0785d25d0f1878605a719748d6e22d0bcc Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Fri, 22 Sep 2023 09:03:24 -0700 Subject: [PATCH 147/567] Cache buffer sizes in `BufferAssignment` when they are accessed. In some cases, the buffer-size computation involves a non-trivial amount of work, and executing it multiple times per value can be very expensive. 
PiperOrigin-RevId: 567636486 --- third_party/xla/xla/service/buffer_assignment.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/third_party/xla/xla/service/buffer_assignment.h b/third_party/xla/xla/service/buffer_assignment.h index 8c43b47fe9ded5..da46122da3b6f0 100644 --- a/third_party/xla/xla/service/buffer_assignment.h +++ b/third_party/xla/xla/service/buffer_assignment.h @@ -551,10 +551,13 @@ class BufferAssignment { BufferAllocation* GetMutableAllocation(BufferAllocation::Index index); int64_t HloBufferSize(const HloBuffer& buffer) { + auto iter = cached_buffer_sizes_.find(buffer.id()); + if (iter != cached_buffer_sizes_.end()) return iter->second; int64_t result = 0; for (const HloValue* value : buffer.values()) { result = std::max(result, buffer_size_(*value)); } + cached_buffer_sizes_.insert({buffer.id(), result}); return result; } @@ -593,6 +596,8 @@ class BufferAssignment { Stats stats_; + absl::flat_hash_map cached_buffer_sizes_; + BufferAssignment(const BufferAssignment&) = delete; BufferAssignment& operator=(const BufferAssignment&) = delete; }; From 6b81dd15bd52110374f287b9d54ab7660fb0ce20 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 22 Sep 2023 09:10:54 -0700 Subject: [PATCH 148/567] Update TFRT dependency to use revision http://github.com/tensorflow/runtime/commit/3b7012849fde349a679c495f5bd421935d6599f9. PiperOrigin-RevId: 567638210 --- third_party/tf_runtime/workspace.bzl | 4 ++-- third_party/xla/third_party/tf_runtime/workspace.bzl | 4 ++-- .../xla/third_party/tsl/third_party/tf_runtime/workspace.bzl | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/third_party/tf_runtime/workspace.bzl b/third_party/tf_runtime/workspace.bzl index 3671380535a7b8..7fb69c7a3ed804 100644 --- a/third_party/tf_runtime/workspace.bzl +++ b/third_party/tf_runtime/workspace.bzl @@ -6,8 +6,8 @@ def repo(): """Imports TFRT.""" # Attention: tools parse and update these lines. 
- TFRT_COMMIT = "e2e1f9f197af799cbf558eacd26221695a7971ce" - TFRT_SHA256 = "ef6ca7d0ab5fce018a8ae64500de96dffd838d1ed4bef514d797ba2b99bd8908" + TFRT_COMMIT = "3b7012849fde349a679c495f5bd421935d6599f9" + TFRT_SHA256 = "aff0363d40d564af9ad599f40b4b24b819e84549eb7ea9d5fb8875817c76a4bf" tf_http_archive( name = "tf_runtime", diff --git a/third_party/xla/third_party/tf_runtime/workspace.bzl b/third_party/xla/third_party/tf_runtime/workspace.bzl index 3671380535a7b8..7fb69c7a3ed804 100644 --- a/third_party/xla/third_party/tf_runtime/workspace.bzl +++ b/third_party/xla/third_party/tf_runtime/workspace.bzl @@ -6,8 +6,8 @@ def repo(): """Imports TFRT.""" # Attention: tools parse and update these lines. - TFRT_COMMIT = "e2e1f9f197af799cbf558eacd26221695a7971ce" - TFRT_SHA256 = "ef6ca7d0ab5fce018a8ae64500de96dffd838d1ed4bef514d797ba2b99bd8908" + TFRT_COMMIT = "3b7012849fde349a679c495f5bd421935d6599f9" + TFRT_SHA256 = "aff0363d40d564af9ad599f40b4b24b819e84549eb7ea9d5fb8875817c76a4bf" tf_http_archive( name = "tf_runtime", diff --git a/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl b/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl index 3671380535a7b8..7fb69c7a3ed804 100644 --- a/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl +++ b/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl @@ -6,8 +6,8 @@ def repo(): """Imports TFRT.""" # Attention: tools parse and update these lines. - TFRT_COMMIT = "e2e1f9f197af799cbf558eacd26221695a7971ce" - TFRT_SHA256 = "ef6ca7d0ab5fce018a8ae64500de96dffd838d1ed4bef514d797ba2b99bd8908" + TFRT_COMMIT = "3b7012849fde349a679c495f5bd421935d6599f9" + TFRT_SHA256 = "aff0363d40d564af9ad599f40b4b24b819e84549eb7ea9d5fb8875817c76a4bf" tf_http_archive( name = "tf_runtime", From f697bba3b44fbb5230714acaf2a8412bcae4ec2a Mon Sep 17 00:00:00 2001 From: Ramesh Sampath Date: Fri, 22 Sep 2023 10:06:25 -0700 Subject: [PATCH 149/567] Remove allow_multiple_exports param from tf_export. 
PiperOrigin-RevId: 567651676 --- tensorflow/python/util/tf_export.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/util/tf_export.py b/tensorflow/python/util/tf_export.py index 7caaa07319f77e..ade7504ca75fae 100644 --- a/tensorflow/python/util/tf_export.py +++ b/tensorflow/python/util/tf_export.py @@ -279,6 +279,7 @@ def __init__( *args: str, api_name: str = TENSORFLOW_API_NAME, v1: Optional[Sequence[str]] = None, + allow_multiple_exports: bool = True, # pylint: disable=unused-argument ): """Export under the names *args (first one is considered canonical). @@ -288,6 +289,7 @@ def __init__( `estimator`). Default is `tensorflow`. v1: Names for the TensorFlow V1 API. If not set, we will use V2 API names both for TensorFlow V1 and V2 APIs. + allow_multiple_exports: Deprecated. """ self._names = args self._names_v1 = v1 if v1 is not None else args @@ -318,11 +320,13 @@ def _validate_symbol_names(self) -> None: '@tf_export is not allowed to export symbols under %s.*' % (subpackage) ) - elif not all(n.startswith(self._api_name) for n in all_symbol_names): - raise InvalidSymbolNameError( - 'Can only export symbols under package name of component. e.g.' - ' tensorflow_estimator must export all symbols under tf.estimator' - ) + else: + if not all(n.startswith(self._api_name) for n in all_symbol_names): + raise InvalidSymbolNameError( + 'Can only export symbols under package name of component. ' + 'e.g. tensorflow_estimator must export all symbols under ' + 'tf.estimator' + ) def __call__(self, func: T) -> T: """Calls this decorator. @@ -407,6 +411,7 @@ def __call__( self, *v2: str, v1: Optional[Sequence[str]] = None, + allow_multiple_exports: bool = True, # Deprecated, no-op ) -> api_export: ... From 6cfebd9843e5944f49eb937ead055bec0708c080 Mon Sep 17 00:00:00 2001 From: Shibo Wang Date: Fri, 22 Sep 2023 10:21:36 -0700 Subject: [PATCH 150/567] Improve shardings if the users of a const broadcast have conflict shardings. 
PiperOrigin-RevId: 567655772 --- .../xla/xla/hlo/transforms/hlo_constant_splitter.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/third_party/xla/xla/hlo/transforms/hlo_constant_splitter.cc b/third_party/xla/xla/hlo/transforms/hlo_constant_splitter.cc index 461824e53d5eb3..c434e9509e4908 100644 --- a/third_party/xla/xla/hlo/transforms/hlo_constant_splitter.cc +++ b/third_party/xla/xla/hlo/transforms/hlo_constant_splitter.cc @@ -42,6 +42,9 @@ bool IsSupportedConstantExpression(const HloInstruction* instruction) { StatusOr DuplicateConstantExpressionPerUser(HloComputation* computation, HloInstruction* to_clone, HloInstruction* user) { + if (to_clone->user_count() == 1) { + return false; + } absl::InlinedVector, 8> worklist( 1, std::make_pair(to_clone, 0)); absl::InlinedVector to_clone_vec; @@ -153,7 +156,7 @@ StatusOr HloConstantSplitter::Run( // Perform duplication of the constants/constant expressions. for (HloInstruction* instruction : constants_list) { - if (instruction->user_count() == 0) { + if (instruction->user_count() <= 1) { continue; } absl::InlinedVector users; @@ -161,7 +164,7 @@ StatusOr HloConstantSplitter::Run( // Consider for splitting only leaf expressions (not constants in the // middle of a constant expression). Also only split for non-constant // users for expressions. 
- for (int i = 1; i < instruction->user_count(); ++i) { + for (int i = 0; i < instruction->user_count(); ++i) { if (instruction->opcode() == HloOpcode::kConstant || !constants_set.contains(instruction->users()[i])) { users.push_back(instruction->users()[i]); From ac8da081f7c9af3ce661a4e84dc699762d6830f7 Mon Sep 17 00:00:00 2001 From: Gabriel Rasskin Date: Fri, 22 Sep 2023 10:24:37 -0700 Subject: [PATCH 151/567] Internal Code Change PiperOrigin-RevId: 567656609 --- tensorflow/lite/python/authoring/BUILD | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/lite/python/authoring/BUILD b/tensorflow/lite/python/authoring/BUILD index c51e9f249e9414..cf1b46790f5112 100644 --- a/tensorflow/lite/python/authoring/BUILD +++ b/tensorflow/lite/python/authoring/BUILD @@ -6,7 +6,6 @@ package( "//tensorflow:internal", "//tensorflow_estimator:__subpackages__", "//tensorflow_federated:__subpackages__", - "//third_party/py/keras:__subpackages__", "//third_party/py/tensorflow:__subpackages__", ], licenses = ["notice"], From 9a2a49232ca43b6254ecdce2f5549f1d71339b44 Mon Sep 17 00:00:00 2001 From: Sagun Bajra Date: Fri, 22 Sep 2023 10:43:20 -0700 Subject: [PATCH 152/567] Avoid deleting the boolean inputs from the top level FunctionDef's input signature when applying small_constants_optimizer. PiperOrigin-RevId: 567661561 --- tensorflow/core/common_runtime/eager/execute.cc | 17 +++++++++++------ .../eager/small_constants_optimizer.cc | 4 ++-- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/tensorflow/core/common_runtime/eager/execute.cc b/tensorflow/core/common_runtime/eager/execute.cc index 60c65e60d5c61b..2d37223de46e90 100644 --- a/tensorflow/core/common_runtime/eager/execute.cc +++ b/tensorflow/core/common_runtime/eager/execute.cc @@ -80,6 +80,7 @@ limitations under the License. 
#include "tensorflow/core/protobuf/error_codes.pb.h" #include "tensorflow/core/util/device_name_utils.h" #include "tsl/platform/fingerprint.h" +#include "tsl/platform/statusor.h" #if !defined(IS_MOBILE_PLATFORM) #include "tensorflow/core/distributed_runtime/eager/eager_client.h" #include "tensorflow/core/distributed_runtime/eager/remote_copy_node.h" @@ -1063,11 +1064,13 @@ bool IntArgsAndRetvalsOnDevice(EagerOperation* op, using BoolTensorInputs = std::vector>; -// Removes boolean tensor inputs from the EagerOperation and returns them. -// Currently this is only useful to invoke when small_constants_optimizer is -// enabled because the runtime will have equivalent FunctionDefs of the original -// tf.function without the boolean tensor input. -StatusOr RemoveBoolInputs(EagerOperation* op) { +// Identifies boolean tensor inputs from the EagerOperation and returns them. If +// delete_inputs is set to true then it will also delete them from the +// function's input signature. Currently this is only useful to invoke when +// small_constants_optimizer is enabled because the runtime will have equivalent +// FunctionDefs of the original tf.function without the boolean tensor input. +StatusOr GetBoolInputs(EagerOperation* op, + bool delete_inputs) { BoolTensorInputs result; if (!op->is_function()) return result; // Extract tensor inputs. @@ -1108,6 +1111,7 @@ StatusOr RemoveBoolInputs(EagerOperation* op) { result.emplace_back(input_arg.name(), input_value); } + if (!delete_inputs) return result; // If we were able to identify all boolean inputs, update the op's inputs. op->Clear(); for (auto* input : stripped_inputs) { @@ -1327,7 +1331,8 @@ Status GetOrCreateKernelAndDevice( // Update the EagerOperation with information about the boolean input tensors // when small constant optimization is enabled. 
if (IsSmallConstantOptimizationEnabled(*op)) { - TF_ASSIGN_OR_RETURN(BoolTensorInputs bool_inputs, RemoveBoolInputs(op)); + TF_ASSIGN_OR_RETURN(BoolTensorInputs bool_inputs, + GetBoolInputs(op, /*delete_inputs=*/false)); string folded_name = op->Name(); for (const auto& [input_name, input_value] : bool_inputs) { folded_name = small_constants_optimizer::FoldedFunctionName( diff --git a/tensorflow/core/common_runtime/eager/small_constants_optimizer.cc b/tensorflow/core/common_runtime/eager/small_constants_optimizer.cc index 7d43f29029f0ce..29f2fe833d6e60 100644 --- a/tensorflow/core/common_runtime/eager/small_constants_optimizer.cc +++ b/tensorflow/core/common_runtime/eager/small_constants_optimizer.cc @@ -244,14 +244,14 @@ void GenerateTrueAndFalseFunctions(const FunctionDef& fdef, // Add f_true(s). auto true_fdefs = FoldBoolInputTensor(fdef, input_name_to_fold, /*input_value=*/true, - /*delete_input=*/true, flib, folded_functions); + /*delete_input=*/false, flib, folded_functions); for (FunctionDef& fdef : true_fdefs) DisableBoolInputFolding(fdef); result.insert(result.end(), std::make_move_iterator(true_fdefs.begin()), std::make_move_iterator(true_fdefs.end())); // Add f_false(s). auto false_fdefs = FoldBoolInputTensor(fdef, input_name_to_fold, /*input_value=*/false, - /*delete_input=*/true, flib, folded_functions); + /*delete_input=*/false, flib, folded_functions); for (FunctionDef& fdef : false_fdefs) DisableBoolInputFolding(fdef); result.insert(result.end(), std::make_move_iterator(false_fdefs.begin()), std::make_move_iterator(false_fdefs.end())); From 189556e2126cf36015cd33f2e3fa3f6a7abd0687 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 22 Sep 2023 10:52:57 -0700 Subject: [PATCH 153/567] Check if op kernel is XlaLocalLaunchOp before op kernel GetorCreate inside ExecuteOpConversion for MLRT PiperOrigin-RevId: 567664326 --- .../mlir/tfrt/tests/mlrt/tf_to_mlrt.mlir | 22 +++++ .../compiler/mlir/tfrt/transforms/mlrt/BUILD | 2 + .../mlir/tfrt/transforms/mlrt/tf_to_mlrt.cc | 95 ++++++++++++------- 3 files changed, 83 insertions(+), 36 deletions(-) diff --git a/tensorflow/compiler/mlir/tfrt/tests/mlrt/tf_to_mlrt.mlir b/tensorflow/compiler/mlir/tfrt/tests/mlrt/tf_to_mlrt.mlir index b468f8f88f9b1f..94a28091c7235f 100644 --- a/tensorflow/compiler/mlir/tfrt/tests/mlrt/tf_to_mlrt.mlir +++ b/tensorflow/compiler/mlir/tfrt/tests/mlrt/tf_to_mlrt.mlir @@ -436,3 +436,25 @@ func.func @unused_future(%x: tensor) -> tensor { // CHECK: mlrt.await_all_control [[unused]] return %x : tensor } + +// ----- + +// Test for XlaLaunch + +func.func private @xla_func_0(%arg0: tensor<1x3xf32>, %arg1: tensor<1x3xf32>) -> tensor<1x3xf32> attributes {tf._XlaMustCompile = true, tf._noinline = true, tf._original_func_name = "should_not_be_used"} { + %1 = "tf.AddV2"(%arg0, %arg1) {__op_key = 0: i32} : (tensor<1x3xf32>, tensor<1x3xf32>) -> tensor<1x3xf32> + func.return %1 : tensor<1x3xf32> +} + +// CHECK-LABEL: func @xla_func +func.func @xla_func(%arg0: tensor<1x3xf32>) -> tensor<*xf32> attributes {tf.entry_function = {control_outputs = "", inputs = "input:0", outputs = "output:0"}} { + %0 = "tf.VarHandleOp"() {__op_key = 1: i32, device = "/device:CPU:0", container = "", shared_name = "variable"} : () -> tensor>> + %1 = "tf.ReadVariableOp"(%0) {__op_key = 2: i32, device = "/device:CPU:0"} : (tensor>>) -> tensor<1x3xf32> + // CHECK: tf_mlrt.executeop + // CHECK: tf_mlrt.async_executeop{{.*}}op: \22XlaLaunch\22\0A + // CHECK: tf_mlrt.await + // CHECK: return + // CHECK-SAME: !tf_mlrt.tensor + %2 = "tf.XlaLaunch"(%arg0, %1) {__op_key = 3: i32, _noinline = true, _xla_compile_device_type = "GPU", device = 
"/device:GPU:0", function = @xla_func_0, operandSegmentSizes = array} : (tensor<1x3xf32>, tensor<1x3xf32>) -> tensor<*xf32> + func.return %2 : tensor<*xf32> +} diff --git a/tensorflow/compiler/mlir/tfrt/transforms/mlrt/BUILD b/tensorflow/compiler/mlir/tfrt/transforms/mlrt/BUILD index 2efb225e93b6e4..03558438ac6f6b 100644 --- a/tensorflow/compiler/mlir/tfrt/transforms/mlrt/BUILD +++ b/tensorflow/compiler/mlir/tfrt/transforms/mlrt/BUILD @@ -72,6 +72,8 @@ cc_library( "//tensorflow/compiler/mlir/tfrt/ir/mlrt:mlrt_ops", "//tensorflow/compiler/mlir/tfrt/ir/mlrt:tf_mlrt_ops", "//tensorflow/compiler/mlir/tfrt/ir/mlrt:tf_mlrt_tpu_ops", + "//tensorflow/core:framework", + "//tensorflow/core/platform:status", "//tensorflow/core/tfrt/fallback:fallback_state", "//tensorflow/core/tfrt/fallback:op_kernel_runner_cache", "@com_google_protobuf//:protobuf_headers", diff --git a/tensorflow/compiler/mlir/tfrt/transforms/mlrt/tf_to_mlrt.cc b/tensorflow/compiler/mlir/tfrt/transforms/mlrt/tf_to_mlrt.cc index 603167cb0f5ef8..5f689640e39bd4 100644 --- a/tensorflow/compiler/mlir/tfrt/transforms/mlrt/tf_to_mlrt.cc +++ b/tensorflow/compiler/mlir/tfrt/transforms/mlrt/tf_to_mlrt.cc @@ -47,6 +47,8 @@ limitations under the License. 
#include "tensorflow/compiler/mlir/tfrt/transforms/mlrt/tpu_conversion_patterns.h" #include "tensorflow/compiler/mlir/tfrt/transforms/mlrt/util.h" #include "tensorflow/compiler/mlir/tfrt/transforms/utils.h" +#include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/platform/status.h" #include "tensorflow/core/tfrt/fallback/fallback_state.h" #include "tensorflow/core/tfrt/fallback/op_kernel_runner_cache.h" @@ -54,7 +56,8 @@ namespace tensorflow { namespace mlrt_compiler { namespace { -// TODO(chky): Add registration interface for custom device +constexpr char kXlaLaunchOp[] = "XlaLaunch"; + mlir::Value CreateCustomDevice(mlir::Location loc, llvm::StringRef device_name, mlir::ConversionPatternRewriter &rewriter) { if (device_name == kTpuHostDevice) { @@ -421,20 +424,6 @@ class ExecuteOpConversion final : public mlir::ConversionPattern { std::string node_def_text; google::protobuf::TextFormat::PrintToString(node_def, &node_def_text); - auto op_kernel_runner = op_kernel_cache_.GetOrCreate( - tfrt::Location(nullptr, execute_key), node_def.op(), node_def.device(), - op->getNumOperands(), - [&](tensorflow::AttrValueMap *attr_value_map) { - *attr_value_map = node_def.attr(); - return OkStatus(); - }, - fallback_state_.device_manager(), - fallback_state_.process_function_library_runtime()); - // TODO(290630314): Use LOG_IF when absl logging is available - if (!op_kernel_runner.ok()) { - std::cerr << op_kernel_runner.status() << "\n"; - } - mlir::Value device; if (auto custom_device = op->getAttrOfType(kTfMlrtCustomDevice)) { @@ -444,11 +433,8 @@ class ExecuteOpConversion final : public mlir::ConversionPattern { } mlir::Operation *new_op = nullptr; - if (op_kernel_runner.ok() && (*op_kernel_runner)->IsAsync()) { - // If it is an AsyncOpKernel, we lower it to tf_mlrt.async_executeop, - // which return !mlrt.futures. These results will be converted as - // necessary through the target materialization hook in the type - // converter. 
+ + auto create_async_execute_ops = [&]() -> mlir::LogicalResult { llvm::SmallVector result_types( op->getNumResults(), rewriter.getType()); if (device) { @@ -462,25 +448,62 @@ class ExecuteOpConversion final : public mlir::ConversionPattern { execute_op_registry_.RegisterExecuteOp(new_op, execute_key))) { return op->emitWarning("Fail to register async op"); } + return mlir::success(); + }; + + // TODO(b/300999257): check whether to clean up for AoT mockGpu case later + + if (node_def.op() == kXlaLaunchOp) { + // XlaLaunch Op an AsyncOpKernel, we lower it to tf_mlrt.async_executeop, + // which return !mlrt.futures. These results will be converted as + // necessary through the target materialization hook in the type + // converter. + if (mlir::failed(create_async_execute_ops())) { + return mlir::failure(); + } } else { - // Otherwise, lower to tf_mlrt.executeop. - llvm::SmallVector result_types( - op->getNumResults(), rewriter.getType()); - if (device) { - new_op = rewriter.replaceOpWithNewOp( - op, result_types, device, operands, node_def_text, execute_key); - } else { - new_op = rewriter.replaceOpWithNewOp( - op, result_types, operands, node_def_text, execute_key); + auto op_kernel_runner = op_kernel_cache_.GetOrCreate( + tfrt::Location(nullptr, execute_key), node_def.op(), + node_def.device(), op->getNumOperands(), + [&](tensorflow::AttrValueMap *attr_value_map) { + *attr_value_map = node_def.attr(); + return OkStatus(); + }, + fallback_state_.device_manager(), + fallback_state_.process_function_library_runtime()); + // TODO(290630314): Use LOG_IF when absl logging is available + if (!op_kernel_runner.ok()) { + std::cerr << op_kernel_runner.status() << "\n"; } - if (op_kernel_runner.ok()) { - // Only register this executeop if its opkernel can be created. - // Otherwise, it is an unused op so we don't need to create them at - // runtime. 
- if (mlir::failed( - execute_op_registry_.RegisterExecuteOp(new_op, execute_key))) { - return op->emitWarning("Fail to register sync op"); + if (op_kernel_runner.ok() && (*op_kernel_runner)->IsAsync()) { + // If it is an AsyncOpKernel, we lower it to tf_mlrt.async_executeop, + // which return !mlrt.futures. These results will be converted as + // necessary through the target materialization hook in the type + // converter. + if (mlir::failed(create_async_execute_ops())) { + return mlir::failure(); + } + } else { + // Otherwise, lower to tf_mlrt.executeop. + llvm::SmallVector result_types( + op->getNumResults(), rewriter.getType()); + if (device) { + new_op = rewriter.replaceOpWithNewOp( + op, result_types, device, operands, node_def_text, execute_key); + } else { + new_op = rewriter.replaceOpWithNewOp( + op, result_types, operands, node_def_text, execute_key); + } + + if (op_kernel_runner.ok()) { + // Only register this executeop if its opkernel can be created. + // Otherwise, it is an unused op so we don't need to create them at + // runtime. + if (mlir::failed(execute_op_registry_.RegisterExecuteOp( + new_op, execute_key))) { + return op->emitWarning("Fail to register sync op"); + } } } } From 85ee33e2395f0881c5f20be651ef3fc56a246347 Mon Sep 17 00:00:00 2001 From: Neel Kovelamudi Date: Fri, 22 Sep 2023 11:00:23 -0700 Subject: [PATCH 154/567] Replicate small constants so they don't need to be sent to their successors. A small constant is replicated to each of its successo... 
PiperOrigin-RevId: 567666332 --- tensorflow/core/common_runtime/BUILD | 50 +-- .../replicate_constants_pass.cc | 189 ---------- .../common_runtime/replicate_constants_pass.h | 50 --- .../replicate_constants_pass_test.cc | 334 ------------------ tensorflow/core/config/flag_defs.h | 4 - tensorflow/core/config/flags_api_wrapper.cc | 1 - tensorflow/python/flags_pybind.pyi | 1 - 7 files changed, 9 insertions(+), 620 deletions(-) delete mode 100644 tensorflow/core/common_runtime/replicate_constants_pass.cc delete mode 100644 tensorflow/core/common_runtime/replicate_constants_pass.h delete mode 100644 tensorflow/core/common_runtime/replicate_constants_pass_test.cc diff --git a/tensorflow/core/common_runtime/BUILD b/tensorflow/core/common_runtime/BUILD index 1db3c0d12ed9f5..4b7c5eb8b7c29d 100644 --- a/tensorflow/core/common_runtime/BUILD +++ b/tensorflow/core/common_runtime/BUILD @@ -1,9 +1,7 @@ -load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") load( "//tensorflow:tensorflow.bzl", "if_google", "if_libtpu", - "if_macos", "if_oss", "if_zendnn", "tf_cc_test", @@ -23,24 +21,25 @@ load( "tf_protos_all", "tf_protos_grappler", ) -load( - "//tensorflow/core/platform:build_config_root.bzl", - "if_static", - "tf_cuda_tests_tags", -) load( "//tensorflow/core/platform:rules_cc.bzl", "cc_library", ) load( - "//tensorflow/security/fuzzing:tf_fuzzing.bzl", - "tf_cc_fuzz_test", + "//tensorflow/core/platform:build_config_root.bzl", + "if_static", + "tf_cuda_tests_tags", ) +load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") load( "//third_party/mkl:build_defs.bzl", "if_mkl", "if_mkl_ml", ) +load( + "//tensorflow/security/fuzzing:tf_fuzzing.bzl", + "tf_cc_fuzz_test", +) default_package_visibility = [ "//tensorflow:internal", @@ -300,7 +299,6 @@ filegroup( "renamed_device.h", "rendezvous_mgr.h", "rendezvous_util.h", - "replicate_constants_pass.h", "replicate_per_replica_nodes.h", "ring_alg.h", "ring_gatherer.h", @@ -1134,31 +1132,6 @@ cc_library( alwayslink = 1, ) 
-cc_library( - name = "replicate_constants_pass", - srcs = ["replicate_constants_pass.cc"], - hdrs = ["replicate_constants_pass.h"], - copts = tf_copts(), - deps = [ - ":optimization_registry", - "//tensorflow/core:core_cpu_base", - "//tensorflow/core:framework", - "//tensorflow/core:portable_gif_internal", - "//tensorflow/core/config:flag_defs", - "//tensorflow/core/config:flags", - "//tensorflow/core/framework:node_def_util", - "//tensorflow/core/framework:tensor_proto_cc", - "//tensorflow/core/framework:tensor_shape_proto_cc", - "@com_google_absl//absl/container:btree", - "@com_google_absl//absl/status", - "@com_google_absl//absl/strings", - "@local_tsl//tsl/platform:errors", - "@local_tsl//tsl/platform:status", - "@local_tsl//tsl/platform:statusor", - ], - alwayslink = 1, -) - cc_library( name = "local_device", srcs = ["local_device.cc"], @@ -1975,10 +1948,7 @@ tf_cuda_library( ":step_stats_collector", ":threadpool_device", ":threadpool_device_factory", - ] + if_zendnn([":zen_layout_pass"]) + if_macos( - [], - [":replicate_constants_pass"], - ), + ] + if_zendnn([":zen_layout_pass"]), ) tf_cuda_library( @@ -2342,7 +2312,6 @@ tf_cc_tests( "optimization_registry_test.cc", "pending_counts_test.cc", "placer_inspection_required_ops_utils_test.cc", - "replicate_constants_pass_test.cc", "session_test.cc", "threadpool_device_test.cc", ], @@ -2373,7 +2342,6 @@ tf_cc_tests( "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core:testlib", - "//tensorflow/core/config:flag_defs", "//tensorflow/core/kernels:ops_util", "//tensorflow/core/nccl:collective_communicator", "//tensorflow/core/platform:regexp", diff --git a/tensorflow/core/common_runtime/replicate_constants_pass.cc b/tensorflow/core/common_runtime/replicate_constants_pass.cc deleted file mode 100644 index 73f96d66f940bb..00000000000000 --- a/tensorflow/core/common_runtime/replicate_constants_pass.cc +++ /dev/null @@ -1,189 +0,0 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/core/common_runtime/replicate_constants_pass.h" - -#include -#include -#include -#include -#include - -#include "absl/container/btree_map.h" -#include "absl/status/status.h" -#include "absl/strings/str_cat.h" -#include "tensorflow/core/common_runtime/optimization_registry.h" -#include "tensorflow/core/config/flag_defs.h" -#include "tensorflow/core/config/flags.h" -#include "tensorflow/core/framework/node_def_util.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor.pb.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/framework/tensor_shape.pb.h" -#include "tensorflow/core/graph/graph.h" -#include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/util/device_name_utils.h" -#include "tensorflow/core/util/dump_graph.h" -#include "tsl/platform/errors.h" -#include "tsl/platform/status.h" -#include "tsl/platform/statusor.h" - -namespace tensorflow { -namespace { - -// Maximum size constant to replicate. -constexpr int64_t kMaxSize = 16; - -// Set `node`'s name to /replicate/_ -void SetUniqueName(Graph* graph, Node* node) { - node->set_name(graph->NewName(absl::StrCat(node->name(), "/replicate"))); -} - -// `node` has an output control edge. 
-bool HasControlOut(Node* node) { - auto control_out_it = - std::find_if(node->out_edges().begin(), node->out_edges().end(), - [](const auto& e) { return e->IsControlEdge(); }); - return control_out_it != node->out_edges().end(); -} - -// `node`'s device is a CPU. -bool HasCpuDevice(const Node* node) { - DeviceNameUtils::ParsedName device; - if (!DeviceNameUtils::ParseFullName(node->assigned_device_name(), &device)) - return false; - return device.type == "CPU"; -} - -// Get the CPU device on the same host as dst. -Status GetDestinationCpuDevice(const Node* dst, std::string* device) { - if (!dst->has_assigned_device_name()) - return absl::AbortedError( - absl::StrCat("Node name: ", dst->name(), " has no assigned device.")); - return DeviceNameUtils::DeviceNameToCpuDeviceName(dst->assigned_device_name(), - device); -} - -// Collect the successor edges of the constant. Group them by the device of the -// successor. -Status GetSuccessorEdges( - Node* node, - absl::btree_map>& device_to_edges) { - for (const auto& edge : node->out_edges()) { - const Node* dst = edge->dst(); - std::string device; - TF_RETURN_IF_ERROR(GetDestinationCpuDevice(dst, &device)); - if (!device_to_edges.count(device)) device_to_edges.insert({device, {}}); - device_to_edges[device].push_back(edge); - } - return OkStatus(); -} - -// Replicate the constant to each successor device. -void ReplicateToEachDevice( - Graph* graph, Node* node, - absl::btree_map>& device_to_edges) { - for (const auto& pair : device_to_edges) { - Node* copy = graph->CopyNode(node); - SetUniqueName(graph, copy); - const std::string device = pair.first; - copy->set_assigned_device_name(device); - // Set the successor edges to ops on this device. - for (const Edge* edge : pair.second) { - graph->AddEdge(copy, edge->src_output(), edge->dst(), edge->dst_input()); - } - // Replicate in edges that are control. 
- for (Node* src : node->in_nodes()) { - graph->AddControlEdge(src, copy, true); - } - } - graph->RemoveNode(node); -} - -} // namespace - -Status ReplicateConstantsPass::Run( - const GraphOptimizationPassOptions& options) { - if (!flags::Global().replicate_small_constants.value()) { - VLOG(1) << "replicate_constants_pass not enabled"; - return OkStatus(); - } - VLOG(1) << "replicate_constants_pass will replicate constants with " - "number-of-elements <= " - << kMaxSize; - - Graph* graph = options.graph->get(); - if (VLOG_IS_ON(1)) { - VLOG(1) << DumpGraphToFile("before_replicate_constants_pass", *graph, - options.flib_def); - } - int64_t min_skipped = std::numeric_limits::max(); - int64_t max_skipped = std::numeric_limits::min(); - for (Node* node : graph->nodes()) { - if (!node->IsConstant()) continue; - - // For performance, skip when there is at most one successor. - if (node->out_edges().size() <= 1) continue; - - // Skip if the constant has a control successor. Replicating constants with - // control successors would require relpicating these control edges, which - // could result in even more message passing. - if (HasControlOut(node)) continue; - - // Skip if the constant is too large. - const TensorProto* value = nullptr; - TF_RETURN_IF_ERROR(GetNodeAttr(node->attrs(), "value", &value)); - TF_ASSIGN_OR_RETURN(TensorShape shape, - TensorShape::BuildTensorShape(value->tensor_shape())); - if (shape.num_elements() > kMaxSize) { - min_skipped = std::min(min_skipped, shape.num_elements()); - max_skipped = std::max(max_skipped, shape.num_elements()); - continue; - } - - // Skip if there is no assigned device. - if (!node->has_assigned_device_name()) continue; - - // Skip when the original constant is not on a CPU, because is not clear - // whether replicating from non-CPU to CPU is valid. - if (!HasCpuDevice(node)) continue; - - // Collect successor edges, per device. 
- absl::btree_map> device_to_edges; - TF_RETURN_IF_ERROR(GetSuccessorEdges(node, device_to_edges)); - - // Skip if all successors are on the same device. - if (device_to_edges.size() <= 1) continue; - - // Replicate the constant to each successor device. - ReplicateToEachDevice(graph, node, device_to_edges); - } - if (min_skipped != std::numeric_limits::max()) { - VLOG(1) << "replicate_constants_pass skipped replicating constants with " - "number of elements in the range " - << min_skipped << " to " << max_skipped << "."; - } - - if (VLOG_IS_ON(1)) { - VLOG(1) << DumpGraphToFile("after_replicate_constants_pass", *graph, - options.flib_def); - } - return OkStatus(); -} - -REGISTER_OPTIMIZATION(OptimizationPassRegistry::POST_PLACEMENT, 3, - ReplicateConstantsPass); - -} // namespace tensorflow diff --git a/tensorflow/core/common_runtime/replicate_constants_pass.h b/tensorflow/core/common_runtime/replicate_constants_pass.h deleted file mode 100644 index b7b2f0fe98c0d2..00000000000000 --- a/tensorflow/core/common_runtime/replicate_constants_pass.h +++ /dev/null @@ -1,50 +0,0 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_REPLICATE_CONSTANTS_PASS_H_ -#define TENSORFLOW_CORE_COMMON_RUNTIME_REPLICATE_CONSTANTS_PASS_H_ - -#include "tensorflow/core/common_runtime/optimization_registry.h" - -// Small constants are replicated to the hosts of their successors. This pass -// only applies when there are multiple successors. -// -// For example, the graph: -// C -> {Op0, Op1, Op2, Op3} -// C's assigned_device is /job:tpu_host_worker/replica:0/task:0/device:CPU:0 -// Op0's assigned_device is /job:tpu_host_worker/replica:0/task:0/device:TPU:0 -// Op1's assigned_device is /job:tpu_host_worker/replica:0/task:0/device:TPU:1 -// Op2's assigned_device is /job:tpu_host_worker/replica:0/task:1/device:TPU:0 -// Op3's assigned_device is /job:tpu_host_worker/replica:0/task:1/device:TPU:1 -// is rewritten to: -// C0 -> {Op0, Op1} -// C1 -> {Op2, Op3} -// C0's assigned_device is /job:tpu_host_worker/replica:0/task:0/device:CPU:0 -// C1's assigned_device is /job:tpu_host_worker/replica:0/task:1/device:CPU:0 -// Op0's assigned_device is /job:tpu_host_worker/replica:0/task:0/device:TPU:0 -// Op1's assigned_device is /job:tpu_host_worker/replica:0/task:0/device:TPU:1 -// Op2's assigned_device is /job:tpu_host_worker/replica:0/task:1/device:TPU:0 -// Op3's assigned_device is /job:tpu_host_worker/replica:0/task:1/device:TPU:1 - -namespace tensorflow { - -class ReplicateConstantsPass : public GraphOptimizationPass { - public: - Status Run(const GraphOptimizationPassOptions& options) override; -}; - -} // namespace tensorflow - -#endif // TENSORFLOW_CORE_COMMON_RUNTIME_REPLICATE_CONSTANTS_PASS_H_ diff --git a/tensorflow/core/common_runtime/replicate_constants_pass_test.cc b/tensorflow/core/common_runtime/replicate_constants_pass_test.cc deleted file mode 100644 index dae22012bfba8b..00000000000000 --- a/tensorflow/core/common_runtime/replicate_constants_pass_test.cc +++ /dev/null @@ -1,334 
+0,0 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/core/common_runtime/replicate_constants_pass.h" - -#include -#include - -#include "tensorflow/cc/framework/ops.h" -#include "tensorflow/cc/framework/scope.h" -#include "tensorflow/cc/ops/const_op.h" -#include "tensorflow/cc/ops/math_ops.h" -#include "tensorflow/core/common_runtime/optimization_registry.h" -#include "tensorflow/core/config/flag_defs.h" -#include "tensorflow/core/config/flags.h" -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/graph/graph.h" -#include "tensorflow/core/platform/test.h" -#include "tsl/lib/core/status_test_util.h" -#include "tsl/platform/status.h" -#include "tsl/platform/test.h" - -namespace tensorflow { - -const char kCpu0[] = "/job:tpu_host_worker/replica:0/task:0/device:CPU:0"; -const char kCpu1[] = "/job:tpu_host_worker/replica:0/task:1/device:CPU:0"; -const char kTpu00[] = "/job:tpu_host_worker/replica:0/task:0/device:TPU:0"; -const char kTpu01[] = "/job:tpu_host_worker/replica:0/task:0/device:TPU:1"; -const char kTpu10[] = "/job:tpu_host_worker/replica:0/task:1/device:TPU:0"; -const char kTpu11[] = "/job:tpu_host_worker/replica:0/task:1/device:TPU:1"; - -// Return the node with name `name`. 
-Node* GetNode(const Graph& graph, const std::string& name) { - for (Node* node : graph.nodes()) { - if (node->name() == name) return node; - } - CHECK(false) << "Unknown node name: " << name; - return nullptr; -} - -// Return the first predecessor of `node`. -Node* GetPredecessor(Node* node) { - auto it = node->in_nodes().begin(); - CHECK(it != node->in_nodes().end()) - << "No predecessor for " << node->name() << "\n"; - return *it; -} - -// There exists an edge from `src` to `dst`. -bool IsEdge(Node* src, Node* dst) { - for (Node* node : src->out_nodes()) { - if (node == dst) return true; - } - return false; -} - -// Test that a small constant is replicated to each successor's device. -TEST(ReplicateConstantsPassTest, TestSmallConstant) { - std::unique_ptr graph(new Graph(OpRegistry::Global())); - { - Scope scope = Scope::NewRootScope().ExitOnError(); - Output const0 = - ops::Const(scope.WithOpName("const"), 1.0f, TensorShape({})); - ops::Negate dst0(scope.WithOpName("dst0"), const0); - ops::Negate dst1(scope.WithOpName("dst1"), const0); - ops::Negate dst2(scope.WithOpName("dst2"), const0); - TF_CHECK_OK(scope.ToGraph(graph.get())); - } - GetNode(*graph, "const")->set_assigned_device_name(kCpu0); - GetNode(*graph, "dst0")->set_assigned_device_name(kCpu0); - GetNode(*graph, "dst1")->set_assigned_device_name(kCpu1); - GetNode(*graph, "dst2")->set_assigned_device_name(kCpu1); - - // Enable the pass. 
- flags::Global().replicate_small_constants.reset(true); - - GraphDef before; - graph->ToGraphDef(&before); - GraphOptimizationPassOptions options; - options.graph = &graph; - ReplicateConstantsPass pass; - TF_ASSERT_OK(pass.Run(options)); - GraphDef actual; - graph->ToGraphDef(&actual); - - Node* dst0 = GetNode(*graph, "dst0"); - Node* dst1 = GetNode(*graph, "dst1"); - Node* dst2 = GetNode(*graph, "dst2"); - EXPECT_EQ(dst0->assigned_device_name(), - GetPredecessor(dst0)->assigned_device_name()); - EXPECT_EQ(dst1->assigned_device_name(), - GetPredecessor(dst1)->assigned_device_name()); - EXPECT_EQ(dst2->assigned_device_name(), - GetPredecessor(dst2)->assigned_device_name()); -} - -// Test that a large constant is ignored. -TEST(ReplicateConstantsPassTest, TestLargeConstant) { - std::unique_ptr graph(new Graph(OpRegistry::Global())); - { - Scope scope = Scope::NewRootScope().ExitOnError(); - Output const0 = - ops::Const(scope.WithOpName("const"), - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); - ops::Negate dst0(scope.WithOpName("dst0"), const0); - ops::Negate dst1(scope.WithOpName("dst1"), const0); - ops::Negate dst2(scope.WithOpName("dst2"), const0); - TF_CHECK_OK(scope.ToGraph(graph.get())); - } - GetNode(*graph, "const")->set_assigned_device_name(kCpu0); - GetNode(*graph, "dst0")->set_assigned_device_name(kCpu0); - GetNode(*graph, "dst1")->set_assigned_device_name(kCpu1); - GetNode(*graph, "dst2")->set_assigned_device_name(kCpu1); - - // Enable the pass. 
- flags::Global().replicate_small_constants.reset(true); - - GraphDef before; - graph->ToGraphDef(&before); - GraphOptimizationPassOptions options; - options.graph = &graph; - ReplicateConstantsPass pass; - TF_ASSERT_OK(pass.Run(options)); - GraphDef actual; - graph->ToGraphDef(&actual); - - Node* dst0 = GetNode(*graph, "dst0"); - Node* dst1 = GetNode(*graph, "dst1"); - Node* dst2 = GetNode(*graph, "dst2"); - EXPECT_EQ(dst0->assigned_device_name(), - GetPredecessor(dst0)->assigned_device_name()); - EXPECT_NE(dst1->assigned_device_name(), - GetPredecessor(dst1)->assigned_device_name()); - EXPECT_NE(dst2->assigned_device_name(), - GetPredecessor(dst2)->assigned_device_name()); -} - -// Test that a constant with a control successor is ignored. -TEST(ReplicateConstantsPassTest, TestControlOut) { - std::unique_ptr graph(new Graph(OpRegistry::Global())); - { - Scope scope = Scope::NewRootScope().ExitOnError(); - Output const0 = - ops::Const(scope.WithOpName("const0"), 1.0f, TensorShape({})); - Output ctrl_succ = - ops::Const(scope.WithOpName("ctrl_succ"), 1.0f, TensorShape({})); - ops::Negate dst0(scope.WithOpName("dst0"), const0); - ops::Negate dst1(scope.WithOpName("dst1"), const0); - ops::Negate dst2(scope.WithOpName("dst2"), const0); - TF_CHECK_OK(scope.ToGraph(graph.get())); - } - GetNode(*graph, "const0")->set_assigned_device_name(kCpu0); - GetNode(*graph, "ctrl_succ")->set_assigned_device_name(kCpu0); - GetNode(*graph, "dst0")->set_assigned_device_name(kCpu0); - GetNode(*graph, "dst1")->set_assigned_device_name(kCpu1); - GetNode(*graph, "dst2")->set_assigned_device_name(kCpu1); - graph->AddControlEdge(GetNode(*graph, "const0"), - GetNode(*graph, "ctrl_succ")); - - // Enable the pass. 
- flags::Global().replicate_small_constants.reset(true); - - GraphDef before; - graph->ToGraphDef(&before); - GraphOptimizationPassOptions options; - options.graph = &graph; - ReplicateConstantsPass pass; - TF_ASSERT_OK(pass.Run(options)); - GraphDef actual; - graph->ToGraphDef(&actual); - - Node* dst0 = GetNode(*graph, "dst0"); - Node* dst1 = GetNode(*graph, "dst1"); - Node* dst2 = GetNode(*graph, "dst2"); - EXPECT_EQ(dst0->assigned_device_name(), - GetPredecessor(dst0)->assigned_device_name()); - EXPECT_NE(dst1->assigned_device_name(), - GetPredecessor(dst1)->assigned_device_name()); - EXPECT_NE(dst2->assigned_device_name(), - GetPredecessor(dst2)->assigned_device_name()); -} - -// Test that a constant on a TPU is ignored. -TEST(ReplicateConstantsPassTest, TestTpuConst) { - std::unique_ptr graph(new Graph(OpRegistry::Global())); - { - Scope scope = Scope::NewRootScope().ExitOnError(); - Output const0 = - ops::Const(scope.WithOpName("const0"), 1.0f, TensorShape({})); - ops::Negate dst0(scope.WithOpName("dst0"), const0); - ops::Negate dst1(scope.WithOpName("dst1"), const0); - ops::Negate dst2(scope.WithOpName("dst2"), const0); - TF_CHECK_OK(scope.ToGraph(graph.get())); - } - GetNode(*graph, "const0")->set_assigned_device_name(kTpu00); - GetNode(*graph, "dst0")->set_assigned_device_name(kTpu00); - GetNode(*graph, "dst1")->set_assigned_device_name(kTpu10); - GetNode(*graph, "dst2")->set_assigned_device_name(kTpu10); - - // Enable the pass. 
- flags::Global().replicate_small_constants.reset(true); - - GraphDef before; - graph->ToGraphDef(&before); - GraphOptimizationPassOptions options; - options.graph = &graph; - ReplicateConstantsPass pass; - TF_ASSERT_OK(pass.Run(options)); - GraphDef actual; - graph->ToGraphDef(&actual); - - Node* dst0 = GetNode(*graph, "dst0"); - Node* dst1 = GetNode(*graph, "dst1"); - Node* dst2 = GetNode(*graph, "dst2"); - EXPECT_EQ(dst0->assigned_device_name(), - GetPredecessor(dst0)->assigned_device_name()); - EXPECT_NE(dst1->assigned_device_name(), - GetPredecessor(dst1)->assigned_device_name()); - EXPECT_NE(dst2->assigned_device_name(), - GetPredecessor(dst2)->assigned_device_name()); -} - -TEST(ReplicateConstantsPassTest, TestSmallAndLargeConstants) { - std::unique_ptr graph(new Graph(OpRegistry::Global())); - { - Scope scope = Scope::NewRootScope().ExitOnError(); - Output small = ops::Const(scope.WithOpName("small"), 1.0f, TensorShape({})); - Output large = - ops::Const(scope.WithOpName("large"), - {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, - 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}); - ops::Add dst0(scope.WithOpName("dst0"), small, large); - ops::Add dst1(scope.WithOpName("dst1"), small, large); - ops::Add dst2(scope.WithOpName("dst2"), small, large); - TF_CHECK_OK(scope.ToGraph(graph.get())); - } - GetNode(*graph, "small")->set_assigned_device_name(kCpu0); - GetNode(*graph, "large")->set_assigned_device_name(kCpu0); - GetNode(*graph, "dst0")->set_assigned_device_name(kCpu0); - GetNode(*graph, "dst1")->set_assigned_device_name(kCpu1); - GetNode(*graph, "dst2")->set_assigned_device_name(kCpu1); - - // Enable the pass. 
- flags::Global().replicate_small_constants.reset(true); - - GraphDef before; - graph->ToGraphDef(&before); - GraphOptimizationPassOptions options; - options.graph = &graph; - ReplicateConstantsPass pass; - TF_ASSERT_OK(pass.Run(options)); - GraphDef actual; - graph->ToGraphDef(&actual); - - Node* small0 = GetNode(*graph, "small/replicate/_0"); - Node* small1 = GetNode(*graph, "small/replicate/_1"); - Node* large = GetNode(*graph, "large"); - Node* dst0 = GetNode(*graph, "dst0"); - Node* dst1 = GetNode(*graph, "dst1"); - Node* dst2 = GetNode(*graph, "dst2"); - EXPECT_EQ(small0->assigned_device_name(), kCpu0); - EXPECT_EQ(small1->assigned_device_name(), kCpu1); - EXPECT_EQ(large->assigned_device_name(), kCpu0); - EXPECT_EQ(dst0->assigned_device_name(), kCpu0); - EXPECT_EQ(dst1->assigned_device_name(), kCpu1); - EXPECT_EQ(dst1->assigned_device_name(), kCpu1); - EXPECT_TRUE(IsEdge(small0, dst0)); - EXPECT_TRUE(IsEdge(large, dst0)); - EXPECT_TRUE(IsEdge(small1, dst1)); - EXPECT_TRUE(IsEdge(large, dst1)); - EXPECT_TRUE(IsEdge(small1, dst2)); - EXPECT_TRUE(IsEdge(large, dst2)); -} - -// Test that a constant at a CPU with TPU successors is replicated to the -// TPUs' host CPUs. 
-TEST(ReplicateConstantsPassTest, TestTpuDestinations) { - std::unique_ptr graph(new Graph(OpRegistry::Global())); - { - Scope scope = Scope::NewRootScope().ExitOnError(); - Output const0 = - ops::Const(scope.WithOpName("const"), 1.0f, TensorShape({})); - ops::Negate dst00(scope.WithOpName("dst00"), const0); - ops::Negate dst01(scope.WithOpName("dst01"), const0); - ops::Negate dst10(scope.WithOpName("dst10"), const0); - ops::Negate dst11(scope.WithOpName("dst11"), const0); - TF_CHECK_OK(scope.ToGraph(graph.get())); - } - GetNode(*graph, "const")->set_assigned_device_name(kCpu0); - GetNode(*graph, "dst00")->set_assigned_device_name(kTpu00); - GetNode(*graph, "dst01")->set_assigned_device_name(kTpu01); - GetNode(*graph, "dst10")->set_assigned_device_name(kTpu10); - GetNode(*graph, "dst11")->set_assigned_device_name(kTpu11); - - // Enable the pass. - flags::Global().replicate_small_constants.reset(true); - - GraphDef before; - graph->ToGraphDef(&before); - GraphOptimizationPassOptions options; - options.graph = &graph; - ReplicateConstantsPass pass; - TF_ASSERT_OK(pass.Run(options)); - GraphDef actual; - graph->ToGraphDef(&actual); - - Node* const0 = GetNode(*graph, "const/replicate/_0"); - Node* const1 = GetNode(*graph, "const/replicate/_1"); - Node* dst00 = GetNode(*graph, "dst00"); - Node* dst01 = GetNode(*graph, "dst01"); - Node* dst10 = GetNode(*graph, "dst10"); - Node* dst11 = GetNode(*graph, "dst11"); - EXPECT_EQ(const0->assigned_device_name(), kCpu0); - EXPECT_EQ(const1->assigned_device_name(), kCpu1); - EXPECT_TRUE(IsEdge(const0, dst00)); - EXPECT_TRUE(IsEdge(const0, dst01)); - EXPECT_TRUE(IsEdge(const1, dst10)); - EXPECT_TRUE(IsEdge(const1, dst11)); -} - -} // namespace tensorflow diff --git a/tensorflow/core/config/flag_defs.h b/tensorflow/core/config/flag_defs.h index 89ea6a9b73bbf1..4ab5fb4750de46 100644 --- a/tensorflow/core/config/flag_defs.h +++ b/tensorflow/core/config/flag_defs.h @@ -49,10 +49,6 @@ class Flags { TF_DECLARE_FLAG(more_stack_traces, 
false, "Enable experimental code that preserves and propagates " "graph node stack traces in C++."); - TF_DECLARE_FLAG(replicate_small_constants, false, - "Enable a graph optimization pass that replicate each small " - "constant to its successors' devices. This can decrease " - "message passing."); // LINT.ThenChange(//tensorflow/core/config/flags_api_wrapper.cc) }; diff --git a/tensorflow/core/config/flags_api_wrapper.cc b/tensorflow/core/config/flags_api_wrapper.cc index 974581e931f7ec..58074fb06257d1 100644 --- a/tensorflow/core/config/flags_api_wrapper.cc +++ b/tensorflow/core/config/flags_api_wrapper.cc @@ -51,6 +51,5 @@ PYBIND11_MODULE(flags_pybind, m) { TF_PY_DECLARE_FLAG(saved_model_fingerprinting); TF_PY_DECLARE_FLAG(tf_shape_default_int64); TF_PY_DECLARE_FLAG(more_stack_traces); - TF_PY_DECLARE_FLAG(replicate_small_constants); // LINT.ThenChange(//tensorflow/core/config/flag_defs.h) }; diff --git a/tensorflow/python/flags_pybind.pyi b/tensorflow/python/flags_pybind.pyi index 90aa0a7d76114b..34b0a0c5666eb8 100644 --- a/tensorflow/python/flags_pybind.pyi +++ b/tensorflow/python/flags_pybind.pyi @@ -24,7 +24,6 @@ class Flags: graph_building_optimization: Flag more_stack_traces: Flag op_building_optimization: Flag - replicate_small_constants: Flag saved_model_fingerprinting: Flag test_only_experiment_1: Flag test_only_experiment_2: Flag From da49f51718a58759cd6d54d03bd7572aeef8c81c Mon Sep 17 00:00:00 2001 From: Chao Date: Fri, 22 Sep 2023 11:09:49 -0700 Subject: [PATCH 155/567] PR #5822: [ROCm] fixes kernel name in rocm gpu executor Imported from GitHub PR https://github.com/openxla/xla/pull/5822 ROCm fixes kernel name due to https://github.com/openxla/xla/commit/d04387fd4075af9d70858b2910cfa19a28a28fee @akuegel @ezhulenev @ddunl Thanks in advance! 
Copybara import of the project: -- 4cb2176989b2d89669caae1a26ea7f2506161f4a by Chao Chen : rocm fixes kernel name Merging this change closes #5822 PiperOrigin-RevId: 567668989 --- .../xla/xla/stream_executor/rocm/rocm_gpu_executor.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/third_party/xla/xla/stream_executor/rocm/rocm_gpu_executor.cc b/third_party/xla/xla/stream_executor/rocm/rocm_gpu_executor.cc index 9c0dd98791a6a5..618514de604493 100644 --- a/third_party/xla/xla/stream_executor/rocm/rocm_gpu_executor.cc +++ b/third_party/xla/xla/stream_executor/rocm/rocm_gpu_executor.cc @@ -200,7 +200,7 @@ tsl::Status GpuExecutor::GetKernel(const MultiKernelLoaderSpec& spec, KernelBase* kernel) { GpuKernel* rocm_kernel = AsGpuKernel(kernel); hipModule_t module = nullptr; - const string* kernelname; + const string* kernel_name; const OnDiskKernelLoaderSpec* on_disk_spec = nullptr; bool has_cubin = spec.has_cuda_cubin_on_disk(); @@ -212,7 +212,7 @@ tsl::Status GpuExecutor::GetKernel(const MultiKernelLoaderSpec& spec, return tsl::errors::Internal( "Loading ROCM kernel from disk is not supported"); } else if (spec.has_cuda_cubin_in_memory()) { - kernelname = &spec.cuda_cubin_in_memory().kernelname(); + kernel_name = &spec.cuda_cubin_in_memory().kernel_name(); const char* hsaco = spec.cuda_cubin_in_memory().bytes(); absl::MutexLock lock{&in_memory_modules_mu_}; @@ -226,9 +226,9 @@ tsl::Status GpuExecutor::GetKernel(const MultiKernelLoaderSpec& spec, return tsl::errors::Internal("No method of loading ROCM kernel provided"); } - VLOG(2) << "getting function " << *kernelname << " from module " << module; + VLOG(2) << "getting function " << *kernel_name << " from module " << module; TF_RETURN_IF_ERROR(GpuDriver::GetModuleFunction( - context_, module, kernelname->c_str(), rocm_kernel->gpu_function_ptr())); + context_, module, kernel_name->c_str(), rocm_kernel->gpu_function_ptr())); // We have to trust the kernel loader spec arity because there doesn't 
appear // to be a way to reflect on the number of expected arguments w/the ROCM API. @@ -237,7 +237,7 @@ tsl::Status GpuExecutor::GetKernel(const MultiKernelLoaderSpec& spec, KernelMetadata kernel_metadata; TF_RETURN_IF_ERROR(GetKernelMetadata(rocm_kernel, &kernel_metadata)); kernel->set_metadata(kernel_metadata); - kernel->set_name(*kernelname); + kernel->set_name(*kernel_name); return tsl::OkStatus(); } From 34a5499d01293ee05c3c36d67aa2cc2891820489 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 22 Sep 2023 11:11:28 -0700 Subject: [PATCH 156/567] Add type hints for ops.Operation. PiperOrigin-RevId: 567669383 --- tensorflow/python/framework/function.py | 6 +++--- .../kernel_tests/random/stateless_random_ops_test.py | 11 ++++++----- tensorflow/python/ops/tensor_array_grad.py | 12 ++++++------ 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/tensorflow/python/framework/function.py b/tensorflow/python/framework/function.py index 502d0805012afa..068a685184245a 100644 --- a/tensorflow/python/framework/function.py +++ b/tensorflow/python/framework/function.py @@ -914,7 +914,7 @@ def _add_tensor_and_parents(self, tensor): op = self._add_op_and_parents(tensor.op) return op.outputs[tensor.value_index] - def _add_op_and_parents(self, op): + def _add_op_and_parents(self, op: ops.Operation): # pylint: disable=protected-access op_def = graph_to_function_def._get_op_def(op) if op._is_stateful and op not in self._allowlisted_stateful_ops: @@ -1049,13 +1049,13 @@ def _is_guaranteed_const(tensor): class Work(object): - def __init__(self, op, leaving): + def __init__(self, op: ops.Operation, leaving): self.op = op self.leaving = leaving is_guaranteed_const = lambda op: op.node_def.op == "GuaranteeConst" constants = set([]) - def all_inputs_const(op): + def all_inputs_const(op: ops.Operation): # If all inputs of an op are guaranteed constants, then we can infer that # the op produces a constant as well. 
return op.inputs and all(inp.op in constants for inp in op.inputs) diff --git a/tensorflow/python/kernel_tests/random/stateless_random_ops_test.py b/tensorflow/python/kernel_tests/random/stateless_random_ops_test.py index 08c8b53cbf7303..c5c3aeb50a05e4 100644 --- a/tensorflow/python/kernel_tests/random/stateless_random_ops_test.py +++ b/tensorflow/python/kernel_tests/random/stateless_random_ops_test.py @@ -102,7 +102,7 @@ def float_cases(shape_dtypes=(None,)): ) # Explicitly passing in params because capturing cell variable from loop is # problematic in Python - def wrap(op, dtype, shape, shape_dtype, seed, **kwargs): + def wrap(op: ops.Operation, dtype, shape, shape_dtype, seed, **kwargs): device_type = get_device().device_type # Some dtypes are not supported on some devices if (dtype == dtypes.bfloat16 and device_type == 'GPU' and @@ -134,7 +134,8 @@ def _name(a): def int_cases(shape_dtypes=(None,), minval_maxval=None): - def wrap(op, minval, maxval, shape, shape_dtype, dtype, seed, **kwargs): + def wrap(op: ops.Operation, minval, maxval, shape, shape_dtype, dtype, + seed, **kwargs): shape_ = (constant_op.constant(shape, dtype=shape_dtype) if shape_dtype is not None else shape) return op( @@ -156,7 +157,7 @@ def wrap(op, minval, maxval, shape, shape_dtype, dtype, seed, **kwargs): def multinomial_cases(): num_samples = 10 - def wrap(op, logits, logits_dtype, output_dtype, seed): + def wrap(op: ops.Operation, logits, logits_dtype, output_dtype, seed): device_type = get_device().device_type # Some dtypes are not supported on some devices if (logits_dtype == dtypes.bfloat16 and device_type == 'GPU' and @@ -183,7 +184,7 @@ def wrap(op, logits, logits_dtype, output_dtype, seed): def gamma_cases(): - def wrap(op, alpha, dtype, shape, seed): + def wrap(op: ops.Operation, alpha, dtype, shape, seed): return op(seed=seed, shape=shape, alpha=constant_op.constant(alpha, dtype=dtype), dtype=dtype) for dtype in np.float16, np.float32, np.float64: @@ -196,7 +197,7 @@ def 
wrap(op, alpha, dtype, shape, seed): def poisson_cases(): - def wrap(op, lam, lam_dtype, out_dtype, shape, seed): + def wrap(op: ops.Operation, lam, lam_dtype, out_dtype, shape, seed): return op(seed=seed, shape=shape, lam=constant_op.constant(lam_dtype(lam), dtype=lam_dtype), dtype=out_dtype) diff --git a/tensorflow/python/ops/tensor_array_grad.py b/tensorflow/python/ops/tensor_array_grad.py index 994ec609bcf5f1..25ca3495416b7b 100644 --- a/tensorflow/python/ops/tensor_array_grad.py +++ b/tensorflow/python/ops/tensor_array_grad.py @@ -80,7 +80,7 @@ def _GetGradSource(op_or_tensor): @ops.RegisterGradient("TensorArrayRead") @ops.RegisterGradient("TensorArrayReadV2") @ops.RegisterGradient("TensorArrayReadV3") -def _TensorArrayReadGrad(op, grad): +def _TensorArrayReadGrad(op: ops.Operation, grad): """Gradient for TensorArrayRead. Args: @@ -111,7 +111,7 @@ def _TensorArrayReadGrad(op, grad): @ops.RegisterGradient("TensorArrayWrite") @ops.RegisterGradient("TensorArrayWriteV2") @ops.RegisterGradient("TensorArrayWriteV3") -def _TensorArrayWriteGrad(op, flow): +def _TensorArrayWriteGrad(op: ops.Operation, flow): """Gradient for TensorArrayWrite. Args: @@ -143,7 +143,7 @@ def _TensorArrayWriteGrad(op, flow): @ops.RegisterGradient("TensorArrayGather") @ops.RegisterGradient("TensorArrayGatherV2") @ops.RegisterGradient("TensorArrayGatherV3") -def _TensorArrayGatherGrad(op, grad): +def _TensorArrayGatherGrad(op: ops.Operation, grad): """Gradient for TensorArrayGather. Args: @@ -174,7 +174,7 @@ def _TensorArrayGatherGrad(op, grad): @ops.RegisterGradient("TensorArrayScatter") @ops.RegisterGradient("TensorArrayScatterV2") @ops.RegisterGradient("TensorArrayScatterV3") -def _TensorArrayScatterGrad(op, flow): +def _TensorArrayScatterGrad(op: ops.Operation, flow): """Gradient for TensorArrayScatter. 
Args: @@ -204,7 +204,7 @@ def _TensorArrayScatterGrad(op, flow): @ops.RegisterGradient("TensorArrayConcat") @ops.RegisterGradient("TensorArrayConcatV2") @ops.RegisterGradient("TensorArrayConcatV3") -def _TensorArrayConcatGrad(op, grad, unused_lengths_grad): +def _TensorArrayConcatGrad(op: ops.Operation, grad, unused_lengths_grad): """Gradient for TensorArrayConcat. Args: @@ -236,7 +236,7 @@ def _TensorArrayConcatGrad(op, grad, unused_lengths_grad): @ops.RegisterGradient("TensorArraySplit") @ops.RegisterGradient("TensorArraySplitV2") @ops.RegisterGradient("TensorArraySplitV3") -def _TensorArraySplitGrad(op, flow): +def _TensorArraySplitGrad(op: ops.Operation, flow): """Gradient for TensorArraySplit. Args: From efbc945baa1744389fba62202bf6dc6782914453 Mon Sep 17 00:00:00 2001 From: Jieying Luo Date: Fri, 22 Sep 2023 11:14:10 -0700 Subject: [PATCH 157/567] [PJRT C API] Register "InspectSharding" custom partitioning handler in GPU plugin. PiperOrigin-RevId: 567670200 --- third_party/xla/xla/pjrt/c/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/third_party/xla/xla/pjrt/c/BUILD b/third_party/xla/xla/pjrt/c/BUILD index d0a7a55ea82099..ba1adb05c17794 100644 --- a/third_party/xla/xla/pjrt/c/BUILD +++ b/third_party/xla/xla/pjrt/c/BUILD @@ -141,6 +141,7 @@ cc_library( "//xla/pjrt:pjrt_common", "//xla/pjrt/gpu:gpu_helpers", "//xla/pjrt/gpu:se_gpu_pjrt_client", + "//xla/python:inspect_sharding", # To register "InspectSharding" custom partitioning handler. "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/status", "@com_google_absl//absl/strings:str_format", From 3de450f2ebb7eca8b92ae24548ed11b53821c129 Mon Sep 17 00:00:00 2001 From: Ilia Sergachev Date: Fri, 22 Sep 2023 11:25:03 -0700 Subject: [PATCH 158/567] [XLA:GPU] Fix accidentally broken Triton emitter test. Addressing the test case just added in cl/567619889. 
PiperOrigin-RevId: 567673155 --- .../xla/xla/service/gpu/ir_emitter_triton_parametrized_test.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/third_party/xla/xla/service/gpu/ir_emitter_triton_parametrized_test.cc b/third_party/xla/xla/service/gpu/ir_emitter_triton_parametrized_test.cc index 832c7a295b981a..09af76884ead91 100644 --- a/third_party/xla/xla/service/gpu/ir_emitter_triton_parametrized_test.cc +++ b/third_party/xla/xla/service/gpu/ir_emitter_triton_parametrized_test.cc @@ -719,6 +719,7 @@ ENTRY e { float tolerance; switch (data_type) { case F32: + case BF16: tolerance = 1e-6; break; case F16: From f60a692345e570d51b4dcb3578b475d229586933 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 22 Sep 2023 11:34:19 -0700 Subject: [PATCH 159/567] Add type hints under third_party/tensorflow. PiperOrigin-RevId: 567675462 --- tensorflow/python/ops/random_grad.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/ops/random_grad.py b/tensorflow/python/ops/random_grad.py index 32d2b35abe9aa8..a7984ff7cd5fca 100644 --- a/tensorflow/python/ops/random_grad.py +++ b/tensorflow/python/ops/random_grad.py @@ -34,7 +34,7 @@ def add_leading_unit_dimensions(x, num_dimensions): # pylint: disable=invalid-n @ops.RegisterGradient("RandomGamma") -def _RandomGammaGrad(op, grad): # pylint: disable=invalid-name +def _RandomGammaGrad(op: ops.Operation, grad): # pylint: disable=invalid-name """Returns the gradient of a Gamma sample w.r.t. alpha. The gradient is computed using implicit differentiation @@ -74,7 +74,7 @@ def _RandomGammaGrad(op, grad): # pylint: disable=invalid-name @ops.RegisterGradient("StatelessRandomGammaV2") -def _StatelessRandomGammaV2Grad(op, grad): # pylint: disable=invalid-name +def _StatelessRandomGammaV2Grad(op: ops.Operation, grad): # pylint: disable=invalid-name """Returns the gradient of a Gamma sample w.r.t. alpha. 
The gradient is computed using implicit differentiation @@ -106,7 +106,7 @@ def _StatelessRandomGammaV2Grad(op, grad): # pylint: disable=invalid-name @ops.RegisterGradient("StatelessRandomGammaV3") -def _StatelessRandomGammaV3Grad(op, grad): # pylint: disable=invalid-name +def _StatelessRandomGammaV3Grad(op: ops.Operation, grad): # pylint: disable=invalid-name """Returns the gradient of a Gamma sample w.r.t. alpha. The gradient is computed using implicit differentiation @@ -169,7 +169,7 @@ def _Ndtr(x): @ops.RegisterGradient("StatelessParameterizedTruncatedNormal") -def _StatelessParameterizedTruncatedNormalGrad(op, grad): # pylint: disable=invalid-name +def _StatelessParameterizedTruncatedNormalGrad(op: ops.Operation, grad): # pylint: disable=invalid-name """Returns the gradient of a TruncatedNormal sample w.r.t. parameters. The gradient is computed using implicit differentiation From f2d3345f00add8504ef3297495c288e896b0212c Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Fri, 22 Sep 2023 11:38:04 -0700 Subject: [PATCH 160/567] Replicate small constants so they don't need to be sent to their successors. A small constant is replicated to each of its successors' devices. The maximum size of a constant to be replicated is 16 elements. This pass is disabled by default and can be enabled with the flag replicate_small_constants. The previous version broke the CI for macOS. This version rolls forwards with a split-out C++ test target. 
PiperOrigin-RevId: 567676442 --- tensorflow/core/common_runtime/BUILD | 91 ++++- .../replicate_constants_pass.cc | 189 ++++++++++ .../common_runtime/replicate_constants_pass.h | 50 +++ .../replicate_constants_pass_test.cc | 334 ++++++++++++++++++ tensorflow/core/config/flag_defs.h | 4 + tensorflow/core/config/flags_api_wrapper.cc | 1 + tensorflow/python/flags_pybind.pyi | 1 + 7 files changed, 661 insertions(+), 9 deletions(-) create mode 100644 tensorflow/core/common_runtime/replicate_constants_pass.cc create mode 100644 tensorflow/core/common_runtime/replicate_constants_pass.h create mode 100644 tensorflow/core/common_runtime/replicate_constants_pass_test.cc diff --git a/tensorflow/core/common_runtime/BUILD b/tensorflow/core/common_runtime/BUILD index 4b7c5eb8b7c29d..21537de1b5765f 100644 --- a/tensorflow/core/common_runtime/BUILD +++ b/tensorflow/core/common_runtime/BUILD @@ -1,7 +1,9 @@ +load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") load( "//tensorflow:tensorflow.bzl", "if_google", "if_libtpu", + "if_macos", "if_oss", "if_zendnn", "tf_cc_test", @@ -21,25 +23,24 @@ load( "tf_protos_all", "tf_protos_grappler", ) -load( - "//tensorflow/core/platform:rules_cc.bzl", - "cc_library", -) load( "//tensorflow/core/platform:build_config_root.bzl", "if_static", "tf_cuda_tests_tags", ) -load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") load( - "//third_party/mkl:build_defs.bzl", - "if_mkl", - "if_mkl_ml", + "//tensorflow/core/platform:rules_cc.bzl", + "cc_library", ) load( "//tensorflow/security/fuzzing:tf_fuzzing.bzl", "tf_cc_fuzz_test", ) +load( + "//third_party/mkl:build_defs.bzl", + "if_mkl", + "if_mkl_ml", +) default_package_visibility = [ "//tensorflow:internal", @@ -299,6 +300,7 @@ filegroup( "renamed_device.h", "rendezvous_mgr.h", "rendezvous_util.h", + "replicate_constants_pass.h", "replicate_per_replica_nodes.h", "ring_alg.h", "ring_gatherer.h", @@ -1132,6 +1134,31 @@ cc_library( alwayslink = 1, ) +cc_library( + name = 
"replicate_constants_pass", + srcs = ["replicate_constants_pass.cc"], + hdrs = ["replicate_constants_pass.h"], + copts = tf_copts(), + deps = [ + ":optimization_registry", + "//tensorflow/core:core_cpu_base", + "//tensorflow/core:framework", + "//tensorflow/core:portable_gif_internal", + "//tensorflow/core/config:flag_defs", + "//tensorflow/core/config:flags", + "//tensorflow/core/framework:node_def_util", + "//tensorflow/core/framework:tensor_proto_cc", + "//tensorflow/core/framework:tensor_shape_proto_cc", + "@com_google_absl//absl/container:btree", + "@com_google_absl//absl/status", + "@com_google_absl//absl/strings", + "@local_tsl//tsl/platform:errors", + "@local_tsl//tsl/platform:status", + "@local_tsl//tsl/platform:statusor", + ], + alwayslink = 1, +) + cc_library( name = "local_device", srcs = ["local_device.cc"], @@ -1948,7 +1975,10 @@ tf_cuda_library( ":step_stats_collector", ":threadpool_device", ":threadpool_device_factory", - ] + if_zendnn([":zen_layout_pass"]), + ] + if_zendnn([":zen_layout_pass"]) + if_macos( + [], + [":replicate_constants_pass"], # TODO(b/301469885): Remove. 
+ ), ) tf_cuda_library( @@ -2342,6 +2372,7 @@ tf_cc_tests( "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core:testlib", + "//tensorflow/core/config:flag_defs", "//tensorflow/core/kernels:ops_util", "//tensorflow/core/nccl:collective_communicator", "//tensorflow/core/platform:regexp", @@ -2353,6 +2384,48 @@ tf_cc_tests( ], ) +tf_cc_tests( + name = "replicate_constants_pass_test", + size = "small", + srcs = [ + "replicate_constants_pass_test.cc", + ], + deps = [ + ":core", + ":core_cpu", + ":core_cpu_internal", + ":direct_session_internal", + ":pending_counts", + "//tensorflow/cc:cc_ops", + "//tensorflow/cc:cc_ops_internal", + "//tensorflow/cc:function_ops", + "//tensorflow/cc:ops", + "//tensorflow/cc:scope", + "//tensorflow/cc:sendrecv_ops", + "//tensorflow/cc:while_loop", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:ops", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/config:flag_defs", + "//tensorflow/core/kernels:ops_util", + "//tensorflow/core/nccl:collective_communicator", + "//tensorflow/core/platform:regexp", + "//tensorflow/core/util:protos_test_cc", + "@com_google_absl//absl/base", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "@eigen_archive//:eigen3", + ] + if_macos([ + ":replicate_constants_pass", # TODO(b/301469885): Remove. + ]), +) + tf_cc_tests( name = "higher_level_tests_needing_kernels", size = "small", diff --git a/tensorflow/core/common_runtime/replicate_constants_pass.cc b/tensorflow/core/common_runtime/replicate_constants_pass.cc new file mode 100644 index 00000000000000..73f96d66f940bb --- /dev/null +++ b/tensorflow/core/common_runtime/replicate_constants_pass.cc @@ -0,0 +1,189 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/common_runtime/replicate_constants_pass.h" + +#include <algorithm> +#include <cstdint> +#include <limits> +#include <string> +#include <vector> + +#include "absl/container/btree_map.h" +#include "absl/status/status.h" +#include "absl/strings/str_cat.h" +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/config/flag_defs.h" +#include "tensorflow/core/config/flags.h" +#include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor.pb.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/tensor_shape.pb.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/util/device_name_utils.h" +#include "tensorflow/core/util/dump_graph.h" +#include "tsl/platform/errors.h" +#include "tsl/platform/status.h" +#include "tsl/platform/statusor.h" + +namespace tensorflow { +namespace { + +// Maximum size constant to replicate. +constexpr int64_t kMaxSize = 16; + +// Set `node`'s name to <original-name>/replicate/_<unique-index> +void SetUniqueName(Graph* graph, Node* node) { + node->set_name(graph->NewName(absl::StrCat(node->name(), "/replicate"))); +} + +// `node` has an output control edge.
+bool HasControlOut(Node* node) { + auto control_out_it = + std::find_if(node->out_edges().begin(), node->out_edges().end(), + [](const auto& e) { return e->IsControlEdge(); }); + return control_out_it != node->out_edges().end(); +} + +// `node`'s device is a CPU. +bool HasCpuDevice(const Node* node) { + DeviceNameUtils::ParsedName device; + if (!DeviceNameUtils::ParseFullName(node->assigned_device_name(), &device)) + return false; + return device.type == "CPU"; +} + +// Get the CPU device on the same host as dst. +Status GetDestinationCpuDevice(const Node* dst, std::string* device) { + if (!dst->has_assigned_device_name()) + return absl::AbortedError( + absl::StrCat("Node name: ", dst->name(), " has no assigned device.")); + return DeviceNameUtils::DeviceNameToCpuDeviceName(dst->assigned_device_name(), + device); +} + +// Collect the successor edges of the constant. Group them by the device of the +// successor. +Status GetSuccessorEdges( + Node* node, + absl::btree_map<std::string, std::vector<const Edge*>>& device_to_edges) { + for (const auto& edge : node->out_edges()) { + const Node* dst = edge->dst(); + std::string device; + TF_RETURN_IF_ERROR(GetDestinationCpuDevice(dst, &device)); + if (!device_to_edges.count(device)) device_to_edges.insert({device, {}}); + device_to_edges[device].push_back(edge); + } + return OkStatus(); +} + +// Replicate the constant to each successor device. +void ReplicateToEachDevice( + Graph* graph, Node* node, + absl::btree_map<std::string, std::vector<const Edge*>>& device_to_edges) { + for (const auto& pair : device_to_edges) { + Node* copy = graph->CopyNode(node); + SetUniqueName(graph, copy); + const std::string device = pair.first; + copy->set_assigned_device_name(device); + // Set the successor edges to ops on this device. + for (const Edge* edge : pair.second) { + graph->AddEdge(copy, edge->src_output(), edge->dst(), edge->dst_input()); + } + // Replicate in edges that are control.
+ for (Node* src : node->in_nodes()) { + graph->AddControlEdge(src, copy, true); + } + } + graph->RemoveNode(node); +} + +} // namespace + +Status ReplicateConstantsPass::Run( + const GraphOptimizationPassOptions& options) { + if (!flags::Global().replicate_small_constants.value()) { + VLOG(1) << "replicate_constants_pass not enabled"; + return OkStatus(); + } + VLOG(1) << "replicate_constants_pass will replicate constants with " + "number-of-elements <= " + << kMaxSize; + + Graph* graph = options.graph->get(); + if (VLOG_IS_ON(1)) { + VLOG(1) << DumpGraphToFile("before_replicate_constants_pass", *graph, + options.flib_def); + } + int64_t min_skipped = std::numeric_limits<int64_t>::max(); + int64_t max_skipped = std::numeric_limits<int64_t>::min(); + for (Node* node : graph->nodes()) { + if (!node->IsConstant()) continue; + + // For performance, skip when there is at most one successor. + if (node->out_edges().size() <= 1) continue; + + // Skip if the constant has a control successor. Replicating constants with + // control successors would require replicating these control edges, which + // could result in even more message passing. + if (HasControlOut(node)) continue; + + // Skip if the constant is too large. + const TensorProto* value = nullptr; + TF_RETURN_IF_ERROR(GetNodeAttr(node->attrs(), "value", &value)); + TF_ASSIGN_OR_RETURN(TensorShape shape, + TensorShape::BuildTensorShape(value->tensor_shape())); + if (shape.num_elements() > kMaxSize) { + min_skipped = std::min(min_skipped, shape.num_elements()); + max_skipped = std::max(max_skipped, shape.num_elements()); + continue; + } + + // Skip if there is no assigned device. + if (!node->has_assigned_device_name()) continue; + + // Skip when the original constant is not on a CPU, because it is not clear + // whether replicating from non-CPU to CPU is valid. + if (!HasCpuDevice(node)) continue; + + // Collect successor edges, per device.
+ absl::btree_map<std::string, std::vector<const Edge*>> device_to_edges; + TF_RETURN_IF_ERROR(GetSuccessorEdges(node, device_to_edges)); + + // Skip if all successors are on the same device. + if (device_to_edges.size() <= 1) continue; + + // Replicate the constant to each successor device. + ReplicateToEachDevice(graph, node, device_to_edges); + } + if (min_skipped != std::numeric_limits<int64_t>::max()) { + VLOG(1) << "replicate_constants_pass skipped replicating constants with " + "number of elements in the range " + << min_skipped << " to " << max_skipped << "."; + } + + if (VLOG_IS_ON(1)) { + VLOG(1) << DumpGraphToFile("after_replicate_constants_pass", *graph, + options.flib_def); + } + return OkStatus(); +} + +REGISTER_OPTIMIZATION(OptimizationPassRegistry::POST_PLACEMENT, 3, + ReplicateConstantsPass); + +} // namespace tensorflow diff --git a/tensorflow/core/common_runtime/replicate_constants_pass.h b/tensorflow/core/common_runtime/replicate_constants_pass.h new file mode 100644 index 00000000000000..b7b2f0fe98c0d2 --- /dev/null +++ b/tensorflow/core/common_runtime/replicate_constants_pass.h @@ -0,0 +1,50 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+==============================================================================*/ + +#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_REPLICATE_CONSTANTS_PASS_H_ +#define TENSORFLOW_CORE_COMMON_RUNTIME_REPLICATE_CONSTANTS_PASS_H_ + +#include "tensorflow/core/common_runtime/optimization_registry.h" + +// Small constants are replicated to the hosts of their successors. This pass +// only applies when there are multiple successors. +// +// For example, the graph: +// C -> {Op0, Op1, Op2, Op3} +// C's assigned_device is /job:tpu_host_worker/replica:0/task:0/device:CPU:0 +// Op0's assigned_device is /job:tpu_host_worker/replica:0/task:0/device:TPU:0 +// Op1's assigned_device is /job:tpu_host_worker/replica:0/task:0/device:TPU:1 +// Op2's assigned_device is /job:tpu_host_worker/replica:0/task:1/device:TPU:0 +// Op3's assigned_device is /job:tpu_host_worker/replica:0/task:1/device:TPU:1 +// is rewritten to: +// C0 -> {Op0, Op1} +// C1 -> {Op2, Op3} +// C0's assigned_device is /job:tpu_host_worker/replica:0/task:0/device:CPU:0 +// C1's assigned_device is /job:tpu_host_worker/replica:0/task:1/device:CPU:0 +// Op0's assigned_device is /job:tpu_host_worker/replica:0/task:0/device:TPU:0 +// Op1's assigned_device is /job:tpu_host_worker/replica:0/task:0/device:TPU:1 +// Op2's assigned_device is /job:tpu_host_worker/replica:0/task:1/device:TPU:0 +// Op3's assigned_device is /job:tpu_host_worker/replica:0/task:1/device:TPU:1 + +namespace tensorflow { + +class ReplicateConstantsPass : public GraphOptimizationPass { + public: + Status Run(const GraphOptimizationPassOptions& options) override; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_COMMON_RUNTIME_REPLICATE_CONSTANTS_PASS_H_ diff --git a/tensorflow/core/common_runtime/replicate_constants_pass_test.cc b/tensorflow/core/common_runtime/replicate_constants_pass_test.cc new file mode 100644 index 00000000000000..dae22012bfba8b --- /dev/null +++ b/tensorflow/core/common_runtime/replicate_constants_pass_test.cc @@ -0,0 +1,334 
@@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/common_runtime/replicate_constants_pass.h" + +#include +#include + +#include "tensorflow/cc/framework/ops.h" +#include "tensorflow/cc/framework/scope.h" +#include "tensorflow/cc/ops/const_op.h" +#include "tensorflow/cc/ops/math_ops.h" +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/config/flag_defs.h" +#include "tensorflow/core/config/flags.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/platform/test.h" +#include "tsl/lib/core/status_test_util.h" +#include "tsl/platform/status.h" +#include "tsl/platform/test.h" + +namespace tensorflow { + +const char kCpu0[] = "/job:tpu_host_worker/replica:0/task:0/device:CPU:0"; +const char kCpu1[] = "/job:tpu_host_worker/replica:0/task:1/device:CPU:0"; +const char kTpu00[] = "/job:tpu_host_worker/replica:0/task:0/device:TPU:0"; +const char kTpu01[] = "/job:tpu_host_worker/replica:0/task:0/device:TPU:1"; +const char kTpu10[] = "/job:tpu_host_worker/replica:0/task:1/device:TPU:0"; +const char kTpu11[] = "/job:tpu_host_worker/replica:0/task:1/device:TPU:1"; + +// Return the node with name `name`. 
+Node* GetNode(const Graph& graph, const std::string& name) { + for (Node* node : graph.nodes()) { + if (node->name() == name) return node; + } + CHECK(false) << "Unknown node name: " << name; + return nullptr; +} + +// Return the first predecessor of `node`. +Node* GetPredecessor(Node* node) { + auto it = node->in_nodes().begin(); + CHECK(it != node->in_nodes().end()) + << "No predecessor for " << node->name() << "\n"; + return *it; +} + +// There exists an edge from `src` to `dst`. +bool IsEdge(Node* src, Node* dst) { + for (Node* node : src->out_nodes()) { + if (node == dst) return true; + } + return false; +} + +// Test that a small constant is replicated to each successor's device. +TEST(ReplicateConstantsPassTest, TestSmallConstant) { + std::unique_ptr graph(new Graph(OpRegistry::Global())); + { + Scope scope = Scope::NewRootScope().ExitOnError(); + Output const0 = + ops::Const(scope.WithOpName("const"), 1.0f, TensorShape({})); + ops::Negate dst0(scope.WithOpName("dst0"), const0); + ops::Negate dst1(scope.WithOpName("dst1"), const0); + ops::Negate dst2(scope.WithOpName("dst2"), const0); + TF_CHECK_OK(scope.ToGraph(graph.get())); + } + GetNode(*graph, "const")->set_assigned_device_name(kCpu0); + GetNode(*graph, "dst0")->set_assigned_device_name(kCpu0); + GetNode(*graph, "dst1")->set_assigned_device_name(kCpu1); + GetNode(*graph, "dst2")->set_assigned_device_name(kCpu1); + + // Enable the pass. 
+ flags::Global().replicate_small_constants.reset(true); + + GraphDef before; + graph->ToGraphDef(&before); + GraphOptimizationPassOptions options; + options.graph = &graph; + ReplicateConstantsPass pass; + TF_ASSERT_OK(pass.Run(options)); + GraphDef actual; + graph->ToGraphDef(&actual); + + Node* dst0 = GetNode(*graph, "dst0"); + Node* dst1 = GetNode(*graph, "dst1"); + Node* dst2 = GetNode(*graph, "dst2"); + EXPECT_EQ(dst0->assigned_device_name(), + GetPredecessor(dst0)->assigned_device_name()); + EXPECT_EQ(dst1->assigned_device_name(), + GetPredecessor(dst1)->assigned_device_name()); + EXPECT_EQ(dst2->assigned_device_name(), + GetPredecessor(dst2)->assigned_device_name()); +} + +// Test that a large constant is ignored. +TEST(ReplicateConstantsPassTest, TestLargeConstant) { + std::unique_ptr graph(new Graph(OpRegistry::Global())); + { + Scope scope = Scope::NewRootScope().ExitOnError(); + Output const0 = + ops::Const(scope.WithOpName("const"), + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); + ops::Negate dst0(scope.WithOpName("dst0"), const0); + ops::Negate dst1(scope.WithOpName("dst1"), const0); + ops::Negate dst2(scope.WithOpName("dst2"), const0); + TF_CHECK_OK(scope.ToGraph(graph.get())); + } + GetNode(*graph, "const")->set_assigned_device_name(kCpu0); + GetNode(*graph, "dst0")->set_assigned_device_name(kCpu0); + GetNode(*graph, "dst1")->set_assigned_device_name(kCpu1); + GetNode(*graph, "dst2")->set_assigned_device_name(kCpu1); + + // Enable the pass. 
+ flags::Global().replicate_small_constants.reset(true); + + GraphDef before; + graph->ToGraphDef(&before); + GraphOptimizationPassOptions options; + options.graph = &graph; + ReplicateConstantsPass pass; + TF_ASSERT_OK(pass.Run(options)); + GraphDef actual; + graph->ToGraphDef(&actual); + + Node* dst0 = GetNode(*graph, "dst0"); + Node* dst1 = GetNode(*graph, "dst1"); + Node* dst2 = GetNode(*graph, "dst2"); + EXPECT_EQ(dst0->assigned_device_name(), + GetPredecessor(dst0)->assigned_device_name()); + EXPECT_NE(dst1->assigned_device_name(), + GetPredecessor(dst1)->assigned_device_name()); + EXPECT_NE(dst2->assigned_device_name(), + GetPredecessor(dst2)->assigned_device_name()); +} + +// Test that a constant with a control successor is ignored. +TEST(ReplicateConstantsPassTest, TestControlOut) { + std::unique_ptr graph(new Graph(OpRegistry::Global())); + { + Scope scope = Scope::NewRootScope().ExitOnError(); + Output const0 = + ops::Const(scope.WithOpName("const0"), 1.0f, TensorShape({})); + Output ctrl_succ = + ops::Const(scope.WithOpName("ctrl_succ"), 1.0f, TensorShape({})); + ops::Negate dst0(scope.WithOpName("dst0"), const0); + ops::Negate dst1(scope.WithOpName("dst1"), const0); + ops::Negate dst2(scope.WithOpName("dst2"), const0); + TF_CHECK_OK(scope.ToGraph(graph.get())); + } + GetNode(*graph, "const0")->set_assigned_device_name(kCpu0); + GetNode(*graph, "ctrl_succ")->set_assigned_device_name(kCpu0); + GetNode(*graph, "dst0")->set_assigned_device_name(kCpu0); + GetNode(*graph, "dst1")->set_assigned_device_name(kCpu1); + GetNode(*graph, "dst2")->set_assigned_device_name(kCpu1); + graph->AddControlEdge(GetNode(*graph, "const0"), + GetNode(*graph, "ctrl_succ")); + + // Enable the pass. 
+ flags::Global().replicate_small_constants.reset(true); + + GraphDef before; + graph->ToGraphDef(&before); + GraphOptimizationPassOptions options; + options.graph = &graph; + ReplicateConstantsPass pass; + TF_ASSERT_OK(pass.Run(options)); + GraphDef actual; + graph->ToGraphDef(&actual); + + Node* dst0 = GetNode(*graph, "dst0"); + Node* dst1 = GetNode(*graph, "dst1"); + Node* dst2 = GetNode(*graph, "dst2"); + EXPECT_EQ(dst0->assigned_device_name(), + GetPredecessor(dst0)->assigned_device_name()); + EXPECT_NE(dst1->assigned_device_name(), + GetPredecessor(dst1)->assigned_device_name()); + EXPECT_NE(dst2->assigned_device_name(), + GetPredecessor(dst2)->assigned_device_name()); +} + +// Test that a constant on a TPU is ignored. +TEST(ReplicateConstantsPassTest, TestTpuConst) { + std::unique_ptr graph(new Graph(OpRegistry::Global())); + { + Scope scope = Scope::NewRootScope().ExitOnError(); + Output const0 = + ops::Const(scope.WithOpName("const0"), 1.0f, TensorShape({})); + ops::Negate dst0(scope.WithOpName("dst0"), const0); + ops::Negate dst1(scope.WithOpName("dst1"), const0); + ops::Negate dst2(scope.WithOpName("dst2"), const0); + TF_CHECK_OK(scope.ToGraph(graph.get())); + } + GetNode(*graph, "const0")->set_assigned_device_name(kTpu00); + GetNode(*graph, "dst0")->set_assigned_device_name(kTpu00); + GetNode(*graph, "dst1")->set_assigned_device_name(kTpu10); + GetNode(*graph, "dst2")->set_assigned_device_name(kTpu10); + + // Enable the pass. 
+ flags::Global().replicate_small_constants.reset(true); + + GraphDef before; + graph->ToGraphDef(&before); + GraphOptimizationPassOptions options; + options.graph = &graph; + ReplicateConstantsPass pass; + TF_ASSERT_OK(pass.Run(options)); + GraphDef actual; + graph->ToGraphDef(&actual); + + Node* dst0 = GetNode(*graph, "dst0"); + Node* dst1 = GetNode(*graph, "dst1"); + Node* dst2 = GetNode(*graph, "dst2"); + EXPECT_EQ(dst0->assigned_device_name(), + GetPredecessor(dst0)->assigned_device_name()); + EXPECT_NE(dst1->assigned_device_name(), + GetPredecessor(dst1)->assigned_device_name()); + EXPECT_NE(dst2->assigned_device_name(), + GetPredecessor(dst2)->assigned_device_name()); +} + +TEST(ReplicateConstantsPassTest, TestSmallAndLargeConstants) { + std::unique_ptr graph(new Graph(OpRegistry::Global())); + { + Scope scope = Scope::NewRootScope().ExitOnError(); + Output small = ops::Const(scope.WithOpName("small"), 1.0f, TensorShape({})); + Output large = + ops::Const(scope.WithOpName("large"), + {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, + 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}); + ops::Add dst0(scope.WithOpName("dst0"), small, large); + ops::Add dst1(scope.WithOpName("dst1"), small, large); + ops::Add dst2(scope.WithOpName("dst2"), small, large); + TF_CHECK_OK(scope.ToGraph(graph.get())); + } + GetNode(*graph, "small")->set_assigned_device_name(kCpu0); + GetNode(*graph, "large")->set_assigned_device_name(kCpu0); + GetNode(*graph, "dst0")->set_assigned_device_name(kCpu0); + GetNode(*graph, "dst1")->set_assigned_device_name(kCpu1); + GetNode(*graph, "dst2")->set_assigned_device_name(kCpu1); + + // Enable the pass. 
+ flags::Global().replicate_small_constants.reset(true); + + GraphDef before; + graph->ToGraphDef(&before); + GraphOptimizationPassOptions options; + options.graph = &graph; + ReplicateConstantsPass pass; + TF_ASSERT_OK(pass.Run(options)); + GraphDef actual; + graph->ToGraphDef(&actual); + + Node* small0 = GetNode(*graph, "small/replicate/_0"); + Node* small1 = GetNode(*graph, "small/replicate/_1"); + Node* large = GetNode(*graph, "large"); + Node* dst0 = GetNode(*graph, "dst0"); + Node* dst1 = GetNode(*graph, "dst1"); + Node* dst2 = GetNode(*graph, "dst2"); + EXPECT_EQ(small0->assigned_device_name(), kCpu0); + EXPECT_EQ(small1->assigned_device_name(), kCpu1); + EXPECT_EQ(large->assigned_device_name(), kCpu0); + EXPECT_EQ(dst0->assigned_device_name(), kCpu0); + EXPECT_EQ(dst1->assigned_device_name(), kCpu1); + EXPECT_EQ(dst2->assigned_device_name(), kCpu1); + EXPECT_TRUE(IsEdge(small0, dst0)); + EXPECT_TRUE(IsEdge(large, dst0)); + EXPECT_TRUE(IsEdge(small1, dst1)); + EXPECT_TRUE(IsEdge(large, dst1)); + EXPECT_TRUE(IsEdge(small1, dst2)); + EXPECT_TRUE(IsEdge(large, dst2)); +} + +// Test that a constant at a CPU with TPU successors is replicated to the +// TPUs' host CPUs. 
+TEST(ReplicateConstantsPassTest, TestTpuDestinations) { + std::unique_ptr graph(new Graph(OpRegistry::Global())); + { + Scope scope = Scope::NewRootScope().ExitOnError(); + Output const0 = + ops::Const(scope.WithOpName("const"), 1.0f, TensorShape({})); + ops::Negate dst00(scope.WithOpName("dst00"), const0); + ops::Negate dst01(scope.WithOpName("dst01"), const0); + ops::Negate dst10(scope.WithOpName("dst10"), const0); + ops::Negate dst11(scope.WithOpName("dst11"), const0); + TF_CHECK_OK(scope.ToGraph(graph.get())); + } + GetNode(*graph, "const")->set_assigned_device_name(kCpu0); + GetNode(*graph, "dst00")->set_assigned_device_name(kTpu00); + GetNode(*graph, "dst01")->set_assigned_device_name(kTpu01); + GetNode(*graph, "dst10")->set_assigned_device_name(kTpu10); + GetNode(*graph, "dst11")->set_assigned_device_name(kTpu11); + + // Enable the pass. + flags::Global().replicate_small_constants.reset(true); + + GraphDef before; + graph->ToGraphDef(&before); + GraphOptimizationPassOptions options; + options.graph = &graph; + ReplicateConstantsPass pass; + TF_ASSERT_OK(pass.Run(options)); + GraphDef actual; + graph->ToGraphDef(&actual); + + Node* const0 = GetNode(*graph, "const/replicate/_0"); + Node* const1 = GetNode(*graph, "const/replicate/_1"); + Node* dst00 = GetNode(*graph, "dst00"); + Node* dst01 = GetNode(*graph, "dst01"); + Node* dst10 = GetNode(*graph, "dst10"); + Node* dst11 = GetNode(*graph, "dst11"); + EXPECT_EQ(const0->assigned_device_name(), kCpu0); + EXPECT_EQ(const1->assigned_device_name(), kCpu1); + EXPECT_TRUE(IsEdge(const0, dst00)); + EXPECT_TRUE(IsEdge(const0, dst01)); + EXPECT_TRUE(IsEdge(const1, dst10)); + EXPECT_TRUE(IsEdge(const1, dst11)); +} + +} // namespace tensorflow diff --git a/tensorflow/core/config/flag_defs.h b/tensorflow/core/config/flag_defs.h index 4ab5fb4750de46..89ea6a9b73bbf1 100644 --- a/tensorflow/core/config/flag_defs.h +++ b/tensorflow/core/config/flag_defs.h @@ -49,6 +49,10 @@ class Flags { TF_DECLARE_FLAG(more_stack_traces, 
false, "Enable experimental code that preserves and propagates " "graph node stack traces in C++."); + TF_DECLARE_FLAG(replicate_small_constants, false, + "Enable a graph optimization pass that replicate each small " + "constant to its successors' devices. This can decrease " + "message passing."); // LINT.ThenChange(//tensorflow/core/config/flags_api_wrapper.cc) }; diff --git a/tensorflow/core/config/flags_api_wrapper.cc b/tensorflow/core/config/flags_api_wrapper.cc index 58074fb06257d1..974581e931f7ec 100644 --- a/tensorflow/core/config/flags_api_wrapper.cc +++ b/tensorflow/core/config/flags_api_wrapper.cc @@ -51,5 +51,6 @@ PYBIND11_MODULE(flags_pybind, m) { TF_PY_DECLARE_FLAG(saved_model_fingerprinting); TF_PY_DECLARE_FLAG(tf_shape_default_int64); TF_PY_DECLARE_FLAG(more_stack_traces); + TF_PY_DECLARE_FLAG(replicate_small_constants); // LINT.ThenChange(//tensorflow/core/config/flag_defs.h) }; diff --git a/tensorflow/python/flags_pybind.pyi b/tensorflow/python/flags_pybind.pyi index 34b0a0c5666eb8..90aa0a7d76114b 100644 --- a/tensorflow/python/flags_pybind.pyi +++ b/tensorflow/python/flags_pybind.pyi @@ -24,6 +24,7 @@ class Flags: graph_building_optimization: Flag more_stack_traces: Flag op_building_optimization: Flag + replicate_small_constants: Flag saved_model_fingerprinting: Flag test_only_experiment_1: Flag test_only_experiment_2: Flag From cdfaf71501eda4df974e01f13b8af9dc335f03f6 Mon Sep 17 00:00:00 2001 From: Anlun Xu Date: Fri, 22 Sep 2023 11:44:34 -0700 Subject: [PATCH 161/567] [xla:gpu] Set cublas workspace after calling cublasSetStream We need to set the workspace because new memory is allocated every time a cublas call is captured in a cuda graph: https://docs.nvidia.com/cuda/cublas/index.html#cuda-graphs-support We need to call it after setting the stream because cublasSetStream resets the workspace: https://docs.nvidia.com/cuda/cublas/index.html#cuda-graphs-support PiperOrigin-RevId: 567678072 --- .../xla/xla/stream_executor/cuda/cuda_blas.cc | 39 +++++++++++++++++++ 
.../xla/xla/stream_executor/cuda/cuda_blas.h | 5 +++ .../xla/stream_executor/gpu/gpu_executor.h | 2 + 3 files changed, 46 insertions(+) diff --git a/third_party/xla/xla/stream_executor/cuda/cuda_blas.cc b/third_party/xla/xla/stream_executor/cuda/cuda_blas.cc index d9d5451eb912a7..b56fb1236b45f5 100644 --- a/third_party/xla/xla/stream_executor/cuda/cuda_blas.cc +++ b/third_party/xla/xla/stream_executor/cuda/cuda_blas.cc @@ -20,6 +20,7 @@ limitations under the License. #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" +#include "absl/synchronization/mutex.h" #include "Eigen/Core" // from @eigen_archive #include "third_party/gpus/cuda/include/cublas_v2.h" #include "third_party/gpus/cuda/include/cuda.h" @@ -198,6 +199,19 @@ bool CUDABlas::Init() { } #endif // CUDA_VERSION >= 11000 + // Initialize cuBLAS workspace memory on device. The workspace size is + // determined by the GPU architecture: + // https://docs.nvidia.com/cuda/cublas/index.html#cublassetworkspace + absl::MutexLock lock(&mu_); + uint64_t workspace_size = + parent_->cc_major() >= 9 ? 
1 << 25 /*32 MiB*/ : 1 << 22 /*4 MiB*/; + workspace_ = parent_->Allocate(workspace_size, /*memory_space=*/0); + + if (workspace_.is_null()) { + LOG(ERROR) << "Failed to allocate workspace memory"; + return false; + } + return true; } @@ -214,6 +228,9 @@ CUDABlas::CUDABlas(gpu::GpuExecutor *parent) CUDABlas::~CUDABlas() { if (blas_ != nullptr) { gpu::ScopedActivateExecutorContext sac{parent_}; + if (!workspace_.is_null()) { + parent_->Deallocate(&workspace_); + } cublasDestroy(blas_); } } @@ -232,6 +249,24 @@ bool CUDABlas::SetStream(Stream *stream) { return true; } +bool CUDABlas::SetWorkspace() { + CHECK(blas_ != nullptr); + gpu::ScopedActivateExecutorContext sac{parent_}; + + if (workspace_.is_null()) { + LOG(ERROR) << "cuBLAS workspace is not allocated"; + return false; + } + + cublasStatus_t ret = + cublasSetWorkspace(blas_, workspace_.opaque(), workspace_.size()); + if (ret != CUBLAS_STATUS_SUCCESS) { + LOG(ERROR) << "failed to set workspace for cuBLAS calls: " << ToString(ret); + return false; + } + return true; +} + cudaStream_t CUDABlas::CUDAStream(Stream *stream) { CHECK(stream != nullptr); CHECK(AsGpuStreamValue(stream) != nullptr); @@ -359,6 +394,10 @@ tsl::Status CUDABlas::DoBlasInternalImpl(FuncT cublas_func, Stream *stream, return tsl::errors::Internal("Failed setting stream"); } + if (!SetWorkspace()) { + return tsl::errors::Internal("Failed setting workspace"); + } + ScopedCublasMathMode math_mode{blas_}; #if CUBLAS_VER_MAJOR >= 11 if (math_type == CUBLAS_TF32_TENSOR_OP_MATH && diff --git a/third_party/xla/xla/stream_executor/cuda/cuda_blas.h b/third_party/xla/xla/stream_executor/cuda/cuda_blas.h index 8869b284ce487a..1e98ab1787d240 100644 --- a/third_party/xla/xla/stream_executor/cuda/cuda_blas.h +++ b/third_party/xla/xla/stream_executor/cuda/cuda_blas.h @@ -25,6 +25,7 @@ limitations under the License. 
#include "third_party/gpus/cuda/include/cublas_v2.h" #include "xla/stream_executor/blas.h" #include "xla/stream_executor/cuda/cuda_blas_lt.h" +#include "xla/stream_executor/device_memory.h" #include "xla/stream_executor/platform/port.h" #include "xla/stream_executor/plugin_registry.h" @@ -70,6 +71,8 @@ class CUDABlas : public blas::BlasSupport { // invoked before calling into cuBLAS. bool SetStream(Stream *stream) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); + bool SetWorkspace() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); + // Returns the underlying CUDA stream. cudaStream_t CUDAStream(Stream *stream); @@ -121,6 +124,8 @@ class CUDABlas : public blas::BlasSupport { BlasLt blas_lt_; + DeviceMemoryBase workspace_ ABSL_GUARDED_BY(mu_); + SE_DISALLOW_COPY_AND_ASSIGN(CUDABlas); }; diff --git a/third_party/xla/xla/stream_executor/gpu/gpu_executor.h b/third_party/xla/xla/stream_executor/gpu/gpu_executor.h index f5ed9093451926..06b2c014e88629 100644 --- a/third_party/xla/xla/stream_executor/gpu/gpu_executor.h +++ b/third_party/xla/xla/stream_executor/gpu/gpu_executor.h @@ -284,6 +284,8 @@ class GpuExecutor : public internal::StreamExecutorInterface { return it->second; } + int cc_major() const { return cc_major_; } + private: // Host callback landing routine invoked by CUDA. 
// data: User-provided callback provided to HostCallback() above, captured From 2ae6018db0ebfb4d1f8af0e6534a13296b3ecb5e Mon Sep 17 00:00:00 2001 From: Gabriel Rasskin Date: Fri, 22 Sep 2023 11:54:16 -0700 Subject: [PATCH 162/567] Internal change only PiperOrigin-RevId: 567680498 --- tensorflow/dtensor/BUILD | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/dtensor/BUILD b/tensorflow/dtensor/BUILD index 97e263fb766bfb..b8117e927a2523 100644 --- a/tensorflow/dtensor/BUILD +++ b/tensorflow/dtensor/BUILD @@ -12,7 +12,6 @@ package_group( "//tensorflow_models/google/...", "//third_party/aimee/clara2_labs/...", "//third_party/py/jax_tpu_embedding/...", - "//third_party/py/keras/dtensor/...", "//third_party/py/tf_keras/dtensor/...", ], ) From 75ebcea4fed412394bd99acd85d37c29fc9a1fc5 Mon Sep 17 00:00:00 2001 From: Adam Cogdell Date: Fri, 22 Sep 2023 12:26:42 -0700 Subject: [PATCH 163/567] Add found_fingerprint_on_load metric. PiperOrigin-RevId: 567688573 --- tensorflow/cc/saved_model/metrics.cc | 16 +++++++++++ tensorflow/cc/saved_model/metrics.h | 26 +++++++++++------ tensorflow/cc/saved_model/metrics_test.cc | 11 ++++++++ tensorflow/python/saved_model/load.py | 7 ++++- .../pywrap_saved_model/metrics.pyi | 6 ++++ .../saved_model/pywrap_saved_model_metrics.cc | 28 +++++++++++++++++++ .../pywrap_saved_model_metrics_test.py | 17 +++++++++++ .../tools/def_file_filter/symbols_pybind.txt | 1 + .../tools/def_file_filter/symbols_pybind.txt | 1 + 9 files changed, 103 insertions(+), 10 deletions(-) diff --git a/tensorflow/cc/saved_model/metrics.cc b/tensorflow/cc/saved_model/metrics.cc index ca48bd70e61e3c..c21c0c8f68dcf6 100644 --- a/tensorflow/cc/saved_model/metrics.cc +++ b/tensorflow/cc/saved_model/metrics.cc @@ -107,6 +107,18 @@ auto* saved_model_read_path_and_singleprint = "graph_def_program_hash, signature_def_hash, saved_object_graph_hash, " "and checkpoint_hash) of the loaded SavedModel."); +// Gauge that marks whether or not the fingerprint.pb file was found 
when +// loading the SavedModel. +// Can hold one of the following string values: +// - "FOUND" +// - "NOT_FOUND" +// - "ERROR" +auto* saved_model_found_fingerprint_on_load = + monitoring::Gauge::New( + "/tensorflow/core/saved_model/found_fingerprint_on_load", + "Whether or not the fingerprint.pb file was found when loading the " + "SavedModel."); + // Distribution of checkpoint write durations. auto* checkpoint_write_durations = monitoring::Sampler<1>::New( { @@ -251,6 +263,10 @@ ParseSavedModelPathAndSingleprint(std::string path_and_singleprint) { return std::pair(path, singleprint); } +monitoring::GaugeCell& SavedModelFoundFingerprintOnLoad() { + return *saved_model_found_fingerprint_on_load->GetCell(); +} + monitoring::SamplerCell& CheckpointReadDuration(absl::string_view api_label) { return *checkpoint_read_durations->GetCell(std::string(api_label)); } diff --git a/tensorflow/cc/saved_model/metrics.h b/tensorflow/cc/saved_model/metrics.h index 8a77be00bd22cf..21461206f76956 100644 --- a/tensorflow/cc/saved_model/metrics.h +++ b/tensorflow/cc/saved_model/metrics.h @@ -35,6 +35,10 @@ limitations under the License. namespace tensorflow { namespace metrics { +const char kFingerprintFound[] = "FOUND"; +const char kFingerprintNotFound[] = "NOT_FOUND"; +const char kFingerprintError[] = "ERROR"; + // Returns "/tensorflow/core/saved_model/write/count" cell. This metric // has 1 field "write_version", which is equal to the // `tensorflow::libexport::GetWriteVersion` of the protobuf and should be @@ -47,6 +51,16 @@ monitoring::CounterCell& SavedModelWriteCount(absl::string_view write_version); // incremented when a SavedModel has been successfully read. monitoring::CounterCell& SavedModelReadCount(absl::string_view write_version); +// Returns "/tensorflow/core/saved_model/write/api" cell. This metric has 1 +// field "api_label" which corresponds to a SavedModel write API. The cell for +// `foo` should be incremented when the write API `foo` is called. 
+monitoring::CounterCell& SavedModelWriteApi(absl::string_view api_label); + +// Returns "/tensorflow/core/saved_model/read/api" cell. This metric has 1 +// field "api_label" which corresponds to a SavedModel read API. The cell for +// `foo` should be incremented when the read API `foo` is called. +monitoring::CounterCell& SavedModelReadApi(absl::string_view api_label); + // Returns "/tensorflow/core/saved_model/write/fingerprint" cell, which contains // the saved_model_checksum of the SM's fingerprint when it is exported. monitoring::GaugeCell& SavedModelWriteFingerprint(); @@ -87,15 +101,9 @@ absl::StatusOr MakeSavedModelPathAndSingleprint( absl::StatusOr> ParseSavedModelPathAndSingleprint(std::string path_and_singleprint); -// Returns "/tensorflow/core/saved_model/write/api" cell. This metric has 1 -// field "api_label" which corresponds to a SavedModel write API. The cell for -// `foo` should be incremented when the write API `foo` is called. -monitoring::CounterCell& SavedModelWriteApi(absl::string_view api_label); - -// Returns "/tensorflow/core/saved_model/read/api" cell. This metric has 1 -// field "api_label" which corresponds to a SavedModel read API. The cell for -// `foo` should be incremented when the read API `foo` is called. -monitoring::CounterCell& SavedModelReadApi(absl::string_view api_label); +// Returns string status indicating whether or not the fingerprint.pb file was +// found when loading the SavedModel. +monitoring::GaugeCell& SavedModelFoundFingerprintOnLoad(); // Returns "/tensorflow/core/checkpoint/read/read_durations" cell belonging to // field `api_label`. 
diff --git a/tensorflow/cc/saved_model/metrics_test.cc b/tensorflow/cc/saved_model/metrics_test.cc index 4ab3174881fd8b..76cb0c940da82a 100644 --- a/tensorflow/cc/saved_model/metrics_test.cc +++ b/tensorflow/cc/saved_model/metrics_test.cc @@ -175,5 +175,16 @@ TEST(MetricsTest, TestMakeFingerprintJson) { EXPECT_EQ(fingerprint_json["checkpoint_hash"].asUInt64(), 5); } +TEST(MetricsTest, TestFoundFingerprintOnLoad) { + EXPECT_EQ(SavedModelFoundFingerprintOnLoad().value(), ""); + + SavedModelFoundFingerprintOnLoad().Set(kFingerprintFound); + EXPECT_EQ(SavedModelFoundFingerprintOnLoad().value(), "FOUND"); + SavedModelFoundFingerprintOnLoad().Set(kFingerprintNotFound); + EXPECT_EQ(SavedModelFoundFingerprintOnLoad().value(), "NOT_FOUND"); + SavedModelFoundFingerprintOnLoad().Set(kFingerprintError); + EXPECT_EQ(SavedModelFoundFingerprintOnLoad().value(), "ERROR"); +} + } // namespace metrics } // namespace tensorflow diff --git a/tensorflow/python/saved_model/load.py b/tensorflow/python/saved_model/load.py index 71ef550783763d..2be4ee77db7e62 100644 --- a/tensorflow/python/saved_model/load.py +++ b/tensorflow/python/saved_model/load.py @@ -1080,18 +1080,23 @@ def load_partial(export_dir, filters, tags=None, options=None): try: fingerprint = fingerprinting.read_fingerprint(export_dir) except FileNotFoundError: + metrics.SetFoundFingerprintOnLoad(found_status=metrics.kFingerprintNotFound) logging.info( "Fingerprint not found. Saved model loading will continue.") singleprint = "" except RuntimeError: + metrics.SetFoundFingerprintOnLoad(found_status=metrics.kFingerprintError) logging.exception( - "Fingerprint was found, but there was an error when reading the proto.") + "Fingerprint was found, but there was an error when reading the proto. 
" + "Saved model loading will continue.") singleprint = "" else: + metrics.SetFoundFingerprintOnLoad(found_status=metrics.kFingerprintFound) metrics.SetReadFingerprint( fingerprint=fingerprinting_utils.to_proto( fingerprint).SerializeToString()) singleprint = fingerprint.singleprint() + try: metrics.SetReadPathAndSingleprint(path=export_dir, singleprint=singleprint) except metrics.MetricException: diff --git a/tensorflow/python/saved_model/pywrap_saved_model/metrics.pyi b/tensorflow/python/saved_model/pywrap_saved_model/metrics.pyi index e73e6dc2fb3b1e..21f00a6396e129 100644 --- a/tensorflow/python/saved_model/pywrap_saved_model/metrics.pyi +++ b/tensorflow/python/saved_model/pywrap_saved_model/metrics.pyi @@ -15,6 +15,10 @@ from typing import Any, Tuple +kFingerprintError: str +kFingerprintFound: str +kFingerprintNotFound: str + class MetricException(Exception): ... def AddAsyncCheckpointWriteDuration(*args, **kwargs) -> Any: ... @@ -26,6 +30,7 @@ def GetAsyncCheckpointWriteDurations(*args, **kwargs) -> Any: ... def GetCheckpointReadDurations(*args, **kwargs) -> Any: ... def GetCheckpointSize(*args, **kwargs) -> Any: ... def GetCheckpointWriteDurations(*args, **kwargs) -> Any: ... +def GetFoundFingerprintOnLoad() -> str: ... def GetRead(*args, **kwargs) -> Any: ... def GetReadApi(arg0: str) -> int: ... def GetReadFingerprint() -> str: ... @@ -42,6 +47,7 @@ def IncrementReadApi(arg0: str) -> None: ... def IncrementWrite(*args, **kwargs) -> Any: ... def IncrementWriteApi(arg0: str) -> None: ... def RecordCheckpointSize(*args, **kwargs) -> Any: ... +def SetFoundFingerprintOnLoad(*args, **kwargs) -> Any: ... def SetReadFingerprint(*args, **kwargs) -> Any: ... def SetReadPath(*args, **kwargs) -> Any: ... def SetReadPathAndSingleprint(*args, **kwargs) -> Any: ... 
diff --git a/tensorflow/python/saved_model/pywrap_saved_model_metrics.cc b/tensorflow/python/saved_model/pywrap_saved_model_metrics.cc index e6325f7598d12a..eac5bdc30660cb 100644 --- a/tensorflow/python/saved_model/pywrap_saved_model_metrics.cc +++ b/tensorflow/python/saved_model/pywrap_saved_model_metrics.cc @@ -261,6 +261,34 @@ void DefineMetricsModule(py::module main_module) { "Get tuple of `path` and `singleprint` values of " "'/tensorflow/core/saved_model/write/path_and_singleprint' gauge.")); + m.attr("kFingerprintFound") = metrics::kFingerprintFound; + m.attr("kFingerprintNotFound") = metrics::kFingerprintNotFound; + m.attr("kFingerprintError") = metrics::kFingerprintError; + + m.def( + "GetFoundFingerprintOnLoad", + []() { return metrics::SavedModelFoundFingerprintOnLoad().value(); }, + py::doc( + "Get value of " + "'/tensorflow/core/saved_model/found_fingerprint_on_load' gauge.")); + + m.def( + "SetFoundFingerprintOnLoad", + [](std::string found_status) { + if (found_status == metrics::kFingerprintFound || + found_status == metrics::kFingerprintNotFound || + found_status == metrics::kFingerprintError) { + metrics::SavedModelFoundFingerprintOnLoad().Set(found_status); + } else { + metrics::SavedModelFoundFingerprintOnLoad().Set(""); + } + }, + py::kw_only(), py::arg("found_status"), + py::doc("Set value of " + "'/tensorflow/core/saved_model/found_fingerprint_on_load' gauge " + "with 'found_status' if status is one of { \"FOUND\", " + "\"NOT_FOUND\", \"ERROR\" }.")); + m.def( "AddCheckpointReadDuration", [](const char* api_label, double microseconds) { diff --git a/tensorflow/python/saved_model/pywrap_saved_model_metrics_test.py b/tensorflow/python/saved_model/pywrap_saved_model_metrics_test.py index 9920a0302fc5ca..dcf057ffd976ea 100644 --- a/tensorflow/python/saved_model/pywrap_saved_model_metrics_test.py +++ b/tensorflow/python/saved_model/pywrap_saved_model_metrics_test.py @@ -195,6 +195,23 @@ def test_SM_write_invalid_path_and_singleprint(self): 
str(excinfo.exception), "Invalid path_and_singleprint argument. Empty singleprint.") + def test_SM_found_fingerprint_on_load(self): + metrics.SetFoundFingerprintOnLoad(found_status=metrics.kFingerprintFound) + self.assertEqual(metrics.GetFoundFingerprintOnLoad(), "FOUND") + + metrics.SetFoundFingerprintOnLoad(found_status=metrics.kFingerprintNotFound) + self.assertEqual(metrics.GetFoundFingerprintOnLoad(), "NOT_FOUND") + + metrics.SetFoundFingerprintOnLoad(found_status=metrics.kFingerprintError) + self.assertEqual(metrics.GetFoundFingerprintOnLoad(), "ERROR") + + def test_invalid_SM_found_fingerprint_on_load(self): + metrics.SetFoundFingerprintOnLoad(found_status="absolute nonsense") + self.assertEqual(metrics.GetFoundFingerprintOnLoad(), "") + + metrics.SetFoundFingerprintOnLoad(found_status="found") + self.assertEqual(metrics.GetFoundFingerprintOnLoad(), "") + if __name__ == "__main__": test.main() diff --git a/tensorflow/tools/def_file_filter/symbols_pybind.txt b/tensorflow/tools/def_file_filter/symbols_pybind.txt index 2dedeaa23d5ba2..0f52e9fa804dae 100644 --- a/tensorflow/tools/def_file_filter/symbols_pybind.txt +++ b/tensorflow/tools/def_file_filter/symbols_pybind.txt @@ -462,6 +462,7 @@ tensorflow::metrics::SavedModelReadPathAndSingleprint tensorflow::metrics::MakeFingerprintJson tensorflow::metrics::MakeSavedModelPathAndSingleprint tensorflow::metrics::ParseSavedModelPathAndSingleprint +tensorflow::metrics::SavedModelFoundFingerprintOnLoad tensorflow::metrics::CheckpointReadDuration tensorflow::metrics::CheckpointWriteDuration tensorflow::metrics::AsyncCheckpointWriteDuration diff --git a/third_party/xla/third_party/tsl/tools/def_file_filter/symbols_pybind.txt b/third_party/xla/third_party/tsl/tools/def_file_filter/symbols_pybind.txt index 2dedeaa23d5ba2..0f52e9fa804dae 100644 --- a/third_party/xla/third_party/tsl/tools/def_file_filter/symbols_pybind.txt +++ b/third_party/xla/third_party/tsl/tools/def_file_filter/symbols_pybind.txt @@ -462,6 +462,7 @@ 
tensorflow::metrics::SavedModelReadPathAndSingleprint tensorflow::metrics::MakeFingerprintJson tensorflow::metrics::MakeSavedModelPathAndSingleprint tensorflow::metrics::ParseSavedModelPathAndSingleprint +tensorflow::metrics::SavedModelFoundFingerprintOnLoad tensorflow::metrics::CheckpointReadDuration tensorflow::metrics::CheckpointWriteDuration tensorflow::metrics::AsyncCheckpointWriteDuration From 11d6dacde62d48bdd9ba8af063ffbcb2df6ce0a7 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 22 Sep 2023 12:37:38 -0700 Subject: [PATCH 164/567] Integrate LLVM at llvm/llvm-project@6e3827af98fa Updates LLVM usage to match [6e3827af98fa](https://github.com/llvm/llvm-project/commit/6e3827af98fa) PiperOrigin-RevId: 567691274 --- third_party/llvm/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/third_party/llvm/workspace.bzl b/third_party/llvm/workspace.bzl index 288b1748010f35..eb146861cd0cb1 100644 --- a/third_party/llvm/workspace.bzl +++ b/third_party/llvm/workspace.bzl @@ -4,8 +4,8 @@ load("//third_party:repo.bzl", "tf_http_archive") def repo(name): """Imports LLVM.""" - LLVM_COMMIT = "058222b2316615194c089f2bc68d11341f39d26e" - LLVM_SHA256 = "99d3c38eb11dee8f00bd74b69152d961ab73cf4488842f6120e81342eeb94a3b" + LLVM_COMMIT = "6e3827af98fa59d5147598972625a5317936c31f" + LLVM_SHA256 = "bda0e24e2b92f19d2929237101edc1f66fa64f5407d32cbabaf44f878ff0827c" tf_http_archive( name = name, From 56a9f7f8404b95d62c1bae880fdaf88893166316 Mon Sep 17 00:00:00 2001 From: Edward Schwartz Date: Fri, 22 Sep 2023 12:40:55 -0700 Subject: [PATCH 165/567] Bincount with weights testing and documentation improvements * Enable tests for Bincount with weights for `xla_gpu` and only skip for `gpu`. * Update error message in GPU Bincount kernel to add `tf.function(jit_compile=True)` suggestion for using weights. 
* Document equivalence between bincount and unsorted_segment sum and possible use as workaround on GPU when not using XLA (for Tensor and RaggedTensor but not SparseTensor). * Update examples in documentation to use doctest and other documentation improvements. (No behavior changes.) PiperOrigin-RevId: 567692049 --- tensorflow/core/kernels/bincount_op_gpu.cu.cc | 3 +- tensorflow/python/ops/bincount_ops.py | 46 ++++++++++++------- tensorflow/python/ops/bincount_ops_test.py | 3 +- .../python/ops/ragged/ragged_bincount_ops.py | 27 ++++++++--- .../ops/ragged/ragged_bincount_ops_test.py | 3 +- .../python/ops/sparse_bincount_ops_test.py | 3 +- tensorflow/python/ops/sparse_ops.py | 20 +++++--- 7 files changed, 72 insertions(+), 33 deletions(-) diff --git a/tensorflow/core/kernels/bincount_op_gpu.cu.cc b/tensorflow/core/kernels/bincount_op_gpu.cu.cc index cb3522c1c015ec..8389fb2a8e1180 100644 --- a/tensorflow/core/kernels/bincount_op_gpu.cu.cc +++ b/tensorflow/core/kernels/bincount_op_gpu.cu.cc @@ -44,7 +44,8 @@ struct BincountFunctor { if (weights.size() != 0) { return errors::Unimplemented( "Weights are not yet supported by the GPU implementation of Bincount." - " Please use unsorted_segment_sum instead."); + " Please use unsorted_segment_sum instead or put Bincount inside" + " tf.function(jit_compile=True)."); } if (output.size() == 0) { return OkStatus(); diff --git a/tensorflow/python/ops/bincount_ops.py b/tensorflow/python/ops/bincount_ops.py index 7d19fc3a4cbffd..f04866a223e907 100644 --- a/tensorflow/python/ops/bincount_ops.py +++ b/tensorflow/python/ops/bincount_ops.py @@ -40,14 +40,11 @@ def bincount(arr, If `minlength` and `maxlength` are not given, returns a vector with length `tf.reduce_max(arr) + 1` if `arr` is non-empty, and length 0 otherwise. - If `weights` are non-None, then index `i` of the output stores the sum of the - value in `weights` at each index where the corresponding value in `arr` is - `i`. 
- ```python - values = tf.constant([1,1,2,3,2,4,4,5]) - tf.math.bincount(values) #[0 2 2 1 2 1] - ``` + >>> values = tf.constant([1,1,2,3,2,4,4,5]) + >>> tf.math.bincount(values) + + Vector length = Maximum element in vector `values` is 5. Adding 1, which is 6 will be the vector length. @@ -55,14 +52,32 @@ def bincount(arr, index. Here, index 1 in output has a value 2. This indicates value 1 occurs two times in `values`. - ```python - values = tf.constant([1,1,2,3,2,4,4,5]) - weights = tf.constant([1,5,0,1,0,5,4,5]) - tf.math.bincount(values, weights=weights) #[0 6 0 1 9 5] - ``` - Bin will be incremented by the corresponding weight instead of 1. - Here, index 1 in output has a value 6. This is the summation of weights - corresponding to the value in `values`. + **Bin-counting with weights** + + >>> values = tf.constant([1,1,2,3,2,4,4,5]) + >>> weights = tf.constant([1,5,0,1,0,5,4,5]) + >>> tf.math.bincount(values, weights=weights) + + + When `weights` is specified, bins will be incremented by the corresponding + weight instead of 1. Here, index 1 in output has a value 6. This is the + summation of `weights` corresponding to the value in `values` (i.e. for index + 1, the first two values are 1 so the first two weights, 1 and 5, are + summed). + + There is an equivalence between bin-counting with weights and + `unsorted_segment_sum` where `data` is the weights and `segment_ids` are the + values. + + >>> values = tf.constant([1,1,2,3,2,4,4,5]) + >>> weights = tf.constant([1,5,0,1,0,5,4,5]) + >>> tf.math.unsorted_segment_sum(weights, values, num_segments=6).numpy() + array([0, 6, 0, 1, 9, 5], dtype=int32) + + On GPU, `bincount` with weights is only supported when XLA is enabled + (typically inside a function decorated with `@tf.function(jit_compile=True)`). + `unsorted_segment_sum` can be used as a workaround for the non-XLA case on + GPU.
**Bin-counting matrix rows independently** @@ -76,7 +91,6 @@ def bincount(arr, array([[1, 1, 1, 1], [2, 1, 1, 0]], dtype=int32)> - **Bin-counting with binary_output** This example gives binary output instead of counting the occurrence. diff --git a/tensorflow/python/ops/bincount_ops_test.py b/tensorflow/python/ops/bincount_ops_test.py index 4e14b1b8a45ef8..d1eff521883077 100644 --- a/tensorflow/python/ops/bincount_ops_test.py +++ b/tensorflow/python/ops/bincount_ops_test.py @@ -319,7 +319,8 @@ def test_weights( expected, axis=None, ): - if "GPU" in set([d.device_type for d in tf_config.list_physical_devices()]): + device_set = set([d.device_type for d in tf_config.list_physical_devices()]) + if "GPU" in device_set and not test_util.is_xla_enabled(): self.skipTest( "b/263004039 The DenseBincount GPU kernel does not support weights." " unsorted_segment_sum should be used instead on GPU." diff --git a/tensorflow/python/ops/ragged/ragged_bincount_ops.py b/tensorflow/python/ops/ragged/ragged_bincount_ops.py index 1e4cc273701d60..7cae73ba66db8c 100644 --- a/tensorflow/python/ops/ragged/ragged_bincount_ops.py +++ b/tensorflow/python/ops/ragged/ragged_bincount_ops.py @@ -41,9 +41,6 @@ def bincount(arr: ragged_tensor.RaggedTensor, If `minlength` and `maxlength` are not given, returns a vector with length `tf.reduce_max(arr) + 1` if `arr` is non-empty, and length 0 otherwise. - If `weights` are non-None, then index `i` of the output stores the sum of the - value in `weights` at each index where the corresponding value in `arr` is - `i`. >>> data = tf.ragged.constant([[1, 1], [2, 3, 2, 4, 4, 5]]) >>> tf.math.bincount(data) @@ -56,14 +53,32 @@ def bincount(arr: ragged_tensor.RaggedTensor, index. Here, index 1 in output has a value 2. This indicates value 1 occurs two times in `values`. 
+ **Bin-counting with weights** + >>> data = tf.ragged.constant([[1, 1], [2, 3, 2, 4, 4, 5]]) >>> weights = tf.ragged.constant([[1, 5], [0, 1, 0, 5, 4, 5]]) >>> tf.math.bincount(data, weights=weights) - Bin will be incremented by the corresponding weight instead of 1. - Here, index 1 in output has a value 6. This is the summation of weights - corresponding to the value in `values`. + When `weights` is specified, bins will be incremented by the corresponding + weight instead of 1. Here, index 1 in output has a value 6. This is the + summation of `weights` corresponding to the value in `arr` (i.e. for index + 1, the first two values in `arr` are 1 so the first two weights, 1 and 5, are + summed). + + There is an equivalence between bin-counting with weights and + `unsorted_segment_sum` where `data` is the weights and `segment_ids` are the + values. + + >>> data = tf.ragged.constant([[1, 1], [2, 3, 2, 4, 4, 5]]) + >>> weights = tf.ragged.constant([[1, 5], [0, 1, 0, 5, 4, 5]]) + >>> tf.math.unsorted_segment_sum(weights, data, num_segments=6).numpy() + array([0, 6, 0, 1, 9, 5], dtype=int32) + + On GPU, `bincount` with weights is only supported when XLA is enabled + (typically inside a function decorated with `@tf.function(jit_compile=True)`). + `unsorted_segment_sum` can be used as a workaround for the non-XLA case on + GPU.
**Bin-counting matrix rows independently** diff --git a/tensorflow/python/ops/ragged/ragged_bincount_ops_test.py b/tensorflow/python/ops/ragged/ragged_bincount_ops_test.py index bda3dc32c6eedd..79b046d3072a53 100644 --- a/tensorflow/python/ops/ragged/ragged_bincount_ops_test.py +++ b/tensorflow/python/ops/ragged/ragged_bincount_ops_test.py @@ -281,7 +281,8 @@ def test_weights( expected, axis, ): - if "GPU" in set([d.device_type for d in tf_config.list_physical_devices()]): + device_set = set([d.device_type for d in tf_config.list_physical_devices()]) + if "GPU" in device_set and not test_util.is_xla_enabled(): self.skipTest( "b/263004039 The DenseBincount GPU kernel does not support weights." " unsorted_segment_sum should be used instead on GPU." diff --git a/tensorflow/python/ops/sparse_bincount_ops_test.py b/tensorflow/python/ops/sparse_bincount_ops_test.py index f5f86603bfd060..e6cc1c1fb747e8 100644 --- a/tensorflow/python/ops/sparse_bincount_ops_test.py +++ b/tensorflow/python/ops/sparse_bincount_ops_test.py @@ -589,7 +589,8 @@ def test_weights( expected, axis, ): - if "GPU" in set([d.device_type for d in tf_config.list_physical_devices()]): + device_set = set([d.device_type for d in tf_config.list_physical_devices()]) + if "GPU" in device_set and not test_util.is_xla_enabled(): self.skipTest( "b/263004039 The DenseBincount GPU kernel does not support weights." " unsorted_segment_sum should be used instead on GPU." diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py index 82cee2d96e0866..74cc5e4eddfd0a 100644 --- a/tensorflow/python/ops/sparse_ops.py +++ b/tensorflow/python/ops/sparse_ops.py @@ -3012,9 +3012,6 @@ def bincount(arr: sparse_tensor.SparseTensor, If `minlength` and `maxlength` are not given, returns a vector with length `tf.reduce_max(arr) + 1` if `arr` is non-empty, and length 0 otherwise. 
- If `weights` are non-None, then index `i` of the output stores the sum of the - value in `weights` at each index where the corresponding value in `arr` is - `i`. >>> data = tf.sparse.SparseTensor( ... indices=[[0, 3], [1, 7], [2, 4], [3, 0], @@ -3031,6 +3028,8 @@ def bincount(arr: sparse_tensor.SparseTensor, index. Here, index 1 in output has a value 2. This indicates value 1 occurs two times in `values`. + **Bin-counting with weights** + >>> indices=[[0, 3], [1, 7], [2, 4], [3, 0], [4, 9], [5, 1], [6, 8], [7, 2]] >>> data = tf.sparse.SparseTensor( ... indices=indices, @@ -3043,9 +3042,15 @@ def bincount(arr: sparse_tensor.SparseTensor, >>> tf.math.bincount(data, weights=weights) - Bin will be incremented by the corresponding weight instead of 1. - Here, index 1 in output has a value 6. This is the summation of weights - corresponding to the value in `values`. + When `weights` is specified, bins will be incremented by the corresponding + weight instead of 1. Here, index 1 in output has a value 6. This is the + summation of `weights` corresponding to the value in `values` (i.e. for index + 1, the first two data values are 1 so the first two weights, 1 and 5, are + summed). + + On GPU, `bincount` with weights is only supported when `axis=0` and XLA is + enabled (typically inside a function decorated with + `@tf.function(jit_compile=True)`). **Bin-counting matrix rows independently** @@ -3128,7 +3133,8 @@ def bincount(arr: sparse_tensor.SparseTensor, name: A name scope for the associated operations (optional). axis: The axis to slice over. Axes at and below `axis` will be flattened before bin counting. Currently, only `0`, and `-1` are supported. If None, - all axes will be flattened (identical to passing `0`). + all axes will be flattened (identical to passing `0`). XLA does not + support `axis=-1`. binary_output: If True, this op will output 1 instead of the number of times a token appears (equivalent to one_hot + reduce_any instead of one_hot + reduce_add).
Defaults to False. From 5e2d5c50c2e9a7ecce00c8859d8f7fa1fddb752c Mon Sep 17 00:00:00 2001 From: Gabriel Rasskin Date: Fri, 22 Sep 2023 12:51:01 -0700 Subject: [PATCH 166/567] Internal Code Change PiperOrigin-RevId: 567694546 --- tensorflow/BUILD | 1 - tensorflow/compiler/tf2xla/BUILD | 1 - tensorflow/python/BUILD | 2 -- tensorflow/python/distribute/BUILD | 3 --- tensorflow/python/framework/BUILD | 2 -- tensorflow/python/layers/BUILD | 2 +- tensorflow/python/lib/core/BUILD | 1 - tensorflow/python/ops/BUILD | 1 - tensorflow/python/util/BUILD | 1 - tensorflow/python/util/keras_deps.py | 2 +- tensorflow/python/util/protobuf/BUILD | 1 - third_party/xla/third_party/tsl/tsl/BUILD | 1 - 12 files changed, 2 insertions(+), 16 deletions(-) diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 3bd7e8564bee16..840e3d4c174664 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -1054,7 +1054,6 @@ package_group( "//third_party/py/cloud_ml_autoflow/...", "//third_party/py/envlogger/...", "//third_party/py/gldm/...", - "//third_party/py/keras/...", "//third_party/py/tf_keras/...", "//third_party/yggdrasil_decision_forests/...", "//waymo/ml/cn/...", diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD index 44ecb3ee0e2b27..08a499f25c2083 100644 --- a/tensorflow/compiler/tf2xla/BUILD +++ b/tensorflow/compiler/tf2xla/BUILD @@ -50,7 +50,6 @@ package_group( "//third_party/deepmind/deepmind_research/density_functional_approximation_dm21/...", "//third_party/mlir_edge/model_curriculum/iree/...", "//third_party/mlperf/submissions/training/v0_7/models/...", - "//third_party/py/keras/...", "//third_party/py/keras_cv/...", "//third_party/py/tf_keras/...", "//waymo/ml/deploy/benchmark/...", diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 4f7b1427e37ce1..4d79a70ca67e2c 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -47,7 +47,6 @@ visibility = [ "//third_party/py/tensorflow_recommenders:__subpackages__", 
"//third_party/py/tf_agents:__subpackages__", # For benchmarks. "//third_party/py/tf_slim:__subpackages__", - "//third_party/py/keras:__subpackages__", "//third_party/py/tf_keras:__subpackages__", "//third_party/py/starcraft2:__subpackages__", "//third_party/py/tensorflow_gnn:__subpackages__", @@ -129,7 +128,6 @@ py_strict_library( "//tensorflow/python/tools/api/generator:__pkg__", "//tensorflow/tools/api/tests:__pkg__", "//tensorflow/tools/compatibility/update:__pkg__", - "//third_party/py/keras:__subpackages__", "//third_party/py/tensorflow_core:__subpackages__", ], deps = [ diff --git a/tensorflow/python/distribute/BUILD b/tensorflow/python/distribute/BUILD index 62662abd82517c..0c233b2b4a455b 100644 --- a/tensorflow/python/distribute/BUILD +++ b/tensorflow/python/distribute/BUILD @@ -8,7 +8,6 @@ package( # copybara:uncomment default_applicable_licenses = ["//tensorflow:license"], default_visibility = [ "//tensorflow:internal", - "//third_party/py/keras:__subpackages__", # TODO(scottzhu): remove this once keras is relying on tf.__internal__. 
], licenses = ["notice"], ) @@ -1107,7 +1106,6 @@ py_strict_library( visibility = [ "//tensorflow:internal", "//tensorflow_models:__subpackages__", - "//third_party/py/keras:__subpackages__", ], deps = [ ":collective_all_reduce_strategy", @@ -1154,7 +1152,6 @@ py_strict_library( visibility = [ "//tensorflow:internal", "//tensorflow_models:__subpackages__", - "//third_party/py/keras:__subpackages__", ], deps = [ ":central_storage_strategy", diff --git a/tensorflow/python/framework/BUILD b/tensorflow/python/framework/BUILD index 09f576bce4a250..a5ca3b6ea44acc 100644 --- a/tensorflow/python/framework/BUILD +++ b/tensorflow/python/framework/BUILD @@ -2035,7 +2035,6 @@ py_strict_library( "//tensorflow_estimator/python/estimator:__subpackages__", "//tensorflow_model_optimization:__subpackages__", "//third_party/cloud_tpu/convergence_tools:__subpackages__", - "//third_party/py/keras:__subpackages__", "//third_party/py/neural_structured_learning:__subpackages__", "//third_party/py/tf_agents:__subpackages__", "//third_party/py/tf_keras:__subpackages__", @@ -2109,7 +2108,6 @@ py_strict_library( visibility = visibility + [ "//tensorflow:internal", "//tensorflow_models:__subpackages__", - "//third_party/py/keras:__subpackages__", "//third_party/py/keras_cv:__subpackages__", "//third_party/py/tf_keras:__subpackages__", ], diff --git a/tensorflow/python/layers/BUILD b/tensorflow/python/layers/BUILD index d1a56ff4bc9506..0a80d5731f7c13 100644 --- a/tensorflow/python/layers/BUILD +++ b/tensorflow/python/layers/BUILD @@ -64,7 +64,7 @@ py_strict_library( "//tensorflow/python/keras/legacy_tf_layers:core", "//tensorflow/python/keras/legacy_tf_layers:pooling", "//tensorflow/python/util:lazy_loader", - # Normalization layer will need //third_party/py/keras/legacy_tf_layers:normalization + # Normalization layer will need //third_party/py/tf_keras/legacy_tf_layers:normalization # Client lib should import that, since this target can't import it due to # circular dependency. 
], diff --git a/tensorflow/python/lib/core/BUILD b/tensorflow/python/lib/core/BUILD index 67385a894a37d9..a6904053c60b76 100644 --- a/tensorflow/python/lib/core/BUILD +++ b/tensorflow/python/lib/core/BUILD @@ -18,7 +18,6 @@ visibility = [ "//third_party/py/tf_agents:__subpackages__", # For benchmarks. "//third_party/py/tf_slim:__subpackages__", "//third_party/py/tensorflow_docs:__subpackages__", - "//third_party/py/keras:__subpackages__", ] package( diff --git a/tensorflow/python/ops/BUILD b/tensorflow/python/ops/BUILD index 00679cbf1d36ec..cba5c7c60e69ac 100644 --- a/tensorflow/python/ops/BUILD +++ b/tensorflow/python/ops/BUILD @@ -25,7 +25,6 @@ visibility = [ "//third_party/py/tf_agents:__subpackages__", # For benchmarks. "//third_party/py/tf_slim:__subpackages__", "//third_party/py/tf_keras:__subpackages__", - "//third_party/py/keras:__subpackages__", "//third_party/py/starcraft2:__subpackages__", "//third_party/py/tensorflow_gnn:__subpackages__", "//third_party/py/tensorflow_numerics:__subpackages__", diff --git a/tensorflow/python/util/BUILD b/tensorflow/python/util/BUILD index 9e0eb80d2fb1dd..d1e7d626a1d3e7 100644 --- a/tensorflow/python/util/BUILD +++ b/tensorflow/python/util/BUILD @@ -23,7 +23,6 @@ visibility = [ "//third_party/py/tf_agents:__subpackages__", # For benchmarks. 
"//third_party/py/tf_slim:__subpackages__", "//third_party/py/tensorflow_docs:__subpackages__", - "//third_party/py/keras:__subpackages__", "//third_party/py/tf_keras:__subpackages__", ] diff --git a/tensorflow/python/util/keras_deps.py b/tensorflow/python/util/keras_deps.py index 837b121f5bd669..99daeaa2378634 100644 --- a/tensorflow/python/util/keras_deps.py +++ b/tensorflow/python/util/keras_deps.py @@ -61,7 +61,7 @@ def register_load_model_function(func): # This is used to register the in_load_context function in -# third_party/py/keras/saving/saved_model/load_context.py for use in +# third_party/py/tf_keras/saving/saved_model/load_context.py for use in # third_party/tensorflow library. @tf_export('__internal__.register_load_context_function', v1=[]) def register_load_context_function(func): diff --git a/tensorflow/python/util/protobuf/BUILD b/tensorflow/python/util/protobuf/BUILD index f026e3734f33f9..783e6a64707329 100644 --- a/tensorflow/python/util/protobuf/BUILD +++ b/tensorflow/python/util/protobuf/BUILD @@ -21,7 +21,6 @@ visibility = [ "//third_party/py/tf_agents:__subpackages__", # For benchmarks. "//third_party/py/tf_slim:__subpackages__", "//third_party/py/tensorflow_docs:__subpackages__", - "//third_party/py/keras:__subpackages__", ] package( diff --git a/third_party/xla/third_party/tsl/tsl/BUILD b/third_party/xla/third_party/tsl/tsl/BUILD index 7045fe93da7e0e..e40e9900022b1a 100644 --- a/third_party/xla/third_party/tsl/tsl/BUILD +++ b/third_party/xla/third_party/tsl/tsl/BUILD @@ -441,7 +441,6 @@ package_group( "//third_party/cloud_tpu/inference_converter/...", "//third_party/py/cloud_ml_autoflow/...", "//third_party/py/envlogger/...", - "//third_party/py/keras/...", "//third_party/yggdrasil_decision_forests/...", "//tsl/...", ], From 7c8a95f2ab9b8996eccf5c33729018a45af467cb Mon Sep 17 00:00:00 2001 From: Shixin Li Date: Fri, 22 Sep 2023 13:05:26 -0700 Subject: [PATCH 167/567] Enable cross compilation for PJRT GPU compiler: 1. 
StreamExecutorGpuCompiler compiles w/o client. 2. Add StreamExecutorGpuExecutable (the unloaded pjrt executable). 3. Load StreamExecutorGpuExecutable to PjRtLoadedExecutable through `Load` API. PiperOrigin-RevId: 567697879 --- third_party/xla/xla/client/local_client.h | 2 + third_party/xla/xla/pjrt/BUILD | 16 ++ third_party/xla/xla/pjrt/gpu/BUILD | 95 +++++++++- .../xla/xla/pjrt/gpu/se_gpu_pjrt_client.cc | 45 +++++ .../xla/xla/pjrt/gpu/se_gpu_pjrt_client.h | 5 + .../xla/xla/pjrt/gpu/se_gpu_pjrt_compiler.cc | 108 +++++++++++ .../xla/xla/pjrt/gpu/se_gpu_pjrt_compiler.h | 15 ++ .../pjrt/gpu/se_gpu_pjrt_compiler_aot_test.cc | 167 ++++++++++++++++++ .../xla/pjrt/gpu/se_gpu_pjrt_compiler_test.cc | 1 + .../xla/pjrt/pjrt_stream_executor_client.cc | 39 ++-- .../xla/pjrt/pjrt_stream_executor_client.h | 1 + .../stream_executor_unloaded_executable.cc | 31 ++++ .../stream_executor_unloaded_executable.h | 78 ++++++++ .../stream_executor_unloaded_executable.proto | 28 +++ third_party/xla/xla/service/gpu/BUILD | 14 ++ .../xla/xla/service/gpu/gpu_compiler.cc | 15 -- .../xla/xla/service/gpu/gpu_compiler.h | 13 +- .../xla/xla/service/gpu/gpu_target_config.cc | 38 ++++ .../xla/xla/service/gpu/gpu_target_config.h | 41 +++++ 19 files changed, 705 insertions(+), 47 deletions(-) create mode 100644 third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_compiler_aot_test.cc create mode 100644 third_party/xla/xla/pjrt/stream_executor_unloaded_executable.cc create mode 100644 third_party/xla/xla/pjrt/stream_executor_unloaded_executable.h create mode 100644 third_party/xla/xla/pjrt/stream_executor_unloaded_executable.proto create mode 100644 third_party/xla/xla/service/gpu/gpu_target_config.cc create mode 100644 third_party/xla/xla/service/gpu/gpu_target_config.h diff --git a/third_party/xla/xla/client/local_client.h b/third_party/xla/xla/client/local_client.h index 3b657b5113a00f..f14b98581a9fb2 100644 --- a/third_party/xla/xla/client/local_client.h +++ b/third_party/xla/xla/client/local_client.h @@ -225,6 
+225,8 @@ class LocalClient : public Client { const Backend& backend() const; Backend* mutable_backend(); + LocalService* local_service() { return local_service_; } + private: LocalService* local_service_; }; diff --git a/third_party/xla/xla/pjrt/BUILD b/third_party/xla/xla/pjrt/BUILD index a2aae828c0ddd3..dac1259700f4ee 100644 --- a/third_party/xla/xla/pjrt/BUILD +++ b/third_party/xla/xla/pjrt/BUILD @@ -372,6 +372,20 @@ cc_library( ], ) +cc_library( + name = "stream_executor_unloaded_executable", + srcs = ["stream_executor_unloaded_executable.cc"], + hdrs = ["stream_executor_unloaded_executable.h"], + visibility = ["//visibility:public"], + deps = [ + ":pjrt_executable", + "//xla:statusor", + "//xla/hlo/ir:hlo", + "//xla/service:compiler", + "@com_google_absl//absl/status", + ], +) + cc_library( name = "pjrt_stream_executor_client", srcs = ["pjrt_stream_executor_client.cc"], @@ -383,6 +397,7 @@ cc_library( ":metrics", ":mlir_to_hlo", ":pjrt_client", + ":pjrt_executable", ":pjrt_future", ":tracked_device_buffer", ":transpose", @@ -417,6 +432,7 @@ cc_library( "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/container:inlined_vector", + "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/synchronization", diff --git a/third_party/xla/xla/pjrt/gpu/BUILD b/third_party/xla/xla/pjrt/gpu/BUILD index 931a48d4888d3a..12c50d4c918a80 100644 --- a/third_party/xla/xla/pjrt/gpu/BUILD +++ b/third_party/xla/xla/pjrt/gpu/BUILD @@ -45,15 +45,22 @@ cc_library( ":gpu_topology", "//xla:statusor", "//xla:util", + "//xla:xla_proto_cc", "//xla/client:client_library", + "//xla/client:local_client", + "//xla/pjrt:compile_options_proto_cc", "//xla/pjrt:mlir_to_hlo", "//xla/pjrt:pjrt_client", "//xla/pjrt:pjrt_compiler", + "//xla/pjrt:pjrt_executable", "//xla/pjrt:pjrt_stream_executor_client", + 
"//xla/pjrt:stream_executor_unloaded_executable", "//xla/pjrt:tracked_device_buffer", "//xla/pjrt:utils", "//xla/pjrt/distributed:client", "//xla/pjrt/distributed:topology_util", + "//xla/service:compiler", + "//xla/service:executable", "//xla/service:platform_util", "//xla/service/gpu:gpu_executable_run_options", "//xla/stream_executor:device_mem_allocator", @@ -63,6 +70,7 @@ cc_library( "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:inlined_vector", "@com_google_absl//absl/log:check", + "@com_google_absl//absl/memory", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", @@ -82,9 +90,11 @@ cc_library( ":nccl_id_store_cuda", "@local_config_cuda//cuda:cuda_headers", "//xla/stream_executor/gpu:gpu_cudamallocasync_allocator", + "//xla/service/gpu:gpu_compiler", ]) + if_rocm([ ":nccl_id_store_rocm", "@local_config_rocm//rocm:rocm_headers", + "//xla/service/gpu:gpu_compiler", ]), ) @@ -202,15 +212,44 @@ cc_library( name = "se_gpu_pjrt_compiler", srcs = ["se_gpu_pjrt_compiler.cc"], hdrs = ["se_gpu_pjrt_compiler.h"], + defines = if_cuda(["GOOGLE_CUDA=1"]) + if_rocm(["TENSORFLOW_USE_ROCM=1"]), visibility = ["//visibility:public"], deps = [ ":se_gpu_pjrt_client", "//xla:status_macros", + "//xla/client:local_client", + "//xla/client:xla_computation", + "//xla/pjrt:mlir_to_hlo", "//xla/pjrt:pjrt_client", "//xla/pjrt:pjrt_compiler", "//xla/pjrt:pjrt_executable", + "//xla/pjrt:pjrt_stream_executor_client", + "//xla/pjrt:stream_executor_unloaded_executable", + "//xla/pjrt:utils", + "//xla/service:compiler", + "//xla/service:dump", + "//xla/service:hlo_module_util", + "//xla/service:hlo_proto_cc", + "//xla/service:hlo_proto_util", + "//xla/service:local_service", + "//xla/service/gpu:executable_proto_cc", + "//xla/service/gpu:gpu_target_config", + "//xla/stream_executor/cuda:cuda_platform_id", "@com_google_absl//absl/status", - ], + "@local_tsl//tsl/platform:errors", + ] + if_cuda([ + 
":nccl_id_store_cuda", + "@local_config_cuda//cuda:cuda_headers", + "//xla/stream_executor/cuda:cuda_activation_header", + "//xla/stream_executor/gpu:gpu_cudamallocasync_allocator", + "//xla/service/gpu:gpu_compiler", + "//xla/service/gpu:nvptx_compiler_impl", + ]) + if_rocm([ + ":nccl_id_store_rocm", + "@local_config_rocm//rocm:rocm_headers", + "//xla/service/gpu:gpu_compiler", + "//xla/service/gpu:amdgpu_compiler_impl", + ]), alwayslink = True, ) @@ -223,10 +262,12 @@ xla_cc_test( ":se_gpu_pjrt_compiler", "//xla/client:xla_computation", "//xla/mlir_hlo", + "//xla/pjrt:pjrt_client", "//xla/service:gpu_plugin", "//xla/service:hlo_parser", "//xla/stream_executor/cuda:cublas_plugin", "//xla/tests:literal_test_util", + "@com_google_absl//absl/memory", "@com_google_absl//absl/status", "@llvm-project//mlir:FuncDialect", "@llvm-project//mlir:Parser", @@ -234,3 +275,55 @@ xla_cc_test( "@local_tsl//tsl/platform:test_main", ], ) + +xla_cc_test( + name = "se_gpu_pjrt_compiler_aot_test", + srcs = if_gpu_is_configured(["se_gpu_pjrt_compiler_aot_test.cc"]), + local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]), + tags = [ + "config-cuda-only", + "gpu", + "no_oss", + "no_tap", + "requires-gpu-nvidia", + ], + deps = [ + ":se_gpu_pjrt_client", + ":se_gpu_pjrt_compiler", + "//tensorflow/core/platform:path", + "//third_party/protobuf", + "//xla:literal", + "//xla:literal_util", + "//xla/client:xla_computation", + "//xla/mlir_hlo", + "//xla/pjrt:pjrt_client", + "//xla/pjrt:pjrt_compiler", + "//xla/pjrt:pjrt_executable", + "//xla/service:gpu_plugin", + "//xla/service:hlo_parser", + "//xla/service/gpu:gpu_target_config", + "//xla/stream_executor/cuda:cublas_plugin", + "//xla/tests:literal_test_util", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings:string_view", + "@com_google_googletest//:gtest", + "@llvm-project//mlir:FuncDialect", + "@llvm-project//mlir:IR", + 
"@llvm-project//mlir:Parser", + "@local_tsl//tsl/lib/core:status_test_util", + "@local_tsl//tsl/platform:casts", + "@local_tsl//tsl/platform:status_matchers", + "@local_tsl//tsl/platform:statusor", + "@local_tsl//tsl/platform:test_main", + ] + if_cuda([ + ":nccl_id_store_cuda", + "@local_config_cuda//cuda:cuda_headers", + "//xla/stream_executor/cuda:cuda_activation_header", + "//xla/stream_executor/gpu:gpu_cudamallocasync_allocator", + ]) + if_rocm([ + ":nccl_id_store_rocm", + "@local_config_rocm//rocm:rocm_headers", + ]), +) diff --git a/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_client.cc b/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_client.cc index 567e4e00194975..6df22004640e1b 100644 --- a/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_client.cc +++ b/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_client.cc @@ -28,6 +28,7 @@ limitations under the License. #include "absl/container/flat_hash_map.h" #include "absl/container/inlined_vector.h" #include "absl/log/check.h" +#include "absl/memory/memory.h" #include "absl/status/status.h" #include "absl/status/statusor.h" #include "absl/strings/ascii.h" @@ -35,12 +36,17 @@ limitations under the License. 
#include "absl/strings/str_join.h" #include "absl/synchronization/blocking_counter.h" #include "absl/time/time.h" +#include "xla/client/local_client.h" #include "xla/pjrt/distributed/topology_util.h" #include "xla/pjrt/pjrt_client.h" #include "xla/pjrt/pjrt_compiler.h" +#include "xla/pjrt/pjrt_executable.h" #include "xla/pjrt/pjrt_stream_executor_client.h" +#include "xla/pjrt/stream_executor_unloaded_executable.h" #include "xla/pjrt/tracked_device_buffer.h" #include "xla/pjrt/utils.h" +#include "xla/service/compiler.h" +#include "xla/service/executable.h" #include "xla/stream_executor/device_memory.h" #include "tsl/framework/allocator.h" #include "tsl/framework/bfc_allocator.h" @@ -533,6 +539,45 @@ std::vector> BuildLocalDevices( return devices; } +StatusOr> StreamExecutorGpuClient::Load( + std::unique_ptr executable) { + auto se_executable = + absl::WrapUnique(tensorflow::down_cast( + executable.release())); + + CompileOptions compile_options = se_executable->compile_options(); + TF_RETURN_IF_ERROR(compile_options.ApplyAllOptionOverrides()); + TF_ASSIGN_OR_RETURN(ExecutableExtras extras, + GetExecutableExtras(&compile_options)); + + TF_ASSIGN_OR_RETURN( + auto se_executor, + client()->backend().stream_executor( + compile_options.executable_build_options.device_ordinal())); + + // Load Executable from AOT compilation result. 
+ std::vector> local_executables; + local_executables.reserve(se_executable->aot_executables().size()); + for (std::unique_ptr& aot_executable : + se_executable->aot_executables()) { + TF_ASSIGN_OR_RETURN(std::unique_ptr executable, + aot_executable->LoadExecutable( + client()->backend().compiler(), se_executor)); + local_executables.push_back(std::make_unique( + std::move(executable), client()->local_service()->mutable_backend(), + compile_options.executable_build_options)); + } + bool parameter_is_tupled_arguments = + compile_options.parameter_is_tupled_arguments; + auto ret = std::make_unique( + std::move(local_executables), parameter_is_tupled_arguments, + std::move(extras.device_assignment), std::move(compile_options), + std::move(extras.addressable_device_logical_ids), + std::move(extras.addressable_devices), this); + TF_RETURN_IF_ERROR(ret->SetUpDonation(parameter_is_tupled_arguments)); + return std::unique_ptr(std::move(ret)); +} + namespace { #if defined(GOOGLE_CUDA) && CUDA_VERSION >= 11020 diff --git a/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_client.h b/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_client.h index 00342d028df944..7b2f6da962daa7 100644 --- a/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_client.h +++ b/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_client.h @@ -195,6 +195,11 @@ class StreamExecutorGpuClient : public xla::PjRtStreamExecutorClient { tensorflow::down_cast(executable.release())); } + // TODO(b/296466237): Unify `Load` method after (de)serialization and tests on + // existing use cases are done. + StatusOr> Load( + std::unique_ptr executable); + private: xla::StreamExecutorGpuTopologyDescription topology_; }; diff --git a/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_compiler.cc b/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_compiler.cc index 795a5bd41bc9fe..2535b5d5308ada 100644 --- a/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_compiler.cc +++ b/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_compiler.cc @@ -16,12 +16,36 @@ limitations under the License. 
#include "xla/pjrt/gpu/se_gpu_pjrt_compiler.h" #include +#include #include "absl/status/status.h" +#include "xla/client/xla_computation.h" #include "xla/pjrt/gpu/se_gpu_pjrt_client.h" #include "xla/pjrt/pjrt_client.h" #include "xla/pjrt/pjrt_compiler.h" +#include "xla/pjrt/pjrt_executable.h" #include "xla/status_macros.h" +#include "tsl/platform/errors.h" + +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#include "xla/client/local_client.h" +#include "xla/pjrt/mlir_to_hlo.h" +#include "xla/pjrt/stream_executor_unloaded_executable.h" +#include "xla/pjrt/utils.h" +#include "xla/service/dump.h" +#include "xla/service/gpu/executable.pb.h" +#include "xla/service/gpu/gpu_compiler.h" +#include "xla/service/hlo_module_util.h" +#include "xla/service/hlo_proto_util.h" +#include "xla/service/local_service.h" +#include "xla/stream_executor/cuda/cuda_platform_id.h" +#endif + +#if GOOGLE_CUDA +#include "xla/service/gpu/nvptx_compiler.h" +#elif TENSORFLOW_USE_ROCM +#include "xla/service/gpu/amdgpu_compiler.h" +#endif namespace xla { namespace { @@ -59,6 +83,67 @@ absl::Status IsValidTopologyAndClientForCompile( } return absl::OkStatus(); } + +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +absl::StatusOr> AotCompile( + CompileOptions options, const XlaComputation& computation, + gpu::GpuTargetConfig& gpu_target_config) { + CompileOptions input_options = options; + TF_RETURN_IF_ERROR(options.ApplyAllOptionOverrides()); + + std::vector argument_layout_pointers; + TF_RETURN_IF_ERROR(DetermineArgumentLayoutsFromCompileOptions( + computation, + [](Shape shape) { return LayoutUtil::GetWithDefaultLayout(shape); }, + options.argument_layouts, &options.executable_build_options, + &argument_layout_pointers)); + + // TODO(b/300657649): Call `UpdateBuildOptions` like in LocalClient::Compile. + // TODO(b/300657649): Get HloModuleConfig from `GetHloModuleConfig` like in + // LocalService::CompileExecutables. 
+ HloModuleProto hlo_module_proto = computation.proto(); + TF_ASSIGN_OR_RETURN(ProgramShape shape, computation.GetProgramShape()); + DebugOptions debug_options = DefaultDebugOptionsIgnoringFlags(); + HloModuleConfig config(shape); + config.set_debug_options(debug_options); + + TF_ASSIGN_OR_RETURN(std::unique_ptr hlo_module, + HloModule::CreateFromProto(hlo_module_proto, config)); +#if GOOGLE_CUDA + auto gpu_compiler = gpu::NVPTXCompiler(); +#elif TENSORFLOW_USE_ROCM + auto gpu_compiler = gpu::AMDGPUCompiler(); +#endif + + UpdateEntryComputationLayout( + hlo_module.get(), std::bind(&Compiler::DefaultDeviceShapeRepresentation, + &gpu_compiler, std::placeholders::_1)); + DumpHloModuleIfEnabled(*hlo_module, kBeforeOptimizationsDumpName); + + if (!options.executable_build_options.run_backend_only()) { + TF_ASSIGN_OR_RETURN(hlo_module, + gpu_compiler.RunHloPassesWithoutDevice( + std::move(hlo_module), Compiler::CompileOptions{}, + gpu_target_config, AutotuneResults())); + } + + AotCompilationOptions aot_options(gpu_compiler.PlatformId()); + aot_options.set_target_config(gpu_target_config); + + const int num_replicas = hlo_module->config().replica_count(); + const int num_partitions = hlo_module->config().num_partitions(); + const std::string name = hlo_module->name(); + auto unique_module_group = + std::make_unique(std::move(hlo_module)); + TF_ASSIGN_OR_RETURN( + std::vector> aot_results, + gpu_compiler.CompileAheadOfTime(std::move(unique_module_group), + aot_options)); + return std::make_unique( + std::move(input_options), std::move(aot_results), num_replicas, + num_partitions, name); +} +#endif } // namespace // TODO(b/285385306): Enable compilation on provided `topology`. 
@@ -67,6 +152,15 @@ StreamExecutorGpuCompiler::Compile(CompileOptions options, const XlaComputation& computation, const PjRtTopologyDescription& topology, PjRtClient* client) { + if (client == nullptr && gpu_target_config_ != std::nullopt) { +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM + return AotCompile(options, computation, *gpu_target_config_); +#endif + return absl::InternalError( + "GPU AOT compilation requires the target to be built with CUDA or " + "ROCm."); + } + // TODO(b/296466237): Remove client dependency. TF_RETURN_IF_ERROR(IsValidTopologyAndClientForCompile(topology, client)); return client->Compile(computation, options); } @@ -76,6 +170,20 @@ StreamExecutorGpuCompiler::Compile(CompileOptions options, mlir::ModuleOp module, const PjRtTopologyDescription& topology, PjRtClient* client) { + if (client == nullptr && gpu_target_config_ != std::nullopt) { +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM + XlaComputation xla_computation; + TF_RETURN_IF_ERROR(MlirToXlaComputation( + module, xla_computation, + /*use_tuple_args=*/options.parameter_is_tupled_arguments, + /*return_tuple=*/false)); + return AotCompile(options, xla_computation, *gpu_target_config_); +#endif + return absl::InternalError( + "GPU AOT compilation requires the target to be built with CUDA or " + "ROCm."); + } + // TODO(b/296466237): Remove client dependency. TF_RETURN_IF_ERROR(IsValidTopologyAndClientForCompile(topology, client)); return client->Compile(module, options); } diff --git a/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_compiler.h b/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_compiler.h index 6d44d28b29442b..fe3a67eb5849ef 100644 --- a/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_compiler.h +++ b/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_compiler.h @@ -17,9 +17,13 @@ limitations under the License. 
#define XLA_PJRT_GPU_SE_GPU_PJRT_COMPILER_H_ #include +#include +#include "absl/status/status.h" #include "xla/pjrt/pjrt_compiler.h" #include "xla/pjrt/pjrt_executable.h" +#include "xla/service/compiler.h" +#include "xla/service/gpu/gpu_target_config.h" namespace xla { // Implements the interfaces that are needed for the registered compiler. @@ -27,6 +31,12 @@ namespace xla { // Compile() functions and ignores the `topology` parameter. class StreamExecutorGpuCompiler : public PjRtCompiler { public: + // If `gpu_target_config` is nullopt, the compiler has to compile with device, + // i.e. calling of `Compile` should depend on the passed-in client's + // compilation functionality. + explicit StreamExecutorGpuCompiler(const std::optional + gpu_target_config = std::nullopt) + : gpu_target_config_(gpu_target_config) {} absl::StatusOr> Compile( CompileOptions options, const XlaComputation& computation, const PjRtTopologyDescription& topology, PjRtClient* client) override; @@ -34,6 +44,11 @@ class StreamExecutorGpuCompiler : public PjRtCompiler { absl::StatusOr> Compile( CompileOptions options, mlir::ModuleOp module, const PjRtTopologyDescription& topology, PjRtClient* client) override; + + private: + // GpuTargetConfig is used by GPU compiler for ahead-of-time (AOT) compilation + // without device. + std::optional gpu_target_config_; }; } // namespace xla #endif // XLA_PJRT_GPU_SE_GPU_PJRT_COMPILER_H_ diff --git a/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_compiler_aot_test.cc b/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_compiler_aot_test.cc new file mode 100644 index 00000000000000..91c063300117da --- /dev/null +++ b/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_compiler_aot_test.cc @@ -0,0 +1,167 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include + +#include +#include +#include "absl/memory/memory.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "absl/strings/string_view.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" // from @llvm-project +#include "mlir/IR/BuiltinOps.h" // from @llvm-project +#include "mlir/IR/MLIRContext.h" // from @llvm-project +#include "mlir/Parser/Parser.h" // from @llvm-project +#include "third_party/protobuf/text_format.h" +#include "xla/client/xla_computation.h" +#include "xla/literal.h" +#include "xla/literal_util.h" +#include "xla/mlir_hlo/mhlo/IR/hlo_ops.h" +#include "xla/pjrt/gpu/se_gpu_pjrt_client.h" +#include "xla/pjrt/gpu/se_gpu_pjrt_compiler.h" +#include "xla/pjrt/pjrt_client.h" +#include "xla/pjrt/pjrt_executable.h" +#include "xla/service/gpu/gpu_target_config.h" +#include "xla/service/hlo_parser.h" +#include "xla/tests/literal_test_util.h" +#include "tsl/platform/casts.h" +#include "tsl/platform/statusor.h" + +namespace xla { +namespace { + +constexpr absl::string_view kProgram = R"(HloModule Computation + +ENTRY Computation() -> s32[] { + ROOT result = s32[] constant(2) +})"; + +constexpr absl::string_view mlir_str = R"mlir( + module { + func.func @main() -> tensor { + %0 = mhlo.constant dense<2> : tensor + return %0 : tensor + } + })mlir"; + +constexpr absl::string_view kGpuTargetConfig = + R"pb( + gpu_device_info { + threads_per_block_limit: 1024 + threads_per_warp: 32 + shared_memory_per_block: 49152 + shared_memory_per_core: 65536 
+ threads_per_core_limit: 2048 + core_count: 56 + fpus_per_core: 64 + block_dim_limit_x: 2147483647 + block_dim_limit_y: 65535 + block_dim_limit_z: 65535 + memory_bandwidth: 732160000000 + l2_cache_size: 4194304 + clock_rate_ghz: 1.4805 + device_memory_size: 17066622976 + shared_memory_per_block_optin: 49152 + cuda_compute_capability { major: 6 } + } + platform_name: "CUDA" + dnn_version_info {} + device_description_str: "sm_6.0 with 17071734784B RAM, 56 cores, 1480500KHz clock, 715000KHz mem clock, 4194304B L2$" + )pb"; + +absl::StatusOr GetXlaComputation( + absl::string_view program) { + TF_ASSIGN_OR_RETURN(auto hlo_module, + xla::ParseAndReturnUnverifiedModule(program, {})); + + return XlaComputation(hlo_module->ToProto()); +} + +void ValidateResult( + std::vector>>& result) { + ASSERT_EQ(result.size(), 1); + std::vector>& result_buffers = result[0]; + ASSERT_EQ(result_buffers.size(), 1); + TF_ASSERT_OK_AND_ASSIGN(std::shared_ptr result_literal, + result_buffers[0]->ToLiteralSync()); + EXPECT_TRUE( + LiteralTestUtil::Equal(LiteralUtil::CreateR0(2), *result_literal)); +} + +absl::StatusOr GetGpuTargetConfig() { + stream_executor::GpuTargetConfigProto gpu_target_config_proto; + if (!proto2::TextFormat::ParseFromString(kGpuTargetConfig, + &gpu_target_config_proto)) { + return absl::InvalidArgumentError("Failed to parse GpuTargetConfigProto"); + } + return gpu::GpuTargetConfig(gpu_target_config_proto); +} + +TEST(StreamExecutorGpuCompilerTest, SuccessAotCompileMlirAndLoad) { + ASSERT_OK_AND_ASSIGN(const gpu::GpuTargetConfig gpu_config, + GetGpuTargetConfig()); + CompileOptions options = xla::CompileOptions(); + StreamExecutorGpuCompiler compiler(gpu_config); + + TF_ASSERT_OK_AND_ASSIGN( + auto client, GetStreamExecutorGpuClient(true, /*allocator_config=*/{}, + /*node_id=*/0)); + auto se_client = absl::WrapUnique( + tensorflow::down_cast(client.release())); + mlir::MLIRContext context; + context.loadDialect(); + auto mlir_module = + mlir::parseSourceString(mlir_str, 
&context); + TF_ASSERT_OK_AND_ASSIGN(auto topology, se_client->GetTopologyDescription()); + TF_ASSERT_OK_AND_ASSIGN( + auto executable, compiler.Compile(xla::CompileOptions(), + mlir_module.get(), *topology, nullptr)); + TF_ASSERT_OK_AND_ASSIGN(auto loaded_executable, + se_client->Load(std::move(executable))); + + TF_ASSERT_OK_AND_ASSIGN( + auto result, loaded_executable->Execute(/*argument_handles=*/{{}}, {})); + ValidateResult(result); +} + +TEST(StreamExecutorGpuCompilerTest, SuccessAotCompileXlaAndLoad) { + ASSERT_OK_AND_ASSIGN(const gpu::GpuTargetConfig gpu_config, + GetGpuTargetConfig()); + CompileOptions options = xla::CompileOptions(); + StreamExecutorGpuCompiler compiler(gpu_config); + + TF_ASSERT_OK_AND_ASSIGN( + auto client, GetStreamExecutorGpuClient(true, /*allocator_config=*/{}, + /*node_id=*/0)); + auto se_client = absl::WrapUnique( + tensorflow::down_cast(client.release())); + + TF_ASSERT_OK_AND_ASSIGN(auto computation, GetXlaComputation(kProgram)); + TF_ASSERT_OK_AND_ASSIGN(auto topology, se_client->GetTopologyDescription()); + TF_ASSERT_OK_AND_ASSIGN( + auto executable, + compiler.Compile(xla::CompileOptions(), computation, *topology, nullptr)); + TF_ASSERT_OK_AND_ASSIGN(auto loaded_executable, + se_client->Load(std::move(executable))); + TF_ASSERT_OK_AND_ASSIGN( + auto result, loaded_executable->Execute(/*argument_handles=*/{{}}, {})); + ValidateResult(result); +} + +} // namespace +} // namespace xla diff --git a/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_compiler_test.cc b/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_compiler_test.cc index e238b1a1a8ad38..d4bc1beb3f74e4 100644 --- a/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_compiler_test.cc +++ b/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_compiler_test.cc @@ -24,6 +24,7 @@ limitations under the License. 
#include "xla/client/xla_computation.h" #include "xla/mlir_hlo/mhlo/IR/hlo_ops.h" #include "xla/pjrt/gpu/se_gpu_pjrt_client.h" +#include "xla/pjrt/pjrt_client.h" #include "xla/service/hlo_parser.h" #include "xla/tests/literal_test_util.h" #include "tsl/platform/status_matchers.h" diff --git a/third_party/xla/xla/pjrt/pjrt_stream_executor_client.cc b/third_party/xla/xla/pjrt/pjrt_stream_executor_client.cc index c5075678754062..f9fa35debcb7b0 100644 --- a/third_party/xla/xla/pjrt/pjrt_stream_executor_client.cc +++ b/third_party/xla/xla/pjrt/pjrt_stream_executor_client.cc @@ -80,6 +80,7 @@ limitations under the License. #include "absl/algorithm/container.h" #include "absl/base/casts.h" +#include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" #include "absl/container/inlined_vector.h" #include "absl/strings/match.h" @@ -100,6 +101,7 @@ limitations under the License. #include "xla/pjrt/metrics.h" #include "xla/pjrt/mlir_to_hlo.h" #include "xla/pjrt/pjrt_client.h" +#include "xla/pjrt/pjrt_executable.h" #include "xla/pjrt/pjrt_future.h" #include "xla/pjrt/tracked_device_buffer.h" #include "xla/pjrt/utils.h" @@ -1550,7 +1552,6 @@ PjRtStreamExecutorBuffer::CopyToDeviceHelper( dst_device, dst_local_device, transfer_stream, /*is_uninitialized_create=*/false, client_)); - ScopedHold dst_device_buffer(py_buffer->GetBufferWithUsageHold()); CHECK(dst_device_buffer.ok()); @@ -2150,15 +2151,15 @@ static SendDeviceMemoryFunction ConvertSendCallbacksToSendFunction( tsl::thread::ThreadPool* thread_pool) { // Check if we have callbacks registered for the given device ordinal. 
if (device_ordinal >= options.send_callbacks.size()) { - return [device_ordinal]( - int64_t channel_id, se::Stream*, const Shape&, - const se::DeviceMemoryBase&, - const absl::flat_hash_map&) { - return InvalidArgument( - "Failed to send a buffer to the channel_id=%d, there was no send " - "callbacks registered for the device_ordinal=%d", - channel_id, device_ordinal); - }; + return + [device_ordinal](int64_t channel_id, se::Stream*, const Shape&, + const se::DeviceMemoryBase&, + const absl::flat_hash_map&) { + return InvalidArgument( + "Failed to send a buffer to the channel_id=%d, there was no send " + "callbacks registered for the device_ordinal=%d", + channel_id, device_ordinal); + }; } // SendCallbacks registered for a device ordinal. Can be empty. @@ -2305,15 +2306,15 @@ static RecvDeviceMemoryFunction ConvertRecvCallbacksToRecvFunction( int device_ordinal, const ExecuteOptions& options) { // Check if we have callbacks registered for the given device ordinal. if (device_ordinal >= options.send_callbacks.size()) { - return [device_ordinal]( - int64_t channel_id, se::Stream*, const Shape&, - se::DeviceMemoryBase*, - const absl::flat_hash_map&) { - return InvalidArgument( - "Failed to receive a buffer from the channel_id=%d, there was no " - "recv callbacks registered for the device_ordinal=%d", - channel_id, device_ordinal); - }; + return + [device_ordinal](int64_t channel_id, se::Stream*, const Shape&, + se::DeviceMemoryBase*, + const absl::flat_hash_map&) { + return InvalidArgument( + "Failed to receive a buffer from the channel_id=%d, there was no " + "recv callbacks registered for the device_ordinal=%d", + channel_id, device_ordinal); + }; } // RecvCallbacks registered for a device ordinal. Can be empty. 
diff --git a/third_party/xla/xla/pjrt/pjrt_stream_executor_client.h b/third_party/xla/xla/pjrt/pjrt_stream_executor_client.h index 27fcc20386598b..a8225d46f4d030 100644 --- a/third_party/xla/xla/pjrt/pjrt_stream_executor_client.h +++ b/third_party/xla/xla/pjrt/pjrt_stream_executor_client.h @@ -880,6 +880,7 @@ class PjRtStreamExecutorExecutable : public PjRtLoadedExecutable { friend class PjRtStreamExecutorClient; friend class PjRtTpuClient; friend class InternalPjRtTpuClient; + friend class StreamExecutorGpuClient; // Initializes information about which arguments to which executables must be // donated due to aliases that were specified by the computation. Status SetUpDonation(bool tuple_inputs); diff --git a/third_party/xla/xla/pjrt/stream_executor_unloaded_executable.cc b/third_party/xla/xla/pjrt/stream_executor_unloaded_executable.cc new file mode 100644 index 00000000000000..460737315f3343 --- /dev/null +++ b/third_party/xla/xla/pjrt/stream_executor_unloaded_executable.cc @@ -0,0 +1,31 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "xla/pjrt/stream_executor_unloaded_executable.h" + +#include + +#include "absl/status/status.h" +#include "xla/statusor.h" + +namespace xla { +// TODO(b/296466237): Add serialization. 
+StatusOr StreamExecutorUnloadedExecutable::SerializeExecutable() + const { + return absl::UnimplementedError( + "StreamExecutorUnloadedExecutable::SerializeExecutable() not " + "implemented"); +} +} // namespace xla diff --git a/third_party/xla/xla/pjrt/stream_executor_unloaded_executable.h b/third_party/xla/xla/pjrt/stream_executor_unloaded_executable.h new file mode 100644 index 00000000000000..a44660e9579b68 --- /dev/null +++ b/third_party/xla/xla/pjrt/stream_executor_unloaded_executable.h @@ -0,0 +1,78 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef XLA_PJRT_STREAM_EXECUTOR_UNLOADED_EXECUTABLE_H_ +#define XLA_PJRT_STREAM_EXECUTOR_UNLOADED_EXECUTABLE_H_ + +#include "absl/status/status.h" +#include "xla/hlo/ir/hlo_module.h" +#include "xla/pjrt/pjrt_executable.h" +#include "xla/service/compiler.h" + +namespace xla { +// TODO(b/300657649): Rename existing PjRtStreamExecutorExecutable to +// PjRtStreamExecutorLoadedExecutable, and this one to +// PjRtStreamExecutorExecutable. 
+class StreamExecutorUnloadedExecutable : public PjRtExecutable { + public: + StreamExecutorUnloadedExecutable( + const CompileOptions& compile_options, + std::vector> executables, + int num_replicas, int num_partitions, absl::string_view name) + : compile_options_(compile_options), + aot_executables_(std::move(executables)), + num_replicas_(num_replicas), + num_partitions_(num_partitions), + name_(name) {} + + StatusOr SerializeExecutable() const override; + + absl::string_view name() const override { return name_; } + int num_replicas() const override { return num_replicas_; } + int num_partitions() const override { return num_partitions_; } + absl::StatusOr GetCompileOptions() const override { + return compile_options_; + } + absl::StatusOr>> GetHloModules() + const override { + return absl::UnimplementedError("GetHloModules is not supported."); + } + + absl::StatusOr>> + GetOutputMemoryKinds() const override { + return absl::UnimplementedError("GetOutputMemoryKinds is not supported."); + } + StatusOr> GetCostAnalysis() + const override { + return absl::UnimplementedError("GetCostAnalysis is not supported."); + } + + int64_t SizeOfGeneratedCodeInBytes() const override { return 0; } + + const CompileOptions& compile_options() const { return compile_options_; } + std::vector>& aot_executables() { + return aot_executables_; + } + + private: + CompileOptions compile_options_; + std::vector> aot_executables_; + int num_replicas_; + int num_partitions_; + std::string name_; +}; +} // namespace xla + +#endif // XLA_PJRT_STREAM_EXECUTOR_UNLOADED_EXECUTABLE_H_ diff --git a/third_party/xla/xla/pjrt/stream_executor_unloaded_executable.proto b/third_party/xla/xla/pjrt/stream_executor_unloaded_executable.proto new file mode 100644 index 00000000000000..28b3e21204c766 --- /dev/null +++ b/third_party/xla/xla/pjrt/stream_executor_unloaded_executable.proto @@ -0,0 +1,28 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +syntax = "proto3"; + +package xla; + +import "xla/pjrt/compile_options.proto"; + +message StreamExecutorGpuExecutableProto { + CompileOptionsProto compile_options = 1; + repeated bytes executables = 2; + int32 num_replicas = 3; + int32 num_partitions = 4; + string name = 5; +} diff --git a/third_party/xla/xla/service/gpu/BUILD b/third_party/xla/xla/service/gpu/BUILD index 17eb3ead3fd0a4..faf60063e796dc 100644 --- a/third_party/xla/xla/service/gpu/BUILD +++ b/third_party/xla/xla/service/gpu/BUILD @@ -2559,6 +2559,19 @@ cc_library( ], ) +cc_library( + name = "gpu_target_config", + srcs = ["gpu_target_config.cc"], + hdrs = ["gpu_target_config.h"], + compatible_with = get_compatible_with_portable(), + visibility = ["//visibility:public"], + deps = [ + ":gpu_device_info", + "//xla/stream_executor:device_description_proto_cc_impl", + "//xla/stream_executor:dnn", + ], +) + cc_library( name = "gpu_compiler", srcs = if_gpu_is_configured([ @@ -2599,6 +2612,7 @@ cc_library( ":gpu_sanitize_constant_names", ":gpu_scatter_expander", ":gpu_shape_verifier", + ":gpu_target_config", ":hlo_fusion_stats", ":horizontal_input_fusion", ":horizontal_loop_fusion", diff --git a/third_party/xla/xla/service/gpu/gpu_compiler.cc b/third_party/xla/xla/service/gpu/gpu_compiler.cc index ec61161d3b5ef0..f025ef98cc0af9 100644 --- a/third_party/xla/xla/service/gpu/gpu_compiler.cc +++ 
b/third_party/xla/xla/service/gpu/gpu_compiler.cc @@ -257,21 +257,6 @@ GpuXlaRuntimeAotCompilationResult::LoadExecutable( gpu_compiler->GetGpuVersion(executor), executor); } -GpuTargetConfig::GpuTargetConfig(const se::GpuTargetConfigProto& proto) - : gpu_device_info(proto.gpu_device_info()), - platform_name(proto.platform_name()), - dnn_version_info(proto.dnn_version_info()), - device_description_str(proto.device_description_str()) {} - -se::GpuTargetConfigProto GpuTargetConfig::ToProto() const { - se::GpuTargetConfigProto proto; - *proto.mutable_gpu_device_info() = gpu_device_info.ToProto(); - proto.set_platform_name(platform_name); - *proto.mutable_dnn_version_info() = dnn_version_info.ToProto(); - proto.set_device_description_str(device_description_str); - return proto; -} - GpuCompiler::GpuCompiler(se::Platform::Id platform_id, const char* target_triple, const char* data_layout) : platform_id_(platform_id), diff --git a/third_party/xla/xla/service/gpu/gpu_compiler.h b/third_party/xla/xla/service/gpu/gpu_compiler.h index 2a34ee7b7f86e5..bef6f651119e62 100644 --- a/third_party/xla/xla/service/gpu/gpu_compiler.h +++ b/third_party/xla/xla/service/gpu/gpu_compiler.h @@ -31,6 +31,7 @@ limitations under the License. 
#include "xla/service/gpu/executable.pb.h" #include "xla/service/gpu/gpu_device_info.h" #include "xla/service/gpu/gpu_executable.h" +#include "xla/service/gpu/gpu_target_config.h" #include "xla/service/hlo.pb.h" #include "xla/service/hlo_dataflow_analysis.h" #include "xla/service/hlo_pass_pipeline.h" @@ -97,18 +98,6 @@ class GpuXlaRuntimeAotCompilationResult : public AotCompilationResult { XlaRuntimeGpuExecutableProto xla_runtime_gpu_executable_; }; -struct GpuTargetConfig { - GpuTargetConfig() = default; - explicit GpuTargetConfig(const stream_executor::GpuTargetConfigProto& proto); - - se::GpuTargetConfigProto ToProto() const; - - GpuDeviceInfo gpu_device_info; - std::string platform_name; - se::dnn::VersionInfo dnn_version_info; - std::string device_description_str; -}; - // The GPU compiler generates efficient GPU executables. class GpuCompiler : public LLVMCompiler { public: diff --git a/third_party/xla/xla/service/gpu/gpu_target_config.cc b/third_party/xla/xla/service/gpu/gpu_target_config.cc new file mode 100644 index 00000000000000..44a5ceff8534f8 --- /dev/null +++ b/third_party/xla/xla/service/gpu/gpu_target_config.cc @@ -0,0 +1,38 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "xla/service/gpu/gpu_target_config.h" + +namespace xla { +namespace gpu { + +GpuTargetConfig::GpuTargetConfig( + const stream_executor::GpuTargetConfigProto& proto) + : gpu_device_info(proto.gpu_device_info()), + platform_name(proto.platform_name()), + dnn_version_info(proto.dnn_version_info()), + device_description_str(proto.device_description_str()) {} + +stream_executor::GpuTargetConfigProto GpuTargetConfig::ToProto() const { + stream_executor::GpuTargetConfigProto proto; + *proto.mutable_gpu_device_info() = gpu_device_info.ToProto(); + proto.set_platform_name(platform_name); + *proto.mutable_dnn_version_info() = dnn_version_info.ToProto(); + proto.set_device_description_str(device_description_str); + return proto; +} + +} // namespace gpu +} // namespace xla diff --git a/third_party/xla/xla/service/gpu/gpu_target_config.h b/third_party/xla/xla/service/gpu/gpu_target_config.h new file mode 100644 index 00000000000000..4c5e40f58fda83 --- /dev/null +++ b/third_party/xla/xla/service/gpu/gpu_target_config.h @@ -0,0 +1,41 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef XLA_SERVICE_GPU_GPU_TARGET_CONFIG_H_ +#define XLA_SERVICE_GPU_GPU_TARGET_CONFIG_H_ + +#include "xla/service/gpu/gpu_device_info.h" +#include "xla/stream_executor/device_description.pb.h" +#include "xla/stream_executor/dnn.h" + +namespace xla { +namespace gpu { + +struct GpuTargetConfig { + GpuTargetConfig() = default; + explicit GpuTargetConfig(const stream_executor::GpuTargetConfigProto& proto); + + stream_executor::GpuTargetConfigProto ToProto() const; + + GpuDeviceInfo gpu_device_info; + std::string platform_name; + stream_executor::dnn::VersionInfo dnn_version_info; + std::string device_description_str; +}; + +} // namespace gpu +} // namespace xla + +#endif // XLA_SERVICE_GPU_GPU_TARGET_CONFIG_H_ From 2779ea10a44c93635e9f59f60cd320a293636440 Mon Sep 17 00:00:00 2001 From: Matthias Kramm Date: Fri, 22 Sep 2023 13:31:51 -0700 Subject: [PATCH 168/567] For embedding program key argument patching, allow ops with mini_batch_splits to be outside of a tf_device.launch. 
PiperOrigin-RevId: 567704317 --- .../tests/embedding_program_key.mlir | 131 +++++++++++-- .../transforms/embedding_program_key.cc | 173 ++++++++++++------ 2 files changed, 232 insertions(+), 72 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/embedding_program_key.mlir b/tensorflow/compiler/mlir/tensorflow/tests/embedding_program_key.mlir index 078c59ba67f335..6cf7cc91e73f4a 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/embedding_program_key.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/embedding_program_key.mlir @@ -17,6 +17,8 @@ func.func @single_op_program_key() { return } +// ----- + // CHECK-LABEL: func @multiple_ops_program_key func.func @multiple_ops_program_key() { // CHECK: %[[COMPILE_LAUNCH:[0-9]*]]:2 = "tf_device.launch" @@ -36,6 +38,8 @@ func.func @multiple_ops_program_key() { return } +// ----- + // CHECK-LABEL: func @reorder_single_op_program_key func.func @reorder_single_op_program_key() { // CHECK: %[[COMPILE_LAUNCH:[0-9]*]]:2 = "tf_device.launch" @@ -53,6 +57,8 @@ func.func @reorder_single_op_program_key() { return } +// ----- + // CHECK-LABEL: func @reorder_multiple_ops_program_key func.func @reorder_multiple_ops_program_key() { // CHECK: %[[COMPILE_LAUNCH:[0-9]*]]:2 = "tf_device.launch" @@ -72,6 +78,8 @@ func.func @reorder_multiple_ops_program_key() { return } +// ----- + // CHECK-LABEL: func @reorder_multiple_ops_with_successors_program_key func.func @reorder_multiple_ops_with_successors_program_key() { // CHECK: %[[COMPILE_LAUNCH:[0-9]*]]:2 = "tf_device.launch" @@ -95,6 +103,8 @@ func.func @reorder_multiple_ops_with_successors_program_key() { return } +// ----- + // CHECK-LABEL: func @launch_intermediate_usage func.func @launch_intermediate_usage() { // CHECK: %[[ORIG_LAUNCH:[0-9]*]]:2 = "tf_device.launch" @@ -122,12 +132,29 @@ func.func @launch_intermediate_usage() { return } -// CHECK-LABEL: func @preprocess_not_in_launch -func.func @preprocess_not_in_launch() { - // CHECK: %[[COMPILE_LAUNCH:[0-9]*]]:2 = 
"tf_device.launch" +// ----- + +// CHECK-LABEL: func @compile_not_in_launch +func.func @compile_not_in_launch() { // CHECK: TPUCompileMlir // CHECK: %[[CONSTANT:[a-z0-9]*]] = "tf.Const" // CHECK: "tf.OpA"(%[[CONSTANT]] + %compilation_status, %program = "tf._TPUCompileMlir"() { metadata = "...", mlir_module = "..." } : () -> (tensor, tensor<3x!tf_type.string>) + "tf_device.launch"() ({ + %cst_0 = "tf.Const"() {value = dense<""> : tensor<1x!tf_type.string>} : () -> tensor<1x!tf_type.string> + "tf.OpA"(%cst_0) { mini_batch_splits = ""} : (tensor<1x!tf_type.string>) -> () + tf_device.return + }) {device = "/job:localhost/replica:0/task:0/device:CPU:0"} : () -> () + return +} + +// ----- + +// CHECK-LABEL: func @preprocess_not_in_launch +func.func @preprocess_not_in_launch() { + // CHECK: [[COMPILE_LAUNCH:%[0-9]*]]:2 = "tf_device.launch" + // CHECK: TPUCompileMlir + // CHECK: "tf.OpA"([[COMPILE_LAUNCH]]#1 %0:2 = "tf_device.launch"() ({ %compilation_status, %program = "tf._TPUCompileMlir"() { metadata = "...", mlir_module = "..." } : () -> (tensor, tensor<3x!tf_type.string>) tf_device.return %compilation_status, %program : tensor, tensor<3x!tf_type.string> @@ -137,17 +164,99 @@ func.func @preprocess_not_in_launch() { return } -// CHECK-LABEL: func @compile_not_in_launch -func.func @compile_not_in_launch() { +// ----- + +// CHECK-LABEL: func @preprocess_not_in_launch_and_needs_moving +func.func @preprocess_not_in_launch_and_needs_moving() { + // CHECK: [[COMPILE_LAUNCH:%[0-9]*]]:2 = "tf_device.launch" // CHECK: TPUCompileMlir - // CHECK: %[[CONSTANT:[a-z0-9]*]] = "tf.Const" - // CHECK: "tf.OpA"(%[[CONSTANT]] - %compilation_status, %program = "tf._TPUCompileMlir"() { metadata = "...", mlir_module = "..." 
} : () -> (tensor, tensor<3x!tf_type.string>) - "tf_device.launch"() ({ + // CHECK: "tf.OpA"([[COMPILE_LAUNCH]]#1 + %cst_0 = "tf.Const"() {value = dense<""> : tensor<1x!tf_type.string>} : () -> tensor<1x!tf_type.string> + "tf.OpA"(%cst_0) { mini_batch_splits = ""} : (tensor<1x!tf_type.string>) -> () + %0:2 = "tf_device.launch"() ({ + %compilation_status, %program = "tf._TPUCompileMlir"() { metadata = "...", mlir_module = "..." } : () -> (tensor, tensor<3x!tf_type.string>) + tf_device.return %compilation_status, %program : tensor, tensor<3x!tf_type.string> + }) {device = "/job:localhost/replica:0/task:0/device:CPU:0"} : () -> (tensor, tensor<3x!tf_type.string>) + return +} + +// ----- + +// CHECK-LABEL: func @only_compile_under_replicate +func.func @only_compile_under_replicate() { + // CHECK-DAG: [[COMPILE_REPLICATE:%[0-9]*]]:4 = tf_device.replicate + // CHECK-NEXT: [[COMPILE_LAUNCH:%[0-9]*]]:2 = "tf_device.launch" + // CHECK-DAG: _TPUCompileMlir + // CHECK-DAG: %[[CONSTANT:[a-z0-9]*]] = "tf.Const" + // CHECK: "tf.OpA" + %0:4 = "tf_device.replicate"() ({ + %0:2 = "tf_device.launch"() ({ + %compilation_status, %program = "tf._TPUCompileMlir"() { metadata = "...", mlir_module = "..." 
} : () -> (tensor, tensor<3x!tf_type.string>) + tf_device.return %compilation_status, %program : tensor, tensor<3x!tf_type.string> + }) {device = "/job:localhost/replica:0/task:0/device:CPU:0"} : () -> (tensor, tensor<3x!tf_type.string>) + tf_device.return %0#0, %0#1: tensor, tensor<3x!tf_type.string> + }) {n = 2: i32, operandSegmentSizes = array} : () -> ( + tensor, + tensor, + tensor<3x!tf_type.string>, + tensor<3x!tf_type.string> + ) + %cst_0 = "tf.Const"() {value = dense<""> : tensor<1x!tf_type.string>} : () -> tensor<1x!tf_type.string> + "tf.OpA"(%cst_0) { mini_batch_splits = ""} : (tensor<1x!tf_type.string>) -> () + return +} + +// ----- + +// CHECK-LABEL: func @compile_and_op_under_replicate +func.func @compile_and_op_under_replicate() { + // CHECK-DAG: [[COMPILE_REPLICATE:%[0-9]*]]:4 = tf_device.replicate + // CHECK-NEXT: [[COMPILE_LAUNCH:%[0-9]*]]:2 = "tf_device.launch" + // CHECK-DAG: _TPUCompileMlir + // CHECK-DAG: %[[CONSTANT:[a-z0-9]*]] = "tf.Const" + // CHECK: "tf.OpA"([[COMPILE_LAUNCH]]#1 + %0:4 = "tf_device.replicate"() ({ + %0:2 = "tf_device.launch"() ({ + %compilation_status, %program = "tf._TPUCompileMlir"() { metadata = "...", mlir_module = "..." 
} : () -> (tensor, tensor<3x!tf_type.string>) + tf_device.return %compilation_status, %program : tensor, tensor<3x!tf_type.string> + }) {device = "/job:localhost/replica:0/task:0/device:CPU:0"} : () -> (tensor, tensor<3x!tf_type.string>) %cst_0 = "tf.Const"() {value = dense<""> : tensor<1x!tf_type.string>} : () -> tensor<1x!tf_type.string> "tf.OpA"(%cst_0) { mini_batch_splits = ""} : (tensor<1x!tf_type.string>) -> () - tf_device.return - }) {device = "/job:localhost/replica:0/task:0/device:CPU:0"} : () -> () + tf_device.return %0#0, %0#1: tensor, tensor<3x!tf_type.string> + }) {n = 2: i32, operandSegmentSizes = array} : () -> ( + tensor, + tensor, + tensor<3x!tf_type.string>, + tensor<3x!tf_type.string> + ) return } +// ----- + +// CHECK-LABEL: func @compile_and_op_under_replicate_and_launch +func.func @compile_and_op_under_replicate_and_launch() { + // CHECK-DAG: [[COMPILE_REPLICATE:%[0-9]*]]:4 = tf_device.replicate + // CHECK-NEXT: [[COMPILE_LAUNCH:%[0-9]*]]:2 = "tf_device.launch" + // CHECK-DAG: _TPUCompileMlir + // CHECK-DAG: %[[CONSTANT:[a-z0-9]*]] = "tf.Const" + // CHECK: "tf.OpA"([[COMPILE_LAUNCH]]#1 + %0:4 = "tf_device.replicate"() ({ + %0:2 = "tf_device.launch"() ({ + %compilation_status, %program = "tf._TPUCompileMlir"() { metadata = "...", mlir_module = "..." 
} : () -> (tensor, tensor<3x!tf_type.string>) + tf_device.return %compilation_status, %program : tensor, tensor<3x!tf_type.string> + }) {device = "/job:localhost/replica:0/task:0/device:CPU:0"} : () -> (tensor, tensor<3x!tf_type.string>) + "tf_device.launch"() ({ + %cst_0 = "tf.Const"() {value = dense<""> : tensor<1x!tf_type.string>} : () -> tensor<1x!tf_type.string> + "tf.OpA"(%cst_0) { mini_batch_splits = ""} : (tensor<1x!tf_type.string>) -> () + tf_device.return + }) {device = "/job:localhost/replica:0/task:0/device:CPU:0"} : () -> () + tf_device.return %0#0, %0#1: tensor, tensor<3x!tf_type.string> + }) {n = 2: i32, operandSegmentSizes = array} : () -> ( + tensor, + tensor, + tensor<3x!tf_type.string>, + tensor<3x!tf_type.string> + ) + return +} diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/embedding_program_key.cc b/tensorflow/compiler/mlir/tensorflow/transforms/embedding_program_key.cc index 829a5f6080a9c6..73bc4d13f028c9 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/embedding_program_key.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/embedding_program_key.cc @@ -15,21 +15,23 @@ limitations under the License. 
#include #include -#include +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/Support/Casting.h" #include "mlir/Dialect/Func/IR/FuncOps.h" // from @llvm-project #include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/Dialect.h" // from @llvm-project #include "mlir/IR/Operation.h" // from @llvm-project +#include "mlir/IR/Value.h" // from @llvm-project #include "mlir/IR/Visitors.h" // from @llvm-project #include "mlir/Pass/Pass.h" // from @llvm-project +#include "mlir/Support/LLVM.h" // from @llvm-project #include "mlir/Support/LogicalResult.h" // from @llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_device.h" -#include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" -#include "tensorflow/compiler/mlir/tensorflow/translate/split_into_island_per_op_pass.h" -#include "tensorflow/compiler/mlir/tensorflow/utils/attribute_utils.h" namespace mlir { namespace TFDevice { @@ -60,22 +62,63 @@ bool OpInBlock(Operation* op, Block* block) { return false; } -// Checks if there is a precedecessor TPUCOmpileMlir op of `preprocess_op` in -// `func_op`. Assumes both ops are wrapped in a tf_device.launch. -bool HasTPUCompilePredecessor(func::FuncOp func_op, Operation* preprocess_op) { - bool has_tpu_compile_predecessor = false; - func_op.walk([&](TF::_TPUCompileMlirOp compile_op) { - if (compile_op->getParentOp()->getBlock() == - preprocess_op->getParentOp()->getBlock() && - compile_op->getParentOp()->isBeforeInBlock( - preprocess_op->getParentOp())) { - has_tpu_compile_predecessor = true; +// Find a TPUCompileMlirOp that's after the given `preprocess_op`, under `func`. +// Assumes the TPUCompileMlirOp is wrapped in a tf_device.launch. 
+Operation* FindCompileSuccessor(Operation* func_op, Operation* preprocess_op) { + bool in_launch = isa(preprocess_op->getParentOp()); + Operation* preprocess_or_launch = + in_launch ? preprocess_op->getParentOp() : preprocess_op; + + Operation* tpu_compile_successor = nullptr; + func_op->walk([&](TF::_TPUCompileMlirOp compile_op) { + if (compile_op->getParentOp() == nullptr || + !isa(compile_op->getParentOp())) + return WalkResult::advance(); + Operation* compile_launch_op = compile_op->getParentOp(); + + if (compile_launch_op->getBlock() == preprocess_or_launch->getBlock() && + preprocess_or_launch->isBeforeInBlock(compile_launch_op)) { + tpu_compile_successor = compile_op; return WalkResult::interrupt(); } return WalkResult::advance(); }); + return tpu_compile_successor; +} - return has_tpu_compile_predecessor; +// Find a TPUCompileMlirOp that's before the given `preprocess_op`, under +// `func`. Assumes the TPUCompileMlirOp is wrapped in a tf_device.launch, +// possibly itself wrapped in a tf_device.replicate. +Operation* FindCompilePredecessor(Operation* func_op, + Operation* preprocess_op) { + bool in_launch = isa(preprocess_op->getParentOp()); + Operation* preprocess_or_launch = + in_launch ? preprocess_op->getParentOp() : preprocess_op; + + Operation* tpu_compile_predecessor = nullptr; + func_op->walk([&](TF::_TPUCompileMlirOp compile_op) { + if (compile_op->getParentOp() == nullptr || + !isa(compile_op->getParentOp())) + return WalkResult::advance(); + Operation* compile_launch_op = compile_op->getParentOp(); + if (compile_launch_op->getBlock() == preprocess_or_launch->getBlock() && + compile_launch_op->isBeforeInBlock(preprocess_or_launch)) { + tpu_compile_predecessor = compile_op; + return WalkResult::interrupt(); + } + // The launch op might be underneath a replicate op. If the preprocess_op is + // in the same block as said replicate, that's OK, too. 
+ if (auto replicate_op = llvm::dyn_cast_or_null( + compile_launch_op->getParentOp())) { + if (replicate_op->getBlock() == preprocess_or_launch->getBlock() && + replicate_op->isBeforeInBlock(preprocess_or_launch)) { + tpu_compile_predecessor = compile_op; + return WalkResult::interrupt(); + } + } + return WalkResult::advance(); + }); + return tpu_compile_predecessor; } // Get all of the successor ops of `root_op` in the same block. @@ -233,11 +276,12 @@ void CreateReducedLaunchOp(OpBuilder* builder, Block* old_block, // ops before `preprocess_op`. This actually creates a new launch op after // _TPUCompileMlir and moves `preprocess_op` and its successors that are input // to TPUExecute to it. -LogicalResult MovePreprocessingOps(OpBuilder* builder, func::FuncOp func_op, - Operation* preprocess_op) { +LogicalResult MovePreprocessingOpInLaunch(OpBuilder* builder, + func::FuncOp func_op, + Operation* preprocess_op) { // If this is already a TPUCompile predecessor, no need to move the // preprocessing ops. - if (HasTPUCompilePredecessor(func_op, preprocess_op)) return success(); + if (FindCompilePredecessor(func_op, preprocess_op)) return success(); auto original_launch_op = llvm::dyn_cast(preprocess_op->getParentOp()); @@ -252,19 +296,8 @@ LogicalResult MovePreprocessingOps(OpBuilder* builder, func::FuncOp func_op, } // Find the TPUCompile successor. - Operation* tpu_compile_successor = nullptr; - func_op.walk([&](TF::_TPUCompileMlirOp compile_op) { - if (compile_op->getParentOp() == nullptr || - !isa(compile_op->getParentOp())) - return WalkResult::advance(); - if (compile_op->getParentOp()->getBlock() == - original_launch_op->getBlock() && - original_launch_op->isBeforeInBlock(compile_op->getParentOp())) { - tpu_compile_successor = compile_op; - return WalkResult::interrupt(); - } - return WalkResult::advance(); - }); + Operation* tpu_compile_successor = + FindCompileSuccessor(func_op, preprocess_op); // Return early if can't find TPUCompile successor. 
if (tpu_compile_successor == nullptr) return success(); @@ -287,35 +320,51 @@ LogicalResult MovePreprocessingOps(OpBuilder* builder, func::FuncOp func_op, return success(); } +LogicalResult MoveStandalonePreprocessingOp(OpBuilder* builder, + func::FuncOp func_op, + Operation* preprocess_op) { + if (FindCompilePredecessor(func_op, preprocess_op)) return success(); + + // Find the TPUCompile successor we want to move upwards. We're moving the + // compile, not the preprocess_op, since it's easier to move (because it + // doesn't typically have any dependencies) + Operation* tpu_compile_successor = + FindCompileSuccessor(func_op, preprocess_op); + if (tpu_compile_successor == nullptr) return success(); + + Operation* compile_launch_op = tpu_compile_successor->getParentOp(); + + // If the launch isn't in the same block as the preprocess op, abort. + if (compile_launch_op->getBlock() != preprocess_op->getBlock()) + return success(); + + // Move the compile op launch right before our op. + compile_launch_op->moveBefore(preprocess_op); + + return success(); +} + // Rewrites the program_key input of `preprocess_op` to use the output of // _TPUCompileMlir. void RewritePreprocessInputs(OpBuilder* builder, func::FuncOp func_op, Operation* preprocess_op) { - if (preprocess_op->getParentOp() == nullptr || - !isa(preprocess_op->getParentOp())) - return; + if (preprocess_op->getParentOp() == nullptr) return; // Find predecessor TPUCompile Op and rewrite the program key. - func_op.walk([&](TF::_TPUCompileMlirOp compile_op) { - if (compile_op->getParentOp() == nullptr || - !isa(compile_op->getParentOp())) - return WalkResult::advance(); - if (compile_op->getParentOp()->getBlock() == - preprocess_op->getParentOp()->getBlock() && - compile_op->getParentOp()->isBeforeInBlock( - preprocess_op->getParentOp())) { - // Find the corresponding result of the _TPUCompileMlirOp in launch return - // op. 
- for (OpOperand& operand : compile_op->getResult(1).getUses()) { - if (llvm::isa(operand.getOwner())) { - preprocess_op->setOperand(0, compile_op->getParentOp()->getResult( - operand.getOperandNumber())); - } - } - return WalkResult::interrupt(); + Operation* tpu_compile_predecessor = + FindCompilePredecessor(func_op, preprocess_op); + if (tpu_compile_predecessor == nullptr) return; + + for (OpOperand& operand : tpu_compile_predecessor->getResult(1).getUses()) { + if (llvm::isa(operand.getOwner()) && + tpu_compile_predecessor->getParentOp() + ->getBlock() + ->findAncestorOpInBlock(*preprocess_op)) { + preprocess_op->setOperand( + 0, tpu_compile_predecessor->getParentOp()->getResult( + operand.getOperandNumber())); } - return WalkResult::advance(); - }); + } } void EmbeddingProgramKeyPass::runOnOperation() { @@ -325,18 +374,20 @@ void EmbeddingProgramKeyPass::runOnOperation() { // Handle ops with mini_batch_splits attribute first since all preprocessing // ops may need to be moved. getOperation().walk([&](Operation* op) { - if (op->hasAttr(kMiniBatchSplitsAttr) && - isa(op->getParentOp())) { - preprocess_ops.push_back(op); - } + if (op->hasAttr(kMiniBatchSplitsAttr)) preprocess_ops.push_back(op); }); OpBuilder builder(&getContext()); - for (Operation* preprocess_op : preprocess_ops) { - if (failed(MovePreprocessingOps(&builder, getOperation(), preprocess_op))) - return signalPassFailure(); - RewritePreprocessInputs(&builder, getOperation(), preprocess_op); + for (Operation* op : preprocess_ops) { + if (isa(op->getParentOp())) { + if (failed(MovePreprocessingOpInLaunch(&builder, getOperation(), op))) + return signalPassFailure(); + } else { + if (failed(MoveStandalonePreprocessingOp(&builder, getOperation(), op))) + return signalPassFailure(); + } + RewritePreprocessInputs(&builder, getOperation(), op); } // Handle ops with mini_batch_in_csr attribute. 
From c273d635119a8b2082d6ace0bbcb0c5d614c84d2 Mon Sep 17 00:00:00 2001 From: Logan Chien Date: Fri, 22 Sep 2023 13:38:04 -0700 Subject: [PATCH 169/567] Fix PadOp lowering with negative padding amount PiperOrigin-RevId: 567705787 --- .../lite/stablehlo/tests/legalize_hlo.mlir | 19 +++++ .../lite/stablehlo/transforms/legalize_hlo.cc | 75 +++++++++++++++---- .../transforms/legalize_hlo_patterns.td | 5 +- 3 files changed, 84 insertions(+), 15 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/stablehlo/tests/legalize_hlo.mlir b/tensorflow/compiler/mlir/lite/stablehlo/tests/legalize_hlo.mlir index 6d95e62c79a452..ddc6848686f5a6 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/tests/legalize_hlo.mlir +++ b/tensorflow/compiler/mlir/lite/stablehlo/tests/legalize_hlo.mlir @@ -3113,6 +3113,25 @@ func.func @convert_pad(%arg0: tensor<8x128xf32>, %arg1: tensor) -> tensor<1 func.return %0 : tensor<11x131xf32> } +// CHECK-LABEL: func @convert_pad_negative_amount( +// CHECK-SAME: %[[VAL_0:.*]]: tensor<8x128xf32>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor) -> tensor<7x128xf32> { +// CHECK: %[[VAL_2:.*]] = arith.constant dense<{{\[\[}}0, 0], [0, 1]]> : tensor<2x2xi64> +// CHECK: %[[VAL_3:.*]] = "tf.PadV2"(%[[VAL_0]], %[[VAL_2]], %[[VAL_1]]) : (tensor<8x128xf32>, tensor<2x2xi64>, tensor) -> tensor<8x129xf32> +// CHECK: %[[VAL_4:.*]] = arith.constant dense<[0, 1]> : tensor<2xi64> +// CHECK: %[[VAL_5:.*]] = arith.constant dense<[7, 128]> : tensor<2xi64> +// CHECK: %[[VAL_6:.*]] = "tf.Slice"(%[[VAL_3]], %[[VAL_4]], %[[VAL_5]]) : (tensor<8x129xf32>, tensor<2xi64>, tensor<2xi64>) -> tensor<7x128xf32> +// CHECK: return %[[VAL_6]] : tensor<7x128xf32> +// CHECK: } +func.func @convert_pad_negative_amount(%arg0: tensor<8x128xf32>, %arg1: tensor) -> tensor<7x128xf32> { + %0 = "mhlo.pad"(%arg0, %arg1) { + edge_padding_low = dense<[0, -1]> : tensor<2xi64>, + edge_padding_high = dense<[-1, 1]> : tensor<2xi64>, + interior_padding = dense<0> : tensor<2xi64> + } : (tensor<8x128xf32>, tensor) -> 
tensor<7x128xf32> + func.return %0 : tensor<7x128xf32> +} + // CHECK-LABEL: func @convert_round( // CHECK-SAME: %[[VAL_0:.*]]: tensor<8x128xbf16>) -> tensor<8x128xbf16> // CHECK: %[[VAL_1:.*]] = "tf.Round"(%[[VAL_0]]) : (tensor<8x128xbf16>) -> tensor<8x128xbf16> diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo.cc b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo.cc index d424a1e9c3137b..0c1e157af27d3d 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo.cc +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo.cc @@ -3344,20 +3344,69 @@ Value ConvertPadOp(PatternRewriter& rewriter, Operation* old_op) { auto pad_op = cast(old_op); mlir::Location loc = pad_op.getLoc(); - llvm::SmallVector padding; - for (auto p : llvm::zip(pad_op.getEdgePaddingLow().getValues(), + // Calculates non-negative padding amount and slice begins/sizes. + llvm::SmallVector padding; + llvm::SmallVector pad_output_shape; + + bool has_negative_padding_amount = false; + llvm::SmallVector slice_begins; + llvm::SmallVector slice_sizes; + + for (auto p : llvm::zip(pad_op.getOperand().getType().getShape(), + pad_op.getEdgePaddingLow().getValues(), pad_op.getEdgePaddingHigh().getValues())) { - padding.push_back(std::get<0>(p)); - padding.push_back(std::get<1>(p)); - } - auto attr_type = RankedTensorType::get({pad_op.getEdgePaddingLow().size(), 2}, - rewriter.getI64Type()); - auto padding_attr = DenseIntElementsAttr::get(attr_type, padding); - auto padding_op = - rewriter.create(loc, attr_type, padding_attr); - return rewriter.create(loc, pad_op.getType(), - pad_op.getOperand(), padding_op, - pad_op.getPaddingValue()); + const int64_t input_dim_size = std::get<0>(p); + int64_t pad_output_dim_size = input_dim_size; + + const int64_t pad_low = std::get<1>(p).getSExtValue(); + if (pad_low < 0) { + has_negative_padding_amount = true; + padding.push_back(0); + } else { + padding.push_back(pad_low); + pad_output_dim_size += 
pad_low; + } + + const int64_t pad_high = std::get<2>(p).getSExtValue(); + if (pad_high < 0) { + has_negative_padding_amount = true; + padding.push_back(0); + } else { + padding.push_back(pad_high); + pad_output_dim_size += pad_high; + } + + pad_output_shape.push_back(pad_output_dim_size); + + slice_begins.push_back(pad_low < 0 ? -pad_low : 0); + slice_sizes.push_back(input_dim_size + pad_low + pad_high); + } + + // Convert to PadV2. + auto padding_attr_type = RankedTensorType::get( + {pad_op.getEdgePaddingLow().size(), 2}, rewriter.getI64Type()); + auto padding_attr = DenseIntElementsAttr::get(padding_attr_type, padding); + auto padding_amount_const_op = + rewriter.create(loc, padding_attr_type, padding_attr); + auto new_pad_op = rewriter.create( + loc, pad_op.getType().clone(pad_output_shape), pad_op.getOperand(), + padding_amount_const_op, pad_op.getPaddingValue()); + if (!has_negative_padding_amount) { + return new_pad_op; + } + + // Convert negative padding amount into slice. + auto slice_attr_type = RankedTensorType::get( + {pad_op.getEdgePaddingLow().size()}, rewriter.getI64Type()); + auto slice_begins_const_op = rewriter.create( + loc, slice_attr_type, + DenseIntElementsAttr::get(slice_attr_type, slice_begins)); + auto slice_sizes_const_op = rewriter.create( + loc, slice_attr_type, + DenseIntElementsAttr::get(slice_attr_type, slice_sizes)); + return rewriter.create(loc, pad_op.getType(), new_pad_op, + slice_begins_const_op, + slice_sizes_const_op); } class ConvertPopulationCountOp diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_patterns.td b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_patterns.td index 5030777490c5d8..3ba9d1911ffab6 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_patterns.td +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_patterns.td @@ -309,8 +309,9 @@ def IsZero : Constraint() == 0">>; def ConvertPadOp : NativeCodeCall< 
"ConvertPadOp($_builder, $0.getDefiningOp())">; -def : Pat<(MHLO_PadOp:$old_value $input, $pad_value, $pad_low, $pad_high, - $pad_interior), +def : Pat<(MHLO_PadOp:$old_value + StaticShapeTensorOf<[TF_ElementType]>:$input, + $pad_value, $pad_low, $pad_high, $pad_interior), (ConvertPadOp $old_value), [(IsZero $pad_interior)]>; From af243fde7e770badda114281532a3cbd372becee Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Fri, 22 Sep 2023 13:53:50 -0700 Subject: [PATCH 170/567] Remove unused `std::to_string()` in `GetMinibatchSplitsWithPhysicalReplicas` kernel. PiperOrigin-RevId: 567709583 --- tensorflow/core/tpu/kernels/sparse_core_preprocess_ops.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/core/tpu/kernels/sparse_core_preprocess_ops.cc b/tensorflow/core/tpu/kernels/sparse_core_preprocess_ops.cc index d4cd8035fdc7dd..38df331827907a 100644 --- a/tensorflow/core/tpu/kernels/sparse_core_preprocess_ops.cc +++ b/tensorflow/core/tpu/kernels/sparse_core_preprocess_ops.cc @@ -878,7 +878,6 @@ void GetMinibatchSplitsWithPhysicalReplicaOp::Compute(OpKernelContext* ctx) { continue; } int32_t col_id = item >> 32; - std::string col_id_str = std::to_string(col_id); int32_t replica_id = col_id % num_physical_replica; int32_t bucket_id; int32_t main_index; From 4be8c8d2d20c7297fd3fc6a036a21098b830c9bc Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Fri, 22 Sep 2023 14:10:17 -0700 Subject: [PATCH 171/567] Rewire TensorFlow OSS to use Keras 3 once the v3 release is live. 
PiperOrigin-RevId: 567713807 --- tensorflow/api_template.__init__.py | 98 +++++++++++++++++------ tensorflow/api_template_v1.__init__.py | 93 ++++++++++++++------- tensorflow/compat_template_v1.__init__.py | 39 ++++++--- 3 files changed, 165 insertions(+), 65 deletions(-) diff --git a/tensorflow/api_template.__init__.py b/tensorflow/api_template.__init__.py index 57676e93a68db1..8b789f7202db6d 100644 --- a/tensorflow/api_template.__init__.py +++ b/tensorflow/api_template.__init__.py @@ -25,6 +25,7 @@ only a placeholder to enable test cases to run. The TensorFlow build replaces this file with a file generated from [`api_template.__init__.py`](https://www.github.com/tensorflow/tensorflow/blob/master/tensorflow/api_template.__init__.py) """ +# pylint: disable=g-bad-import-order,protected-access,g-import-not-at-top import distutils as _distutils import importlib @@ -74,11 +75,9 @@ "Limited tf.summary API due to missing TensorBoard installation.") # Load tensorflow-io-gcs-filesystem if enabled -# pylint: disable=g-import-not-at-top if (_os.getenv("TF_USE_MODULAR_FILESYSTEM", "0") == "true" or _os.getenv("TF_USE_MODULAR_FILESYSTEM", "0") == "1"): import tensorflow_io_gcs_filesystem as _tensorflow_io_gcs_filesystem -# pylint: enable=g-import-not-at-top # Lazy-load estimator. _estimator_module = "tensorflow_estimator.python.estimator.api._v2.estimator" @@ -88,16 +87,51 @@ _current_module.__path__ = [_module_dir] + _current_module.__path__ setattr(_current_module, "estimator", estimator) -_keras_module = "keras.api._v2.keras" -_keras = _LazyLoader("keras", globals(), _keras_module) -_module_dir = _module_util.get_parent_dir_for_name(_keras_module) -if _module_dir: - _current_module.__path__ = [_module_dir] + _current_module.__path__ -setattr(_current_module, "keras", _keras) +# Keras v2 loading. 
+_keras_to_use = None +_keras_package_name = None +_keras_version = None +if _os.environ.get("TF_USE_LEGACY_KERAS", None) in ("true", "True", "1"): + # Users can opt out of Keras 3 with this environment variable. + try: + import tf_keras.api._v2.keras as _keras_to_use + + _keras_package_name = "tf_keras.api._v2.keras" + _keras_version = "tf_keras" + except ImportError: + _logging.warning( + "Your environment has TF_USE_LEGACY_KERAS set to True, but you " + "do not have the tf_keras package installed. You must install it " + "in order to use the legacy tf.keras. Install it via: " + "`pip install tf_keras`" + ) +else: + try: + import keras as _keras_module + + if _keras_module.__version__.startswith("3."): + # This is the Keras 3.x case. + _keras_to_use = _keras_module._tf_keras + _keras_package_name = "keras._tf_keras" + _keras_version = "keras_3" + else: + # This is the Keras 2.x case. + import keras.api._v2.keras as _keras_to_use + _keras_package_name = "keras.api._v2.keras" + _keras_version = "keras_2" + except (ImportError, AttributeError): + pass + +if _keras_to_use is not None: + setattr(_current_module, "keras", _keras_to_use) +else: + # TF will not have `tf.keras` in this case. This should not be silent. + _logging.warning("Unable to load `tf.keras`. Check that the `keras` package " + "is installed.") # Enable TF2 behaviors -from tensorflow.python.compat import v2_compat as _compat # pylint: disable=g-import-not-at-top +from tensorflow.python.compat import v2_compat as _compat _compat.enable_v2_behavior() _major_api_version = 2 @@ -156,13 +190,13 @@ def _running_from_pip_package(): # actually trying to import it. Have a Try-Catch to make sure it doesn't break # when it doing some very initial loading, like tf.compat.v2, etc. try: - _keras_package = "keras.api._v2.keras." 
- _losses = _LazyLoader("losses", globals(), _keras_package + "losses") - _metrics = _LazyLoader("metrics", globals(), _keras_package + "metrics") + _losses = _LazyLoader("losses", globals(), _keras_package_name + ".losses") + _metrics = _LazyLoader( + "metrics", globals(), _keras_package_name + ".metrics") _optimizers = _LazyLoader( - "optimizers", globals(), _keras_package + "optimizers") + "optimizers", globals(), _keras_package_name + ".optimizers") _initializers = _LazyLoader( - "initializers", globals(), _keras_package + "initializers") + "initializers", globals(), _keras_package_name + ".initializers") setattr(_current_module, "losses", _losses) setattr(_current_module, "metrics", _metrics) setattr(_current_module, "optimizers", _optimizers) @@ -175,25 +209,37 @@ def _running_from_pip_package(): # SavedModel registry. # See b/196254385 for more details. try: - importlib.import_module("keras.optimizers") - except (ImportError, AttributeError): - pass - try: - importlib.import_module("keras.src.optimizers") + if _keras_version == "keras_2": + importlib.import_module("keras.src.optimizers") + elif _keras_version == "tf_keras": + importlib.import_module("tf_keras.src.optimizers") except (ImportError, AttributeError): pass + del importlib # Explicitly import lazy-loaded modules to support autocompletion. 
-# pylint: disable=g-import-not-at-top if _typing.TYPE_CHECKING: from tensorflow_estimator.python.estimator.api._v2 import estimator as estimator - from keras.api._v2 import keras - from keras.api._v2.keras import losses - from keras.api._v2.keras import metrics - from keras.api._v2.keras import optimizers - from keras.api._v2.keras import initializers -# pylint: enable=g-import-not-at-top + + if _keras_version == "keras_2": + from keras.api._v2 import keras + from keras.api._v2.keras import losses + from keras.api._v2.keras import metrics + from keras.api._v2.keras import optimizers + from keras.api._v2.keras import initializers + elif _keras_version == "tf_keras": + from tf_keras.api._v2 import keras + from tf_keras.api._v2.keras import losses + from tf_keras.api._v2.keras import metrics + from tf_keras.api._v2.keras import optimizers + from tf_keras.api._v2.keras import initializers + elif _keras_version == "keras_3": + from keras import _tf_keras as keras + from keras._tf_keras import losses + from keras._tf_keras import metrics + from keras._tf_keras import optimizers + from keras._tf_keras import initializers # pylint: enable=undefined-variable diff --git a/tensorflow/api_template_v1.__init__.py b/tensorflow/api_template_v1.__init__.py index 9684275e572f31..fe250ed3df42f5 100644 --- a/tensorflow/api_template_v1.__init__.py +++ b/tensorflow/api_template_v1.__init__.py @@ -22,7 +22,7 @@ import sys as _sys import typing as _typing -# pylint: disable=g-bad-import-order +# pylint: disable=g-bad-import-order,protected-access,g-import-not-at-top from tensorflow.python import pywrap_tensorflow # pylint: disable=unused-import from tensorflow.python.tools import module_util as _module_util from tensorflow.python.platform import tf_logging as _logging @@ -65,11 +65,9 @@ _current_module.compat.v2 # pylint: disable=pointless-statement # Load tensorflow-io-gcs-filesystem if enabled -# pylint: disable=g-import-not-at-top if (_os.getenv("TF_USE_MODULAR_FILESYSTEM", "0") == 
"true" or _os.getenv("TF_USE_MODULAR_FILESYSTEM", "0") == "1"): import tensorflow_io_gcs_filesystem as _tensorflow_io_gcs_filesystem -# pylint: enable=g-import-not-at-top # Lazy-load estimator. _estimator_module = "tensorflow_estimator.python.estimator.api._v1.estimator" @@ -79,15 +77,48 @@ _current_module.__path__ = [_module_dir] + _current_module.__path__ setattr(_current_module, "estimator", estimator) -_keras_module = "keras.api._v1.keras" -keras = _LazyLoader("keras", globals(), _keras_module) -_module_dir = _module_util.get_parent_dir_for_name(_keras_module) -if _module_dir: - _current_module.__path__ = [_module_dir] + _current_module.__path__ -setattr(_current_module, "keras", keras) +# Keras v1 loading. +_keras_to_use = None +_keras_package_name = None +_keras_version = None +if _os.environ.get("TF_USE_LEGACY_KERAS", None) in ("true", "True", "1"): + # Users can opt out of Keras 3 with this environment variable. + try: + import tf_keras.api._v1.keras as _keras_to_use + _keras_package_name = "tf_keras.api._v1.keras" + _keras_version = "tf_keras" + except ImportError: + _logging.warning( + "Your environment has TF_USE_LEGACY_KERAS set to True, but you " + "do not have the tf_keras package installed. You must install it " + "in order to use the legacy tf.keras. Install it via: " + "`pip install tf_keras`" + ) +else: + try: + import keras as _keras_module + + if _keras_module.__version__.startswith("3."): + # This is the Keras 3.x case. It does not have v1 compatibility. + _keras_to_use = None + _keras_package_name = None + _keras_version = "keras_3" + else: + # This is the Keras 2.x case. + import keras.api._v1.keras as _keras_to_use + _keras_package_name = "keras.api._v1.keras" + _keras_version = "keras_2" + except (ImportError, AttributeError): + pass + +if _keras_to_use is not None: + setattr(_current_module, "keras", _keras_to_use) +else: + # TF will not have `tf.keras` in this case. This should not be silent. + _logging.warning("Unable to load `tf.keras`. 
Check that the `keras` package " + "is installed.") -from tensorflow.python.util.lazy_loader import LazyLoader # pylint: disable=g-import-not-at-top _CONTRIB_WARNING = """ The TensorFlow contrib module will not be included in TensorFlow 2.0. For more information, please see: @@ -96,16 +127,15 @@ * https://github.com/tensorflow/io (for I/O related ops) If you depend on functionality not listed there, please file an issue. """ -contrib = LazyLoader("contrib", globals(), "tensorflow.contrib", - _CONTRIB_WARNING) -del LazyLoader +contrib = _LazyLoader("contrib", globals(), "tensorflow.contrib", + _CONTRIB_WARNING) # The templated code that replaces the placeholder above sometimes # sets the __all__ variable. If it does, we have to be sure to add # "contrib". if "__all__" in vars(): vars()["__all__"].append("contrib") -from tensorflow.python.platform import flags # pylint: disable=g-import-not-at-top +from tensorflow.python.platform import flags # The "app" module will be imported as part of the placeholder section above. _current_module.app.flags = flags # pylint: disable=undefined-variable setattr(_current_module, "flags", flags) @@ -114,19 +144,20 @@ # Add module aliases from Keras to TF. # Some tf endpoints actually lives under Keras. -if hasattr(_current_module, "keras"): +if (hasattr(_current_module, "keras") and + _keras_version in ("tf_keras", "keras_2")): # It is possible that keras is a lazily loaded module, which might break when # actually trying to import it. Have a Try-Catch to make sure it doesn't break # when it doing some very initial loading, like tf.compat.v2, etc. 
try: - _layer_package = "keras.api._v1.keras.__internal__.legacy.layers" + _layer_package = f"{_keras_package_name}.__internal__.legacy.layers" layers = _LazyLoader("layers", globals(), _layer_package) _module_dir = _module_util.get_parent_dir_for_name(_layer_package) if _module_dir: _current_module.__path__ = [_module_dir] + _current_module.__path__ setattr(_current_module, "layers", layers) - _legacy_rnn_package = "keras.api._v1.keras.__internal__.legacy.rnn_cell" + _legacy_rnn_package = f"{_keras_package_name}.__internal__.legacy.rnn_cell" _rnn_cell = _LazyLoader("legacy_rnn", globals(), _legacy_rnn_package) _module_dir = _module_util.get_parent_dir_for_name(_legacy_rnn_package) if _module_dir: @@ -140,11 +171,10 @@ # SavedModel registry. # See b/196254385 for more details. try: - importlib.import_module("keras.optimizers") - except (ImportError, AttributeError): - pass - try: - importlib.import_module("keras.src.optimizers") + if _keras_version == "keras_2": + importlib.import_module("keras.src.optimizers") + elif _keras_version == "tf_keras": + importlib.import_module("tf_keras.src.optimizers") except (ImportError, AttributeError): pass del importlib @@ -197,15 +227,20 @@ def _running_from_pip_package(): ) # Explicitly import lazy-loaded modules to support autocompletion. 
-# pylint: disable=g-import-not-at-top if _typing.TYPE_CHECKING: from tensorflow_estimator.python.estimator.api._v1 import estimator as estimator - from keras.api._v1 import keras - from keras.api._v1.keras import losses - from keras.api._v1.keras import metrics - from keras.api._v1.keras import optimizers - from keras.api._v1.keras import initializers -# pylint: enable=g-import-not-at-top + if _keras_version == "keras_2": + from keras.api._v1 import keras + from keras.api._v1.keras import losses + from keras.api._v1.keras import metrics + from keras.api._v1.keras import optimizers + from keras.api._v1.keras import initializers + elif _keras_version == "tf_keras": + from tf_keras.api._v1 import keras + from tf_keras.api._v1.keras import losses + from tf_keras.api._v1.keras import metrics + from tf_keras.api._v1.keras import optimizers + from tf_keras.api._v1.keras import initializers # Delete modules that should be hidden from dir(). # Don't fail if these modules are not available. diff --git a/tensorflow/compat_template_v1.__init__.py b/tensorflow/compat_template_v1.__init__.py index 28b8ffa40ec051..ec1ac65e5ad03a 100644 --- a/tensorflow/compat_template_v1.__init__.py +++ b/tensorflow/compat_template_v1.__init__.py @@ -38,9 +38,18 @@ _current_module.__path__ = [_module_dir] + _current_module.__path__ setattr(_current_module, "estimator", estimator) -_keras_module = "keras.api._v1.keras" -keras = _LazyLoader("keras", globals(), _keras_module) -_module_dir = _module_util.get_parent_dir_for_name(_keras_module) +_keras_package_name = None +_keras_version = None +if _os.environ.get("TF_USE_LEGACY_KERAS", None) in ("true", "True", "1"): + # Users can opt out of Keras 3 with this environment variable. 
+ _keras_package_name = "tf_keras.api._v1.keras" + _keras_version = "tf_keras" +else: + _keras_package_name = "keras.api._v1.keras" + _keras_version = "keras_2" + +keras = _LazyLoader("keras", globals(), _keras_package_name) +_module_dir = _module_util.get_parent_dir_for_name(_keras_package_name) if _module_dir: _current_module.__path__ = [_module_dir] + _current_module.__path__ setattr(_current_module, "keras", keras) @@ -56,14 +65,14 @@ # actually trying to import it. Have a Try-Catch to make sure it doesn't break # when it doing some very initial loading, like tf.compat.v2, etc. try: - _layer_package = "keras.api._v1.keras.__internal__.legacy.layers" + _layer_package = f"{_keras_package_name}.__internal__.legacy.layers" layers = _LazyLoader("layers", globals(), _layer_package) _module_dir = _module_util.get_parent_dir_for_name(_layer_package) if _module_dir: _current_module.__path__ = [_module_dir] + _current_module.__path__ setattr(_current_module, "layers", layers) - _legacy_rnn_package = "keras.api._v1.keras.__internal__.legacy.rnn_cell" + _legacy_rnn_package = f"{_keras_package_name}.__internal__.legacy.rnn_cell" _rnn_cell = _LazyLoader("legacy_rnn", globals(), _legacy_rnn_package) _module_dir = _module_util.get_parent_dir_for_name(_legacy_rnn_package) if _module_dir: @@ -76,9 +85,19 @@ # pylint: disable=g-import-not-at-top if _typing.TYPE_CHECKING: from tensorflow_estimator.python.estimator.api._v1 import estimator as estimator - from keras.api._v1 import keras - from keras.api._v1.keras import losses - from keras.api._v1.keras import metrics - from keras.api._v1.keras import optimizers - from keras.api._v1.keras import initializers + try: + if _keras_version == "keras_2": + from keras.api._v1 import keras + from keras.api._v1.keras import losses + from keras.api._v1.keras import metrics + from keras.api._v1.keras import optimizers + from keras.api._v1.keras import initializers + elif _keras_version == "tf_keras": + from tf_keras.api._v1 import keras + from 
tf_keras.api._v1.keras import losses + from tf_keras.api._v1.keras import metrics + from tf_keras.api._v1.keras import optimizers + from tf_keras.api._v1.keras import initializers + except (ImportError, AttributeError): + pass # pylint: enable=g-import-not-at-top From 86c255ac098919ef6e72c957175a44f00d92e0c5 Mon Sep 17 00:00:00 2001 From: Dragan Mladjenovic Date: Fri, 22 Sep 2023 14:12:25 -0700 Subject: [PATCH 172/567] PR #5830: [ROCm] Fix matching in launch_dimensions.hlo and slice_to_dynamic.hlo Imported from GitHub PR https://github.com/openxla/xla/pull/5830 Copybara import of the project: -- 31c155a3110ac874912060f6cbc455b51f0c4858 by Dragan Mladjenovic : [ROCm] Fix matching in launch_dimensions.hlo and slice_to_dynamic.hlo Merging this change closes #5830 PiperOrigin-RevId: 567714298 --- .../xla/xla/service/gpu/tests/launch_dimensions.hlo | 10 +++++----- .../xla/xla/service/gpu/tests/slice_to_dynamic.hlo | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/third_party/xla/xla/service/gpu/tests/launch_dimensions.hlo b/third_party/xla/xla/service/gpu/tests/launch_dimensions.hlo index ef46dc686e9039..c2d36d0ad94042 100644 --- a/third_party/xla/xla/service/gpu/tests/launch_dimensions.hlo +++ b/third_party/xla/xla/service/gpu/tests/launch_dimensions.hlo @@ -134,8 +134,8 @@ ENTRY main { // CHECK-PTX: ![[tid_range]] = !{i32 0, i32 128} // CHECK-GCN: call i32 @llvm.amdgcn.workgroup.id.x(), !range ![[ctaid_range:[0-9]+]] // CHECK-GCN: call i32 @llvm.amdgcn.workitem.id.x(), !range ![[tid_range:[0-9]+]] -// CHECK-GCN: ![[ctaid_range]] = !{i32 0, i32 195313} -// CHECK-GCN: ![[tid_range]] = !{i32 0, i32 128} +// CHECK-GCN: ![[ctaid_range]] = !{i32 0, i32 97657} +// CHECK-GCN: ![[tid_range]] = !{i32 0, i32 256} HloModule NonElementwise @@ -168,8 +168,8 @@ ENTRY main { // CHECK-PTX: ![[tid_range]] = !{i32 0, i32 128} // CHECK-GCN: call i32 @llvm.amdgcn.workgroup.id.x(), !range ![[ctaid_range:[0-9]+]] // CHECK-GCN: call i32 @llvm.amdgcn.workitem.id.x(), !range 
![[tid_range:[0-9]+]] -// CHECK-GCN: ![[ctaid_range]] = !{i32 0, i32 7813} -// CHECK-GCN: ![[tid_range]] = !{i32 0, i32 128} +// CHECK-GCN: ![[ctaid_range]] = !{i32 0, i32 3907} +// CHECK-GCN: ![[tid_range]] = !{i32 0, i32 256} HloModule NoFewWaves @@ -253,7 +253,7 @@ ENTRY main { // CHECK-PTX: ![[tid_range]] = !{i32 0, i32 128} // CHECK-GCN: call i32 @llvm.amdgcn.workgroup.id.x(), !range ![[ctaid_range:[0-9]+]] // CHECK-GCN: call i32 @llvm.amdgcn.workitem.id.x(), !range ![[tid_range:[0-9]+]] -// CHECK-GCN: ![[ctaid_range]] = !{i32 0, i32 1008} +// CHECK-GCN: ![[ctaid_range]] = !{i32 0, i32 1664} // CHECK-GCN: ![[tid_range]] = !{i32 0, i32 128} HloModule ScalarBroadcastFourInputs diff --git a/third_party/xla/xla/service/gpu/tests/slice_to_dynamic.hlo b/third_party/xla/xla/service/gpu/tests/slice_to_dynamic.hlo index 54aa009ef75a93..8c2b1144e951f9 100644 --- a/third_party/xla/xla/service/gpu/tests/slice_to_dynamic.hlo +++ b/third_party/xla/xla/service/gpu/tests/slice_to_dynamic.hlo @@ -59,7 +59,7 @@ // CHECK: %[[VAL_47:.*]] = mul i32 %[[VAL_44]], %[[VAL_0]] // CHECK: %[[VAL_48:.*]] = udiv i32 %[[VAL_39]], %[[VAL_47]] // CHECK: %[[VAL_49:.*]] = getelementptr inbounds [2 x [2 x [2 x i32]]], ptr %[[VAL_50:.*]], i32 0, i32 %[[VAL_48]], i32 %[[VAL_46]], i32 %[[VAL_43]] -// CHECK: %[[VAL_51:.*]] = load i32, ptr %[[VAL_49]], align 4, !invariant.load !4 +// CHECK: %[[VAL_51:.*]] = load i32, ptr %[[VAL_49]], align 4, !invariant.load // CHECK: %[[VAL_52:.*]] = getelementptr inbounds i32, ptr %[[VAL_31]], i32 %[[VAL_19]] // CHECK: store i32 %[[VAL_51]], ptr %[[VAL_52]], align 4 // CHECK: br label %[[VAL_29]] From 4d076257f554c47c402549dec52ba2a10c8f85bc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 22 Sep 2023 14:14:42 -0700 Subject: [PATCH 173/567] This is an automatic update to an allowlist, work in progress. 
PiperOrigin-RevId: 567714872 --- .../compatibility/gpu_compatibility.bin | Bin 30028 -> 32104 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.bin b/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.bin index 9a985e1f5b878f589b8c6b177918e4aee4393876..9851273fc396d07380cb23f908b0326e1286be3d 100644 GIT binary patch literal 32104 zcmZ{t4_sYmn%7Srjx<{JN&=nd<{$dqT|$Md4IqE2fqB$`TgGV z_gS~~O~)C>YOxG#>Y(Gqu^epf8;+C03b3V2#~HvP*xJ_}X9SC3s{|UxDzVMy9A^xx z#8wF2i{)TbBvFH{?{}PASQWOt&vEWyF>H%G#<5xqcc)_D%P+@L-ZHCQ_BM>geH*{e z&u_qIuJ~CWY#hH~Y>?%wtN!(mjn3V%=v}RZ@HBIyP;X_Jm z0+zzB5u=$n?M;cchD+t;4eb|C@~5-F`i*A4&ht0C{jOK~N8m+DKMR(FpM&MHyrZ-_ zj8?YNx&U8o^T(}&&A&#Q!s0Avs~SEqT63(o_xnD4L}`t~`tWPTCRxr`S_l4+w~xI_ z>lwaPN^27q!Y>coVY!Cgwd0>NT186h3_MTyR>0PJ9cK~aRB=kej>9RKJ}$)}mK*WW z$NYU0W1j8o*zdyUFZ$UStRKGwwtLLWq8#%G^3H64{*ghi9<56MAbj!n`t==HzG!1~ z87B9#sA%W0PUqfbw8h`fy#=17a<7NYUE`QxySbN2dv$Z~3($XL^e5QH)_(}^yX0px zusD7-d-Y2s|CIp!@E`VSQ?2xm!>cd&_48mE_-*#=$)6)l|Hpb=pSO+v2HV*A-+<4k zcH0LV$8Q+ht$k^^Xy0Ppma69yqb>fn_Gj=$rM(QR!Y_iYv78zAZ*%HRuMRP#bpjr1 z^7|IRcIZ#nu@K9-O6!Kv+E#t;b@;sMLz1vb{6?`pmJ5~EBcnB;v>w9yl-3L^j$aKn z&$2!i$Nq@7kBv&}Fg&ERGGV#mhkcv+H^sY}J31Q*Q|&J_ltoIMLe{US;JroF5ylF{Skgo>W@%usZx=*b>WYDz6vc^7gS-X&r+XD6Je= z7Jl15V*bN&tlhsaJw_{2X%&G_URT>ZHCjW8e-5u#S}U*^ej)6@Fzv%^bAWSi{pYwQ zU_!J0v()n_=h@GAxn<+Gp~l&lU=u&|*8d9IZspQuu_A2#r`HvV1KV~dBG%VAmAMmB99E5H`$c;i?mHhGBa3~c*bFszyh#M(D8u6KH| zbtYPUSS~in;8}}pbDcAc6<|~BWDU0c9C5J4{V$Fc>c=ZE#MxeoHfNXbc-mlU^zLk_#;Tu(JOTu3y3a<78Sb>W3F>GD=&B5x4QH6cSr`=T2kZ4PBnmFtD)Tr%_zD<54ZSThk znBI>9*baTbI_B=j$&zqOX=rb(aEQI*Sm3W?-GJ4pShoXW4a0_s(`Uy*m1bkrjQt9S ziceCw*??G2VH>n-d%q6+iM{)^gI@u0S}73C!)d+NbA@TYyN@h2Btj>he74j3at=16 z^jiY->tS>B1)0107v)$;4SOzfU4AOZf{71PnJ3rtgltt9k8U* zej`A;7j_R_nL}{uIv2Y-uXQBcrt1DpjE<9KA!fClzX5SRfn};VKMshq3acc}fQfTC z`dV?+ALnwkICM$kWc|s#`*RvLX8OkTd3g?|YgGl4YyUhGr#scu752xubh50}-JcY` zbLIZ}--Sh0oCg7MCSgh9OfhE5wIy 
zT{#}qzo{%#>ei)zb^0851vaR3FT-?Q8ey|-#7CFz*l8+Dx>d1axHTEXZ}XS^c|L#@ zs2Cpy#F&HC6JxB&Yo|>mA!iw;YjN;TF*Y+!*|j(Z8zC{9<-oGgt3glB)vo5Ao}=X@`({AhUA{?Oi91VnZUWsI<0#aHmr0n!1Vc42b-s5XZduC#mz~v zPS@lvEUI)Lz;x{=VM%nSUiI2$>ov)^)ylott;upgjDxpn|2h7e9E0f?Ij}5Z)DXiP z7n2tyaXA-QV%$3EMDeNRZ@a#iVJW5mdVqckHo-u33p zO${X{-6EX%}U+U7*e`X2|xT7^{-XTaR|O0>1R^*QoqzWj2OjqUuiU^)2K z;VW~r_Rdsi!==LV7aEd@CME_&tk><~Raifh6I-VXR;jdGVbgKW|9Zz^Xt%UGlc~0L z?lknKicgk?Ni!A@|k3f6jV+99Lj5oCb;c9KM|wo12&- zI)_L1`ewr9zD~XIz1*B??!xzUfNumQ<8}yNIbKZ(*SkTqI|BUUuo?Uv{G|;Uf4jzI z=^78?8^b39OYi%KqRF*_%|3xeaIo0|Os@TJ{T{CC*xnpJwB6!{OQpE{w}qdG+uI)h zvm75L&%&fu)7T(ZhZSPNU&g0EZC43fWo#=T=4F=ijBh&kM%XOtGgvRWU3AM**P8*p zL$DEim(P23y4c#((b1f6J`C`khPC25fv@PwqX4P@lK|f}SOvcQ_*WvIZwXe7{xJH|KHHnR=+jG@nCd&50lLrs zIoc=JoVG?8EMMss!L}N?hDBG#3QyM?E2>zh^Sus}>sed(E!aHU+H3$Ox{y+=EtyXrTG~wr8JjeRcH>OxtnWoD{jsf>-6|@e2n%_r)TGN3Z{>92)4u(Dd>~mAF!GSD$OWtUte3|1@lj36!mW4yN_1V9Sh`dAs_Zaz48HogHre z34F3tn|cRUY4jzpIcx-rW9jql!+?0xuvX$tyyERodwXl7mu;Q|_^!b!@a@M}_CtL6 zaE{*DyC2!GYw#Halh_9UU$TlQt3|#@ft*~i)cYf&gL6U-7oA!p@ z2-o+6tdF4~`B5VG^8u_N%g;WB>9#%xt4Dhb?QgPeqP^iq<;9^BEtfi+Ro3Y?bNDYX z55&=qbsVOTX&x*C-70j&_hOGX-;_DaZVc`j;iqHtz*<#|Hv?ky!^Y~luT^V~Q<%I9VDHD1fEa7A3S#sV!`lxxg!@cR_JbIO)XE(jv&Z-D%S*6P z60+mG0@L-XhAlD9)T|>y&*)52!1g> zZM_y)uZnR!AVv~4$rw3`$uZ{cT%sXSS{f>)uMW`t0G6+G9|h>n!|KqCVR8-_-B2i; zbk+iNkKAP(qSLnb;YFCPNj}U$bAvHY<_>K<8jisB>_4nm>0J-dOTs46+WMW|F>Jlo z5GuaHTnnzxp$D)6rTKAy<{YdZ%`r5+cGueVnrU~kB=^DA1N4vnCGNwpfn9?WFkOQJ z*bd`eCHmgj(du4AxUt&3v8f6lo&R-MTH5TAD~x;j zU+G`CBKH_1f0o?SrZqtSHY}#}?+56Q!(`l^{q^+mZR(P1UeT8&xgMPl(0>M7V~6eh z5B=r6`DX}2zY%?}K23?F$)6=xzYrhY=Fh_hlzvBmejGN#80nxd$G54JVX~9C2)wY` zpWl60WrfFN&fSPf{`cTI{->}E75@oLAD=Z?1@ZgOdh_8_^F%6=7xQAgA z1eLhyvG@#3>sG+l7#j=Gb@!{|QbI0nWG>jz+0yK~P6|T?d*lf)( zOluaxwi(wdEX{J-T_s%S*#%1|%^LxleXwyf*J%UkJj=tS&PagfLs+iT{4_vw5mtld zh)=V`S%1WxnX;t@^VCj1$q~OT89W_3$vQF2k7Pi0`D&w@Yg{D+% zGmj%uEIMxm_z#FazJq@M7V-Zez<&ysz&AZkd-~pZ>Rr<|6h>a)89jdW_}Rzy&|l?x zo#chd`6WroaweRz`Fu~o#%XBw_Ge*we`BzfbKIxIWPh)BcD6V0JcC8|-fMz4NqRaP 
zuAdAwMMAJtK&-p4sEYL=mA{R)iS-p;#a~}@9KKJ!<-0YI z<0<=?1*Z%5?eA&Wm}2K(x&~FSWyZ%jGe-85v^Vv#oy>81+Sq#olK8bMy*sc9#oiBy zF#$`V-)Lf7Y3{gAH|iY5QY~@=`b50Br;R6T9Zl`c><`?#7v0gxCCdNKI@mmZS;jAn-lWo0RC`8?f!#DWby!%^BNt`*hsiaVL9zTI?)8d73sGL@87L&iXSf}gu@_Sy6 z5tZX<_^8oIABzZVgK=;f%S4Z7h21AZ(F-g(HLTOxca66A+d1BbS1Rpcn2g(fKJ8*x zdm=!4|KITP7Jpm&C-|;$p4)5#7Qrvyr(Nc1{}Q=3$^50Eu)C?f`)bFfhTe;bwq$p6 z!^v>yg$8cn^ju9`Y3gX;3_Vd?&amlJ5Ie^CXUBNk%V+;awV&UB7smXo4>pe9I((4j zB$~F?1K6U{nS>3!?Dsn`<<+29`98yURK?tch49P6zAgJ~z2{iY=#-6CEe+4k<0aSx z{emz~QAh59&+#euUE_Q%n6_7qpFZ|?Ol~W`-LH95c+Ag+U?ceTW8V1~=1DHk{{wWN z8QlS;`viWE{!emEx7RgT1%8DX-HUroRl;kwI{~`q{-#&UBBgry)=;d8b-r*+1v(eFcw-{~lx3$m1`;~SLY>_c<220mE!hN4_5y$JZxZ6aZ z$`Ii61;l#f#o9l?_I9j?qWc3sn}Nmgy9dufFWGsmxvQs(sn?~e%?+2@PevjV##sx9 zbLwxo_1Yg(aZbQ18UO6q1+X28Qc0Yj*&8Rtc@yFh;ni-|)!^IDr@j3fu*3;J>xJn$ ze+*;OeUq%PxBn16srt!jSSxe*nJa&k9!`L`|o%AuYI99N{ zN~cl7@&YW%z$x>WJXjLz=Wm(!M9|SSdK1Pb`?k*bW4;dGGJPi!Zayj41b(-$ufw>< z!=v9)XDC4TGow4IbU%S7Xx;YyEx>B=tF&~Z<>k&=fbOXWUcND3#-p zRCB{+*&e@$ty^B~YzF9_`8!@MqDuD^JX7h0V4E@81t$6O^3KVI?q;V7uIuoQ(Ot#Y zuEX2#Ni}aAgx$h#0FxTKy5^ml@c`{-MqB)C?I-X7)hDmPD)5u}$5-vGnDH+__uPzE ziz20a2419eD`0DkwTswS$BuiIwQyaFca826zIMLvz?W3tI075Ouh-IzmKHk`0lNGD zu9t70()|g()vC|jfJN}j$12S!``ZSDK;5jAfehxOstihUQoulm$Z?y9t0?QY{q<9tAjV;^|6 zX;d)|!!x-iv*TvMa>Wn(s*OZT_KqdRWljWNU8^^}So^nr;MaX!bXBYrYy!VcCeZ=o zc5}2uuF$zw42bj8#2Hd?K8H`Lds!>67=DBQ-#8)Z`kjow#~kU1KkqWw^l^`I$?H^- z=rEI|VLr2HJhc>Q^*L|+knhdbVsQ$(xnBesQ_#yhz%tv!Xb?NFyzk(%kL%%|;`$su z#_CCYRv3)r-IK&&boeYnHvwB={Zu8GZ>)z*wfuVrgR^{$CU0otWI>96PTR4i$lClMvS!p?NcAR z_1GU%+9%*KrCk8q;UuWs)ow!Dtw$y6bUkhw?RB=X>+vRhz0lA4VPp8MG0vusW3hSf zcr-x!snH%%+MmORl=ccNhTouHyM>ZC+X32V|AAMNkkWn$9#Yy-*gE&SihSB(_nvPp z>#B%h@B6z(dx>pq{tkTU7yN7lHiX||#8;Dc$$K(D`@o!+cdycZhHtOZ-h_qlOQCJr z3CFUuMb61AbadVqjCP*Veg&SVw5wrDa_ngsZS8~bR;BI0^6|^U()FMpX^~>*u};^c%hTR3 z{_}kjo;oAj%mW0VTJ&L94 zEn`B8yoUm`KQr3mZ)<-7A6425uv+{oF@`dy`(jUoMQ1HQ_tZb~>Jd}AC*U!qTL9bP z;lMg(a_*-6xqWLT>vTPC8g22nbAA)P{_}p;4;#a82utToH|yLA(0yWbhn4PU@L{F9 
zEZgH3!P3W&b5TxnXDdMa<$13j;&11C8lF+;XA#&2_w$!AQ;$m}C!=m3Qo}l3kGn=& z{B7;q@a5V={|)Qs%w>XSQNhk%+y2rkV{gJjDNg0 z=kqXD?Gt}H=do$MCYz&B52s`BnJLFZo$5 zY=LocdUtH>I9VEU+g>t2`=QYme_Q(jyisXS!jkya?`peqN;!@T0ou?16R#fPZ)+cc zXDIC~SPp)UrCnMQx+dC%tkX4l-Dr!yt=$E`SLA1{uxZA{aV&it(ROPx5TO03(H4JO z`(t>u(w>9W<5z{H`=^q!@*bJ{tp;e9EqFB%e_Q)S_@OSBQUU5^B4|JZ1Yzpeck9#z^)FuC^U^3|@jtGyYZ zedeEf^$>qs`xJbo(9c4!O|DK?FmoKq+dChtS*L4q$7qYct$h>Tue66?a_!&0t1ah^ zoASE7;Y(TpsqcGd|v%_jwr2XP@}n+GpVls@^f!3KvK7 zm}!5!=-|yM8w0f8H`?NFYrhLmD(x{?KYj^Io4JTP*31NGAO2@v-r{d-AAnbU&(C() zz5u^$%pA+6@>1`+pt<<#W7%P}#oyMx0AJ1bvpU#3cNeBGqutIthu)RBcYyXsMqB)C z?GNAsN_z^Hz^~4)?Y5J}0PW+8UQNW`);=m+wZUvyHhw#dpH@v=?IPCcn!I7O#oyNM zfsd(qWdb&Z-x!u|C)C?LmV*J>kBzqY+uD!d$=~E>^RPPnVpuwF#>uNv@3jE!Q~%tn ziTK;vC*V0sy8yPs*tw3S^QH~DbJj}M>6+X$+Tw3(zX=~c?q~h5G5m(Gbl##Zd5;EY zKQ-FoZ)<-JuRrEzE3g=TpO??uO{MeYrh0vR>v|5TW9=~=SOx;I>@`0 zb-E_+8g2c&=^gm^&-vqxz=qK6-PM-7-F7k=pnc$9czKJzo%b_*^HtuPun>NEn5nm` zE%nYqN9TRPXp6tC{R(^|&(Er1ON`C4So&DfzAuV)Yk>9#MqB)C?f2lrO8XwH7r!`m z06llEmLOJu?vW+09<56EAUvvc9auhoS=e)~ZV$h9#5Fuz*P_GdW-Hwb@F~@|*1_hP zOH5(u_91<%w2xGP_D4or{OwwN03Y~SKbwNNzm0?O0?e%}G(=~a+Xp6s{cMn|u#zF!%h2NN^O--aE&R~G{W1}tpw)P|O zRpa|StPa1JrOm@uH*eXG^!q{*Ybzku%b&P4+8$Vu(Tzn;eQgT1f z!}Xk^2DVtpeRoXe%kus{{khW`p!F`-agDUnwpY@qE@*@oD?_7;W*l`DJ)gX*a@Ve}VTB zv2UW&)Xi(*Z6WTSG&y|%+K-I3_}khK;frcqo`J>jtHHjGw$!?~yu?`w(0%b=d-bSQ zy2s$XN;e0Vh2J(~^+7A=vN&y?b-ErmjJEjOIbVl2X8BnXHi_RT#wzn2Wu5>uc=sZP z1N1*P`nQz+r^1!~BCH0#3hbwi{_gu1C86~5!tH=KXFv676jE_sg5Toc+4~lStuwYR zVF!rQ-P%wpZ|T**b$#v{T|I}m4c}2?|1fMAO?f}m)$R0j?*-_7VRZZPwR3w4*K>+> zSQNhkOSgo4oqw}8-&bI)*;lA^UxpWE`(stYRtxB>FgbU44X3=gxTO0M%hy?#Z8Yez ze7~LB%es73(~n^icM>goe?Ejysy=uc){0*(Hpg=B^Zsp)u6SB;rF96dzmuE+%fl}d z%Vv3nBr+rs^Vwpwa+TJ3ct1OB`_{k~f1dsiBW2(Fg=J+SC&~IK8^|_v=}rzoWce<9 zjcsIEY9;5!1XhoYvrf0EXE0Xn8!+3XbA1Be$n@v52CKlY5R-m_$L8+aT08JjmH)Ya z>**FL-81kJrCR}8qwb4X70X-s{gmI2GdFB6mc@D5H%3GZ>_!L@Fe z(alo2E%1=it%uEhk7rg``dp4&Wd0YR`;pPr^T-e2c_d};!xSumW*sK^O1~c|E^!tD 
zbf5osULET3wKa~w`+vsIvS2y*IhHPSKc@h$>u}xZW+>ebc%jmb!)Ee%PZ5(kbTo&< z#oR;a3($RJbSLn&^L+?k%=NPwSRB6^TbIfEQh@G@t6m*ymF_Y4%yWL-99R~9+l-6+ zZ})u@_Z>btbr`G~u3k&O&KJX9*XK5jP4}%KVDHCU@M;xr05*!>ASUz4uikHQ<1uEr z-Y>E4mc1|IFeJ1by;PP&$9is@gmFKv>n@LxvV(gw0#V^n|;j8O(-^5x(!`8h6kE&R=V1xLj zu)X`^z83Gy1jIPJ=Iu{H#W(<8qF8p^9kwqJeN6h%7G5Xo=soG=;Hz`LY;<#!?yK-V zrCSSI;F^6J6WxwftE-y`(EZTp&Zukr_u+EwXXiTsOX1gOblq`^ImKLn?$Q6~)gi8Q z55Z+DwRJOKdH7}ebW5B(xUNH&(alo2E%21mt%uFgP$n^{gX9}2D|S-w8s+Ru%6cyjn2_mJiDkW4Y+CKc5RmD^G3r3OrX`qgTV0 zsN*bF$8v?zx^J}fIDQ8{rRskRCcm$k!p2$F{oN1!%wgpS@a$zpZl` zKA^NCunoqwWlZi5v|ml6+7eCO^4<3u*6DiOHQM5DYu|2kT|-OEfx-9dd8zmU2% zKJs78`LE&}gv-B~W5;%2`S@jtv8?;$3q~tXX}tp1{c<%-eqV7GbNl6x*Y?c4*Qn&5 z#7DP}hbG1}+u6B2fDZy}HVI4OSC2`%HrnN)y%3=N{DxNx@wau3z%x|a%YxvisL82z2^`t^I@zzfA^=eb8^uuuZjW>HLKQsCRO8*I5e}8igCSy?{wwrsYtL;45oBJy;c4wdX z+qu6C?`0xmvr5?NcW8Tr#?XYJ)HbLc#5{T;S1z%M&UpYOuvv5h{iT}D4k>9@e;U4C1?9yXWF{14lcKU4U= z0R2Zse}Zjnt%vYF)y8IEar|n6^ywW}SYM;I32%D!saE>O;krM|gJs~iN%GR4@pC78 z_k*VWl~IKQsCRO8*JGNa?S^D)1`| z)Gu{1{`=ni&%;P%UxCs;3tv#jIR=yOaLDf=;I8ryJMjSh2S$HRjnntx38g;{>%*@# zP@n6c0R5v+z5L@!{}8-UT{~yM^6<+H&@bgYC}906C%4@muN(agrQac3>BnI+jPLib zB+I(3JTzLbgfH^!_Fe>^?lmo|HG?Aozg!F*L`I+EE~Ta zI^sN*b*^1TD@$p$!0S0l>|E<%bLdQBdvfLXCi?>P9~u1#wz0Jy!q-(@XJB#sY6A76 z&WdcK&V}%nR}1;~G;ICj@G8~+=D}o+u*o=H$g=!4hrP|~MoR`Qn|FyX=Yh>yVbkcy zoFK)r?#u4N*3^7q6sGI{6ej28kn;T;9#QpKfyM9(VR9|${Wea}vEcX5{wF!9{d*NA zvGjAAb8y{PR>77T*XJ;|uZ++}VY>eB8O;SX2e=!cISL!VFPYY4mNyAoQrjN*iI=DR zyC`5pPd1Wsbv z*d&7Uv?Y07zdwiIH&%XTeJQ0-8{(!3HS2^yx%ORSpjS(lWSdo)Ii1Ip9c#$Rru7X?cOml zbl>?lT(_S=*eznLpy9Tk(%@L~U9qWvSO>Sg{Yjx=@6Q(jv9@6HeL*=6ZY-`$g7&MJ zAHK;)w^8j^kBOD3VqJz8Dc?retU0eZ4V@pJ{f!^i)cpFn+Fy;ivE>W090GGq#~Z0E zkFZUZ()qC$bD#WsruP0lhU+=$6091(D7LE|jf(bWfcBaH?VUg3Z)=}|>t|0P*e30M z1+%od3rTxtozDA?(H4JO`zCx=u_2iJTNM3%ZTDvHy#VbmjJEjO+E3wfe%fpu7KIh~ zw9DPRGoJ0u`#g+Y+9&?D&RKY-VlmhXeSmy_bhjpDVY%RK4A6exXp6tC{Vse$u`yUb zETOdNUY(f$?Zf}a%Uk?y?E~-{#dg?U?qz0Ud&Xy~p2s%&cy<~6ETz=~Pru)k?lI=|+Wc2m#FFu41qhs&W3{1XrToa@pDsxr>^us$|eX5n#ad=3vJXi*7 
zlYV?p{yZ10V7;!-+eUwbZP@gye)R@?QL#RlJO>*N(3d`SG(i8U(H~M;pTm=it-xZi zP>?=7%}#*+x&Q0cr$}j?foCgL0b8R3UBq;KB3!rBv%XMm^A=2=0ZMJ?Zk$4v<@b|P zn7(H_im$HkQ<#)h##Fn$pTm=it-xZi5XLF%ymmFw*>Q>cp8>irf8puMJ!D(=G+h2| zG@C_W@}8dDYnFX5KbYlI!*#xQjP44)w#H5PtYSm35m-N#iEjEiFpJK;0PQb~w)orH zPvLRJ)?rbYJX7M3@SE=~&Q5^lxi8J}SKFU~XDe0#TSIFRb2TqDJ2fy}gS$pk=00|w zx8XC24a0_EeV81x)~ij|n%mfa__XDl&9dBm&ZbgdX9CW&P5#@L;{Vby(mq)($#i|H zt@TXX*;>|Sy7kUA>!c6g9i_d0Q_eh3x|F%_OKkBwMiU0rr literal 30028 zcmZ{t0aRUAy4N=uCc|WS8K#4IOdqeqL|*2$j~DN~T;Qe;`X+=n_o9( z0;|DR2tI%nV$&p1kFD=9W(+%uJ;^cVCRU4Wk;f#~fZ=W$M}GFR;iSLKZjar9v6-{f zm(8!kCr)RxG1w*i(%2;1n@a1c)f!e>kKqkUYYkS7UkKZjyy8mhMR=al z3d1&gsSQ@mcFDru=C0F6_*b*e{-r;h9si}ve!FXweiUAy^h2;GKcU}XF}7Eg z*4tKVi~Vl9-+<4zX0tSG3coRIi0v(<^}uS~R9bi814?TS)`DLhw#as_Iv)rBnBT@G zrF8&arnK^5h4>k)i0yhd-L`aFtpcTW3SO*y8(|A6t{aR?#gs>l;S$W8mq?iHL45Q% zzh`4iahx0b4t(klWwTjW0>64}=bTr@Ip;Ct%}RiNbkwg$ozg!9pZ;;SelaW;zs(-Q zc$CS#GA`P2_UYU&TW#@obAJ=Qc_Nz)z;5Dq1>4EJLi($h`&fYfW2--^^dG@nnzQwn z<#_yJyYjo@U>&>I?hc@pBh{SNltRYnApEtPH;*%y8U>(mHOnLQ3lxJoMYy z`Zcgs#?wWtj_qxwb=_*Ms4@2{d`eySBd{y@4Pj$!=Qm}?dTg~umDVG8i_%(#9mOw( zt;se`b?frtpYZEdtF#Wo3zb$WY&*#qihUcM)z)z3?R!w*xkV&Loy*yGUu>wOZ5cckA^bd{}8s!;<(lVsmVl zDXjy4(r;sj(%J`)D=h;n!7pEo?HZ+Z%4!uWt>f?wH73--?w=(mtdZ>orFF|{EwSHi z<8}Ci(i($Z!Y_?YvaQeaQ>!(sv>wA7l-3%o8ov;>$@YTE>y@j1z2ZviMR=al3d1%# z>04Me+o87Xw!CAtHrVgB@h$kQ8aoDIllYBdm)KrWc|EjR6H4nIJf*Z2V2$|IV)xnJ zR9Y|mDZh;kO6xgzsnRNh<>U9H!9<;WO6x3qxiQ(54=WcJ>mEeeudaU_E+0uw7r7@W z=JOYb^(=kxdwydD!H4*^ezfV@wB@k8KptFR)Tk6E7xpso^X#diB)5(6n_{i!lt;-slirRxD8@O*ffiBxzD*z zb6662=c@Wo2doicBh=45mv6!-XBHcP-E3$5gOQR+Rh38DqhUj{%*AwD^QmNOqOCa@ z?>$eB_|~f9AHYggoX=qE%5N1`LyR))J6YPT<;}^?G?%Z*y|%j@m9Tk|cH40brrS{i zTSa6M^V)HwJlb9n-W@9%rggM4Al7wQql)!@K&&a4B;4Z0LX~dg)QomT!;vEtZY3br z{y)RGO}}>Abr7c8RRqf;P91B3ucO~_q5s_Ld`cVHYEFiam}-3L`FB^V8#bc!-weVat{bkru^hdtZ@9WO!v(=m&h?;GD>Qw%R z#~D|+=G`{E4Ab?A!`8VszK?OKm^0}fFZaH#=5+K(g=u7egVKH*mQvb34bZ*<8$nmr z5L~+EbZ^gv?xfdMy?-Z0_mRplJ({^TAkOEoJQZi}pWWStZTw1!(_!PBjlUj=XU91k 
zkA%-ioDj$B_MCuC*s+nJ!K+z!K%4|j?){C8)0b}Tjb_I=bELAuYtJaYO_A*Se*}xG zI1d8iEW#RyGj_ti*JPR6+GPW+v#-|Xn923opK|RoPP^y&Ff6V7N@3frjEU&V`Jn!- zmEj7nE;a1a*FZ1qlG1$(ru)z!Y!cn|-RJAHYggjL!mM ztio!DF-#2Ur>*5-vjx+&IP~XOyD?9>wRjPBg~VJIhHbWRUxA*C1-)(k{mI_f%~80% zmM+4El;&lauEQ{F9L?26KgZYml3tyA+Y()!&3)eea%3sVRQ7e7bo*^XD16Sbw3`B z7FW6tV7m5;um*I;(3QT~aX~WfaB`1$HQ5S?ap*ezzc9NdFT!+;Fl>`KF-8o3UQC^q z#3deE%v(oH13nG>yIbG4U}>d)EkJ)1Hj4fr#!}8CQpx7NRBz9Df=J)(?m5@l+FX8E zE{a*vRB;}|@@*WJZ6;>-@BItA+p>*sDRDY}?B|;7?DFap#dn;8-TXtaCrzvs@s%}N zS5La9`An$l`Q}uz^>z9P`*nY~2peK?;_6(6)hO*@*f`p&4aP9FJK8;|bY~Y&8V1sl zBNfp^@_BO`-!7pNn*-*>^a@m+Rf+T0N+xW zJlCnQzGvIgZN2!O2=J|k$-G^Nubi*er03l%+HVK=55XqzU;8b;4_SY=#+8{GPvKjO zjdpMt;0J(N23W!MahXVH^d z&0v=>hBy<#gty~Us*Y=d&C8rg%(vLq*U}(tlKtzi`MSLf%Tm|(0(@`62JoH5SFW|w z9j)EnZAtSm!1q3^4&PC$D_;dj{rBD2or8gu;G4kLn;(;%{&Q--e^*LTO7R)u)2-)m z*kpmv-0Q46AVvZv_x^?$GSBy&?``jHHRf7??`#_Rd*brJ2U3 z7Ut&jDXh@u!)fsLJ%tUZbFv8wqn}1!`e#>bFJpRnYZu%3e`&YwOR!n)Io(*V!Ag|w zQJCEO7o#h4g|F++6^Q`dt1!8rb#>o`E!f;MZJ2<`ygkpPAm`ocnwbF2&tL^g^GjG- zX>P&F&`hDZlWU{{H&bwP_x$rRY=%M4&Gk4;_vbp;edfv{bp5%3Y|(ULoRS!6{A&4h z^=`oAp5A32z*5TZwlHE$|E6D$1rN`aeqI_{#rC7zsz;<09Klx z%|3(azP<{pL3F*$B{o z?yvIPnTf#7KMK?5I|O^e{8erB$v@E@^~W~Z@*W5gqaMFnK3%;xVFN10)qofyuq(tE z!sHzDbS~MPtf&ZA@Sa70?kBJkrTaKQcMVpJW(cE8WOT#fXv&ydyYr3078tbMHoOAU zHK~TJG2bm=vUcd?tKs?py*FV4O7Cib-U#dpTFbxBKZhL`n!}NEJpY92e)j-Ysx&_f z&|HPppgD}D^tFx)9lfvHekWV<94z;*?bfe^&2xZTgJUpVgBsW>$(5q-&mA4!O@tS# z%b%MP0s2>A38nvTfc^w*2>tn^eocB(u~SEwMMeK~sv|0$d_F+`3s|nw-**n2j~yMCYU4FUh&UwYX#F}5BG5Jd_-N%yRg!j&t%QrVq?z3 zb^I@2xhnp?_jb3>z)FapAU;EhNw=NmtL@J2O16vHr_cMVuql#q^Ewrv+X!1=Zp_cL ztNTn+Zf>|_O?TVC`M%~#DWe$@J(73z-h)+_`%Jj3i>CQ(!sOql+0P+a$9xKt_ta#6 zW{vO^rqAalEKFYMU;pYFio~N;Chu?T)_f5*$H3-lz6#TtwXhZD&k{$oihkD=pm`aV zRGQZVG{<0<&|IXU-nE$Oh*U)@%Ec? 
z*{n>Ms{2V;=2;0T$g2P*ZhbDYPuD61yUiRb^R)DZ)^tZ3Uq{H0W8Mw$pMVYFo6Poa z7ynNJ{Fh+$_-5v5U*Dfk{d@lAQ0#fW`@^pWKlj|e@HZLr!#At9L)bvHhL2-hvVQI+WgruxiEb1;ki@HKJc@W1MU2zQ{0Y zp25=X@&Nj9qOHG^Z`Qh7yV@92;Qq7d?jF8wWwi0%)8lJ56a8D9TXdw|458lrmh@>k zX5}TGA@O+}AKlJ6*!`p2|Nh8d50xJdn>Pad(y%G~HfwypD1J8r{KjF!_$^q!$l-{& z4XY;yx8FU2NnBwp70t@;X1#-zVOJ<#Ew=Tm^ewCmTX~T&01ILBN0>*jZ5lX+)ic^vm#g?ep|2j*Dc?9^&JVvPjQ%uu}^Ei zZMDVU)qVrsq_oqpDg4H=v?HGOXn^*oR$KgC?N5X&?Il<}e$`ppm7X@GGOaQXH;4LK zyZX*|pJ^UAo$O5YwKX4!hM#ZdQAGdwm0LyNq;KFFOSa@pJn*y^Zq7`-8tK}*?Rr( z5|vjHHjUo|X6N*1Ij?jM2k3rcbtjeX$MAfcXQp2BuqOPDV&5d0Xfzy)L?e}*Y_A4r zSI+u5i+`>pkICl?@T59_CE|#rB~` z-c{rsZAE-6K>M-P7Jpa!5&TIgn=Q-n_{FeHtz$f6_&MVE@5b|FgO`nEa?Jg^yW4RB z#;!f3D%Pvw%NXj$uZ69=%(Vh9w(^L^}F#&?KM_xODua?kU_Y-V8k93F>pXiiAw zbPOJ{<87wT)WBAM#5j%RaZpz@!ae&D=^neTy~H3RoD*65_4XuNo7)m+82@-1DN1kj*I;gcV2Uz@e~&e)s{L6Q zyY>{S*eBt$%$IIH4Y0);o~6s&%Jw=t-Tm*tnryz(ADJS(zo(J;ZY_uT5PR}9fBm_- zhq)Y^$EOG$f)8OcXz2RvpY!XJBDNcE54^^%YmxBwJ>mEeeubF5J|gj|DpP>3)@`@C zMN0Psyy%y+W7Wf!evLH&CgX+weQlhT3;jPp`y;C@{%+3i!)MhvHw8=M*MdpTyHb*mz#7_Mv4YjyLLZaaL1HH_PaCfGdl-fc|s zQ1Y<_Cxrv(w&AS@oU7S#-6Ty$6_f!`_M;zEyUl=?^$@G8j}lP z1^8_fTiI=Uw>CP<9*U zU@iF7IlA$Rh*=KMJ^T;+I@BxOgYXhHt`))Z@Y`ZO%%jGv|IWnYRo!fw82fZB-nQD} z@7DMY_|^}zSsFHl-xwz6&$|bT94VKvXEZ?jQ>!ihuJ$MJ4!!<^)#F! 
zSNC1fRk0>uL-@_Z1LoGYc)8rMv0e*^^V~o5+O(%d#d!uktm5Rsituat|HcW+?YRl# zmv|;S?^Cdm5}(O9x4@#aXfIa4{TE~P3gdSN1Mkvz7#I1RV1cNq*+bMX#FJ-ev z*aGw4G)5}kJA=`1MIW0c9iV;JYKy3x*-_?E=-l((-U@~tva3V8#N5awbl6M*Vj`Hbhyy75y@n$ypZob#Psa>sEIK z2UqtheB}q(Yy@@%zah-_k90+(!kcq$255h2wZ-4n{v1A}wAW#A{7Nxfk2B>*;_V`y zdv|x?VF!u*~>WURl*IX_`BK<;nhm}KCBMEIA*U$#*whu z2+)1`W4|8NO1Bce`orv4Ww0&g#nqj;@eF;$>(@uwr|a=kt1bR+&hNm7mG&jr7=FV$ z+O5>u)1C~_ermPF-_?E$4=L?6ST%m39c}XV|K7s6HB^A>Hdp9}|Re`2-8-_`yYUZ?utJgf=7qdVGC6Hj|J zK)dpuUlZ|nwO@d5s&#yc_~N(0{FynIqTNkRs@bP&@{ZLOe^>i0`0Oucvq9J-exq3C zT(WLBFZCV^(0**S#oyI_1aDE=%W^z^F)Wieeb8I8ZUtx``xkyq#NX9^3BFRA&0??( z=Fep;lecI~-u3L$HMwE6#oyJw2ESRH%|>CP_zmu8cTp2hdn!PC&wW2{@prYqz_(Q8 zy#b5iSAw}Uk#m`cj?Vj})fRtO`!)FHFJ!Z$uw~}vdCY1{-d;aR1ZaO?wZ-4nz6DRJ z`;ST3AbuT~t+%Hw^_~mRKJYL7nux!vy$>E&c^g;>e)&7va_yWJ?PB)nn)F(2@prY` z;VaK(vnJR)bMtL%FM8fuEy?|FfbLzZJFVvO58=Z~cN&(&uMx}Kf3Qq3ivhX^ANaLs zQo09(mt^Og4=cpaV440QW2^L!5c_m3-mu!@@7AIhzVy9p)&ZM&p79TJ&S4kQW;j6m z6RR!$uJ*_9I(7ds4{O5j=#Dmj=I4*Cs{z`T|H`k4_`BLKz>8GgCE|DT+JP+{3?L74DZ#c0EVeK3c>q|e@o*@?heGt!cT{csohvgt-&rivUOJWJ&SN1HXG6AMf#0;6-{Rlrzs5R{&(R#l zSd3xJdtYXS?KlU@J3*UREq)1(J<5K!9v?^?)>!ju?QjcTrgEQz4dT~1!y1m*S@y+yD|5{m-4fjftBEwk9`w829`+lTqGjm#q860_gZc7ceUH$X{Fr+ zn`f@RjY-W~`*`iGGt5(nRx=Qw{jt>+e^>h>_iuR9r_`_$?VNoL z!(sv>wAZXp9?o4OWd`2;2XV`&GMN+PsLR zu)Q)sS5Cf)+YS$_bxIR#{s)YIn0KAfpJS(KQ-JPWt2>Ra zoBN0GV%45$SQ5WROxhr0Uo28?76Wt-{yRV4CZ&4-KBVUQd{`lV#?fURVM^h;4i~L% zuF~y>hm>vtHe13Rj!7N5+oI73v-)6w?meqJg|AzOJMg8VY&Hu^;8*YJvJkuqA zUxx;z`y71s>)E=6uzdWUeBT)UxAPsM_wIyTA`IBg=PzVlqo~DS*XJ6HLvz*;aNBVi zUZ>&>!^ZKGdwp4(eD$5B7msnoVC>4dspjUp@Wlh!`Om>x@T zhu;=+l&>3!@>PHd!*#xIS=}dUKD!7nR=O$JZT!ZqZhthwxAmO+0NsaHcS7mjgXd8{ zsXt})J{Mq(_|;+su9)hu zO1B9%Pe-|pNgX8LSY^bd;q}V*uGO7ZbLfX~N}iSPG%Sf+oq+|1p^ShEf`v zWLuAS51eDwcYTRO~2xRNaGzU{9DM*X;ax zq4Qi1vZe;Eb+208bv4(%4Ua3`A=m_dnR&Udtt%zEV*$F4t?sDOeFR@+@^$;svK)_J z43qk#dipxk{H=*-Zv|){`_FzY#NX9<2|lc}W3Ub8wPj51k-N?()1ArIKKc8$diLph z+_2i>?`mIzhm`gxY!tsiOy=FLw$6+4K>TKa?w3|~Na=nqx~gxk!{qlDORaCRwZ9cv 
zV?NuR?{OG=azaY?7`#;7)7QY{Hysx-Z=KMcZX4j>#sJ-0R(Av8#%)hbq6 z$KiThu7k;MD9vHsxE%KTo_&rNm;6)s=>BoX#+czaH@6SqmjEui4NKwIgh{`)+Et>x z7@&P{!>@(-yE+HpxvKBw!wT_}g)qA^+Lf8#MKduG=3A9 zlY6D)eknlvq16_DH}`w+O$1!F0Bgjr7ISisHg~1UMSnd&|E2%x*QQ44N8xKqKLmTi zc)o@?xkp92mVHP0bo04pwZ-4%m*FF-4-Uh|@wc0H5fH%4az^q=`}e(p)7zZYJ};Nj|Tb9^a&1ws1!olr5y==0iZ z_4Ac}JNz~a7FWLsHebN{58IVLOZdS6{d-n_iep@@JMcl($7W#({OW`B868*HU$2gd zZu<49Q~HPCdORzJ$$O=nBroF`f6~0W9g!oZ7C&95Yc|fB8k;Y}M^qmghK=KQ3FCj+ z&)&G3oea=_YW0Ve{$qHV(qDsB;};6luQ0j)eRuvRV5E{$s`OulFRJrg3tM5nUBGZx z`A1D6K>q`)Kd<@=))+q$pZv05`~4e$Xx^K6Bo!Ha(zmcp+I)3vUM zM9mWWuc%xP{SUtujY|JnxE?DDUs))SR=(0|hc~Hlw+S|n&TVX0uKcFd zV1WKTt3SmtZoTiohgDr?VF~=|1NGx(h5hU5T8M7>wUFP;cJ&XzkE-#v7$$3kP3G|s z+uKU(4XY)SmaEk(zV>`&&RYj;1|3-wq}kSE*$vp5S}%;lbp1bv$#prRd_RT9RDD)p zwfKcGxtH|+{ypei#7y4*Bq!CrlQ4-@qGG)U*JI^Tn7mg%k9lKdj6Mp}^?%Q5E~+)a zjR4JYnEX9JDx=9N?>6kdI(F|DexCB%=k9rU8lbre3*%Ru(PZ^S*^K<6WnaPKVwnx_)md^O+CK4O*d?;Wi;FzuEA#&8-I^J$%`wGV#RXU&fF>~aQ5U$sx_hEJT#jzdj zcwDqM1GHcH-~RO@{;u|maQ)p=7`92jU%?!0o!*{AcqZneeV)xHX!Q)~n#zc)RU zt?fPRy&0hWrPUUHSNn6gT%Rsmhs9x~S=v=z-nmbA=Y0Z3lXAr0)p-@3r&uj)g)v|O z+o?%qRBm`%0<_<^+T!nOzYCvIYyvg}ODb)KS2G)+{mlRI^A>+sdoR3Rv2Bi*XPE`q zuKAg&7juk0pS@N;Uum_&Hwoy*Y=X@LZezRjxwZxa^zT`H{ToYn;G;@^7M6h32kD0^ z%}RiNbla~_ozgl44=Yv-lfMt!WE|g>Ki`X1vtQTe9jm{=F&z5Uc>5N7NwGngd=EAn zpf6+Tc!2)rR)0ikeF{%0wgQv)kHSIvj5ON;`mg<8zdmJ3>lJu`V%4xU2GAu;*C)n( zOB4I$@3q}JT!zVafWjDVOo;78=9M(2pV^M%tLytYOv);As;m1cJf+wQtQHo=xMa=i z=aW6%XL$Y@pnL2~Uss+XySgvI<-dR8vKVZGeC1iQw88$Eh^d3?e6L&G6?|QdtMEC+ zMqpQ9Ls%ZVneXt}G&cjZzqH!o?`nSzPbjtyi^JqQB~A&y>)LL%12kXznLYpN_*dWs zidDnb&|1Pg%`!)h+$=jM40KC9R$Y!o(#$vNvd-+G~~llH@B9Nz-A<>_+) zmHGw?aF%WIPfnWu%f!g|_H-ti`x57x`7vwn&yBt4^6ne%-a5njcC~h&Z*M)F tKHtlO^Ig{u Date: Fri, 22 Sep 2023 14:20:02 -0700 Subject: [PATCH 174/567] Update TFRT dependency to use revision http://github.com/tensorflow/runtime/commit/2b29f4d9ec1bcbfaabc1d552c8e7aac9dc2809a1. 
PiperOrigin-RevId: 567716100 --- third_party/tf_runtime/workspace.bzl | 4 ++-- third_party/xla/third_party/tf_runtime/workspace.bzl | 4 ++-- .../xla/third_party/tsl/third_party/tf_runtime/workspace.bzl | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/third_party/tf_runtime/workspace.bzl b/third_party/tf_runtime/workspace.bzl index 7fb69c7a3ed804..feaebb3685d3ba 100644 --- a/third_party/tf_runtime/workspace.bzl +++ b/third_party/tf_runtime/workspace.bzl @@ -6,8 +6,8 @@ def repo(): """Imports TFRT.""" # Attention: tools parse and update these lines. - TFRT_COMMIT = "3b7012849fde349a679c495f5bd421935d6599f9" - TFRT_SHA256 = "aff0363d40d564af9ad599f40b4b24b819e84549eb7ea9d5fb8875817c76a4bf" + TFRT_COMMIT = "2b29f4d9ec1bcbfaabc1d552c8e7aac9dc2809a1" + TFRT_SHA256 = "c767bf0e2da2a767a8a4d51fc0e1f314f7c46df6097b3ac03ca3612cdb5889c3" tf_http_archive( name = "tf_runtime", diff --git a/third_party/xla/third_party/tf_runtime/workspace.bzl b/third_party/xla/third_party/tf_runtime/workspace.bzl index 7fb69c7a3ed804..feaebb3685d3ba 100644 --- a/third_party/xla/third_party/tf_runtime/workspace.bzl +++ b/third_party/xla/third_party/tf_runtime/workspace.bzl @@ -6,8 +6,8 @@ def repo(): """Imports TFRT.""" # Attention: tools parse and update these lines. 
- TFRT_COMMIT = "3b7012849fde349a679c495f5bd421935d6599f9" - TFRT_SHA256 = "aff0363d40d564af9ad599f40b4b24b819e84549eb7ea9d5fb8875817c76a4bf" + TFRT_COMMIT = "2b29f4d9ec1bcbfaabc1d552c8e7aac9dc2809a1" + TFRT_SHA256 = "c767bf0e2da2a767a8a4d51fc0e1f314f7c46df6097b3ac03ca3612cdb5889c3" tf_http_archive( name = "tf_runtime", diff --git a/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl b/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl index 7fb69c7a3ed804..feaebb3685d3ba 100644 --- a/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl +++ b/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl @@ -6,8 +6,8 @@ def repo(): """Imports TFRT.""" # Attention: tools parse and update these lines. - TFRT_COMMIT = "3b7012849fde349a679c495f5bd421935d6599f9" - TFRT_SHA256 = "aff0363d40d564af9ad599f40b4b24b819e84549eb7ea9d5fb8875817c76a4bf" + TFRT_COMMIT = "2b29f4d9ec1bcbfaabc1d552c8e7aac9dc2809a1" + TFRT_SHA256 = "c767bf0e2da2a767a8a4d51fc0e1f314f7c46df6097b3ac03ca3612cdb5889c3" tf_http_archive( name = "tf_runtime", From 11ead767cba2489056fc577fee350025b2fc768a Mon Sep 17 00:00:00 2001 From: Jake Harmon Date: Fri, 22 Sep 2023 14:22:36 -0700 Subject: [PATCH 175/567] Fix build error on windows PiperOrigin-RevId: 567716676 --- tensorflow/tools/pip_package/build_pip_package.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/pip_package/build_pip_package.sh b/tensorflow/tools/pip_package/build_pip_package.sh index 07f755ac6137cc..ef6f655ba6039b 100755 --- a/tensorflow/tools/pip_package/build_pip_package.sh +++ b/tensorflow/tools/pip_package/build_pip_package.sh @@ -259,8 +259,13 @@ function prepare_src() { # We copy from bazel-bin/tensorflow instead of bazel-bin/internal to copy # headers from TSL/XLA into tensorflow so that InstallHeaders can move # them back into tensorflow/include - cp -RLn 
bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/local_tsl/tsl/ ${TMPDIR}/tensorflow - cp -RLn bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/local_xla/xla/ ${TMPDIR}/tensorflow/compiler + if is_windows; then + cp -RLn bazel-bin/tensorflow/tools/pip_package/build_pip_package.exe.runfiles/local_tsl/tsl/ ${TMPDIR}/tensorflow + cp -RLn bazel-bin/tensorflow/tools/pip_package/build_pip_package.exe.runfiles/local_xla/xla/ ${TMPDIR}/tensorflow/compiler + else + cp -RLn bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/local_tsl/tsl/ ${TMPDIR}/tensorflow + cp -RLn bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/local_xla/xla/ ${TMPDIR}/tensorflow/compiler + fi # Fix the proto stubs if is_macos; then find ${TMPDIR}/tensorflow/ -name "*.py" -type f -exec sed -i '' 's/from tsl\./from tensorflow.tsl./' {} \; From 5f7a3ed8d9672da3307865ec76102fead5296df8 Mon Sep 17 00:00:00 2001 From: Kanglan Tang Date: Fri, 22 Sep 2023 14:42:34 -0700 Subject: [PATCH 176/567] Set the default clang version to 17 or 16 PiperOrigin-RevId: 567721179 --- configure.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/configure.py b/configure.py index e1e1326826e3be..cbb74beb00c22f 100644 --- a/configure.py +++ b/configure.py @@ -610,7 +610,9 @@ def set_clang_cuda_compiler_path(environ_cp): """Set CLANG_CUDA_COMPILER_PATH.""" default_clang_path = '/usr/lib/llvm-17/bin/clang' if not os.path.exists(default_clang_path): - default_clang_path = which('clang') or '' + default_clang_path = '/usr/lib/llvm-16/bin/clang' + if not os.path.exists(default_clang_path): + default_clang_path = which('clang') or '' clang_cuda_compiler_path = prompt_loop_or_load_from_env( environ_cp, @@ -819,9 +821,11 @@ def set_clang_compiler_path(environ_cp): string value for clang_compiler_path. 
""" # Default path if clang-16 is installed by using apt-get install - default_clang_path = '/usr/lib/llvm-16/bin/clang' + default_clang_path = '/usr/lib/llvm-17/bin/clang' if not os.path.exists(default_clang_path): - default_clang_path = which('clang') or '' + default_clang_path = '/usr/lib/llvm-16/bin/clang' + if not os.path.exists(default_clang_path): + default_clang_path = which('clang') or '' clang_compiler_path = prompt_loop_or_load_from_env( environ_cp, From 5f771ea1907d0974a2c78891de7c4f3fede8244c Mon Sep 17 00:00:00 2001 From: Berkin Ilbeyi Date: Fri, 22 Sep 2023 14:56:25 -0700 Subject: [PATCH 177/567] [XLA] Initialize tuple shapes of async-done in dataflow analysis. PiperOrigin-RevId: 567724401 --- third_party/xla/xla/python/xla_client.py | 2 +- .../xla/xla/service/hlo_dataflow_analysis.cc | 7 ++++- .../xla/service/hlo_dataflow_analysis_test.cc | 29 +++++++++++++++++++ 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/third_party/xla/xla/python/xla_client.py b/third_party/xla/xla/python/xla_client.py index 0c19224f787919..d6172122d20e4f 100644 --- a/third_party/xla/xla/python/xla_client.py +++ b/third_party/xla/xla/python/xla_client.py @@ -44,7 +44,7 @@ # Just an internal arbitrary increasing number to help with backward-compatible # changes. In JAX, reference this via jax._src.lib.xla_extension_version. -_version = 196 +_version = 197 # Version number for MLIR:Python components. mlir_api_version = 54 diff --git a/third_party/xla/xla/service/hlo_dataflow_analysis.cc b/third_party/xla/xla/service/hlo_dataflow_analysis.cc index f1bb8f07cbfd2d..1f72360f3b4bfd 100644 --- a/third_party/xla/xla/service/hlo_dataflow_analysis.cc +++ b/third_party/xla/xla/service/hlo_dataflow_analysis.cc @@ -1503,7 +1503,12 @@ Status HloDataflowAnalysis::InitializeInstructionValueSets() { }); break; case HloOpcode::kAsyncDone: - // AsyncDone's output aliases its output. + // AsyncDone's output aliases its output. It defines all remaining + // tuple-shaped values. 
+ define_all_values([&](const ShapeIndex& index) { + return ShapeUtil::GetSubshape(instruction->shape(), index) + .IsTuple(); + }); break; case HloOpcode::kCopyStart: // CopyStart produces a tuple of {destination buffer, aliased operand, diff --git a/third_party/xla/xla/service/hlo_dataflow_analysis_test.cc b/third_party/xla/xla/service/hlo_dataflow_analysis_test.cc index 98ce55b9d4ab5b..2a01eee2626286 100644 --- a/third_party/xla/xla/service/hlo_dataflow_analysis_test.cc +++ b/third_party/xla/xla/service/hlo_dataflow_analysis_test.cc @@ -1183,6 +1183,35 @@ ENTRY %main (a: f32[4096], b: f32[4096]) -> f32[4096] { } } +TEST_P(HloDataflowAnalysisTest, TupleShapedAsyncOp) { + std::string hlo_str = R"( + HloModule module + + ENTRY entry { + p0 = f32[2,3] parameter(0) + async-start = ((f32[2,3]), (f32[2,3], f32[2,3]), u32[]) custom-call-start(p0), custom_call_target="foo" + async-update = ((f32[2,3]), (f32[2,3], f32[2,3]), u32[]) custom-call-update(async-start), custom_call_target="foo" + ROOT async-done = (f32[2,3], f32[2,3]) custom-call-done(async-update), custom_call_target="foo" + } +)"; + TF_ASSERT_OK_AND_ASSIGN( + module_, ParseAndReturnVerifiedModule(hlo_str, GetModuleConfigForTest())); + + bool ssa_form = GetParam(); + const HloDataflowAnalysis& analysis = RunAnalysis(ssa_form); + + const HloInstruction* async_start = + FindInstruction(module_.get(), "async-start"); + const HloInstruction* async_update = + FindInstruction(module_.get(), "async-update"); + const HloInstruction* async_done = + FindInstruction(module_.get(), "async-done"); + + EXPECT_TRUE(analysis.ValueIsDefinedAt(async_start, /*index=*/{1})); + EXPECT_TRUE(analysis.ValueIsDefinedAt(async_update, /*index=*/{1})); + EXPECT_TRUE(analysis.ValueIsDefinedAt(async_done)); +} + TEST_P(HloDataflowAnalysisTest, SendAndSendDone) { // Test that a Send forwards its operand to the output tuple at {0}. 
auto builder = HloComputation::Builder(TestName()); From 44251d54e6c5f58e4909af0d948123d6634ec1ea Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 22 Sep 2023 15:13:02 -0700 Subject: [PATCH 178/567] Integrate LLVM at llvm/llvm-project@46d5d264fc66 Updates LLVM usage to match [46d5d264fc66](https://github.com/llvm/llvm-project/commit/46d5d264fc66) PiperOrigin-RevId: 567728405 --- third_party/llvm/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/third_party/llvm/workspace.bzl b/third_party/llvm/workspace.bzl index eb146861cd0cb1..f24443e14ea083 100644 --- a/third_party/llvm/workspace.bzl +++ b/third_party/llvm/workspace.bzl @@ -4,8 +4,8 @@ load("//third_party:repo.bzl", "tf_http_archive") def repo(name): """Imports LLVM.""" - LLVM_COMMIT = "6e3827af98fa59d5147598972625a5317936c31f" - LLVM_SHA256 = "bda0e24e2b92f19d2929237101edc1f66fa64f5407d32cbabaf44f878ff0827c" + LLVM_COMMIT = "46d5d264fc66a017bbd0182b2b5fcc0f3f23d3be" + LLVM_SHA256 = "f31d546ecdcd07971f7f8f5f6f83ee2e101bf677975fe96d6ba837628eba7e24" tf_http_archive( name = name, From 745a7fae7eafa9fb16849fd5ee9f24e0c2db3b7e Mon Sep 17 00:00:00 2001 From: Kanglan Tang Date: Fri, 22 Sep 2023 15:21:10 -0700 Subject: [PATCH 179/567] Fix TF-TPU build flags PiperOrigin-RevId: 567730240 --- ci/official/envs/nightly_linux_x86_tpu_py310 | 2 +- ci/official/envs/nightly_linux_x86_tpu_py311 | 2 +- ci/official/envs/nightly_linux_x86_tpu_py39 | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/official/envs/nightly_linux_x86_tpu_py310 b/ci/official/envs/nightly_linux_x86_tpu_py310 index 96e6967b3e774e..d802791f6c9f8d 100644 --- a/ci/official/envs/nightly_linux_x86_tpu_py310 +++ b/ci/official/envs/nightly_linux_x86_tpu_py310 @@ -1,7 +1,7 @@ source ci/official/envs/ci_default source ci/official/envs/ci_nightly_uploads TFCI_BAZEL_BAZELRC_ARGS=(--bazelrc ./ci/official/bazelrcs/cpu.bazelrc) -TFCI_BAZEL_COMMON_ARGS=(--config sigbuild_remote_cache_push --config resultstore 
--repo_env=TF_PYTHON_VERSION=3.10 --define=with_tpu_support=true --define=enable_mlir_bridge=true --config=opt --define=framework_shared_object=true --copt=-DLIBTPU_ON_GCE) +TFCI_BAZEL_COMMON_ARGS=(--config sigbuild_remote_cache_push --config resultstore --repo_env=TF_PYTHON_VERSION=3.10 --config=tpu) TFCI_BUILD_PIP_PACKAGE_ARGS=(--tpu --nightly_flag) TFCI_DOCKER_IMAGE=tensorflow/build:latest-python3.10 TFCI_NIGHTLY_UPDATE_VERSION_ENABLE=1 diff --git a/ci/official/envs/nightly_linux_x86_tpu_py311 b/ci/official/envs/nightly_linux_x86_tpu_py311 index 6e15a1ba0064ed..c797ad33cac6a7 100644 --- a/ci/official/envs/nightly_linux_x86_tpu_py311 +++ b/ci/official/envs/nightly_linux_x86_tpu_py311 @@ -1,7 +1,7 @@ source ci/official/envs/ci_default source ci/official/envs/ci_nightly_uploads TFCI_BAZEL_BAZELRC_ARGS=(--bazelrc ./ci/official/bazelrcs/cpu.bazelrc) -TFCI_BAZEL_COMMON_ARGS=(--config sigbuild_remote_cache_push --config resultstore --repo_env=TF_PYTHON_VERSION=3.11 --define=with_tpu_support=true --define=enable_mlir_bridge=true --config=opt --define=framework_shared_object=true --copt=-DLIBTPU_ON_GCE) +TFCI_BAZEL_COMMON_ARGS=(--config sigbuild_remote_cache_push --config resultstore --repo_env=TF_PYTHON_VERSION=3.11 --config=tpu) TFCI_BUILD_PIP_PACKAGE_ARGS=(--tpu --nightly_flag) TFCI_DOCKER_IMAGE=tensorflow/build:latest-python3.11 TFCI_NIGHTLY_UPDATE_VERSION_ENABLE=1 diff --git a/ci/official/envs/nightly_linux_x86_tpu_py39 b/ci/official/envs/nightly_linux_x86_tpu_py39 index fb40176dbb24b1..f738c2cb75c216 100644 --- a/ci/official/envs/nightly_linux_x86_tpu_py39 +++ b/ci/official/envs/nightly_linux_x86_tpu_py39 @@ -1,7 +1,7 @@ source ci/official/envs/ci_default source ci/official/envs/ci_nightly_uploads TFCI_BAZEL_BAZELRC_ARGS=(--bazelrc ./ci/official/bazelrcs/cpu.bazelrc) -TFCI_BAZEL_COMMON_ARGS=(--config sigbuild_remote_cache_push --config resultstore --repo_env=TF_PYTHON_VERSION=3.9 --define=with_tpu_support=true --define=enable_mlir_bridge=true --config=opt 
--define=framework_shared_object=true --copt=-DLIBTPU_ON_GCE) +TFCI_BAZEL_COMMON_ARGS=(--config sigbuild_remote_cache_push --config resultstore --repo_env=TF_PYTHON_VERSION=3.9 --config=tpu) TFCI_BUILD_PIP_PACKAGE_ARGS=(--tpu --nightly_flag) TFCI_DOCKER_IMAGE=tensorflow/build:latest-python3.9 TFCI_NIGHTLY_UPDATE_VERSION_ENABLE=1 From 38c38ded948b16381b21b60d8cb9a53952bc8f03 Mon Sep 17 00:00:00 2001 From: Yang Chen Date: Fri, 22 Sep 2023 15:40:49 -0700 Subject: [PATCH 180/567] #tf-data-service Clean up log for distributed snapshot. PiperOrigin-RevId: 567734740 --- tensorflow/core/data/service/dispatcher_impl.cc | 11 ++--------- .../core/data/service/snapshot/snapshot_manager.cc | 6 ------ 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/tensorflow/core/data/service/dispatcher_impl.cc b/tensorflow/core/data/service/dispatcher_impl.cc index d4070af6b565bb..89fcb140272c79 100644 --- a/tensorflow/core/data/service/dispatcher_impl.cc +++ b/tensorflow/core/data/service/dispatcher_impl.cc @@ -389,7 +389,6 @@ void DataServiceDispatcherImpl::ReportProcessingTimesFromActiveTasks( Status DataServiceDispatcherImpl::WorkerHeartbeat( const WorkerHeartbeatRequest* request, WorkerHeartbeatResponse* response) { - absl::Time start_time = absl::FromUnixMicros(env_->NowMicros()); TF_RETURN_IF_ERROR(CheckStarted()); VLOG(3) << "Received worker heartbeat request from worker " << request->worker_address(); @@ -443,9 +442,7 @@ Status DataServiceDispatcherImpl::WorkerHeartbeat( } VLOG(3) << "Finished worker heartbeat for worker at address " - << request->worker_address() << " in " - << (absl::ToDoubleSeconds(absl::FromUnixMicros(env_->NowMicros()) - - start_time)); + << request->worker_address(); return OkStatus(); } @@ -1179,7 +1176,6 @@ Status DataServiceDispatcherImpl::GetSnapshotStreams( Status DataServiceDispatcherImpl::GetSnapshotSplit( const GetSnapshotSplitRequest* request, GetSnapshotSplitResponse* response) { - absl::Time start_time = 
absl::FromUnixMicros(env_->NowMicros()); TF_RETURN_IF_ERROR(CheckStarted()); absl::flat_hash_map>::iterator @@ -1193,10 +1189,7 @@ Status DataServiceDispatcherImpl::GetSnapshotSplit( request->base_path()); } } - auto status = it->second->GetSnapshotSplit(*request, *response); - LOG(INFO) << "[tf.data snapshot] GetSnapshotSplit took " - << absl::FromUnixMicros(env_->NowMicros()) - start_time; - return status; + return it->second->GetSnapshotSplit(*request, *response); } Status DataServiceDispatcherImpl::DisableCompressionAtRuntime( diff --git a/tensorflow/core/data/service/snapshot/snapshot_manager.cc b/tensorflow/core/data/service/snapshot/snapshot_manager.cc index 7644ee8c6f6850..39d08b0cc2ae4e 100644 --- a/tensorflow/core/data/service/snapshot/snapshot_manager.cc +++ b/tensorflow/core/data/service/snapshot/snapshot_manager.cc @@ -580,10 +580,7 @@ absl::Status SnapshotManager::GetSnapshotSplit( TF_RETURN_IF_ERROR(ResetSource(source, request.source_index())); } - absl::Time start_time = absl::FromUnixMicros(env_->NowMicros()); TF_RETURN_IF_ERROR(source.split_provider->GetNext(&split, &end_of_splits)); - LOG(INFO) << "[tf.data SnapshotManager] GetNext took " - << absl::FromUnixMicros(env_->NowMicros()) - start_time; if (end_of_splits) { response.set_end_of_splits(true); return absl::OkStatus(); @@ -592,15 +589,12 @@ absl::Status SnapshotManager::GetSnapshotSplit( ++stream.num_assigned_splits_per_source[request.source_index()]; ++num_assigned_splits_; } - absl::Time start_time = absl::FromUnixMicros(env_->NowMicros()); std::string split_path = SplitPath( path_, request.stream_index(), request.source_index(), request.repetition_index(), local_split_index, global_split_index); TF_RETURN_IF_ERROR(AtomicallyWriteTFRecords( split_path, {split}, tsl::io::compression::kNone, env_)); split.AsProtoTensorContent(response.mutable_split()); - LOG(INFO) << "[tf.data SnapshotManager] Writing split took " - << absl::FromUnixMicros(env_->NowMicros()) - start_time; return 
absl::OkStatus(); } From ca20c978199289c147d0942861aac76b48cce33c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 22 Sep 2023 15:52:05 -0700 Subject: [PATCH 181/567] Update TFRT dependency to use revision http://github.com/tensorflow/runtime/commit/e8d8c19d9314439fb3e1c08936ba0dc6863d1ccc. PiperOrigin-RevId: 567737020 --- third_party/tf_runtime/workspace.bzl | 4 ++-- third_party/xla/third_party/tf_runtime/workspace.bzl | 4 ++-- .../xla/third_party/tsl/third_party/tf_runtime/workspace.bzl | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/third_party/tf_runtime/workspace.bzl b/third_party/tf_runtime/workspace.bzl index feaebb3685d3ba..321212b36339e1 100644 --- a/third_party/tf_runtime/workspace.bzl +++ b/third_party/tf_runtime/workspace.bzl @@ -6,8 +6,8 @@ def repo(): """Imports TFRT.""" # Attention: tools parse and update these lines. - TFRT_COMMIT = "2b29f4d9ec1bcbfaabc1d552c8e7aac9dc2809a1" - TFRT_SHA256 = "c767bf0e2da2a767a8a4d51fc0e1f314f7c46df6097b3ac03ca3612cdb5889c3" + TFRT_COMMIT = "e8d8c19d9314439fb3e1c08936ba0dc6863d1ccc" + TFRT_SHA256 = "775221f0d876c5d5df52f8c6fdd072bc52352c65276908d371fb164888e0c0d4" tf_http_archive( name = "tf_runtime", diff --git a/third_party/xla/third_party/tf_runtime/workspace.bzl b/third_party/xla/third_party/tf_runtime/workspace.bzl index feaebb3685d3ba..321212b36339e1 100644 --- a/third_party/xla/third_party/tf_runtime/workspace.bzl +++ b/third_party/xla/third_party/tf_runtime/workspace.bzl @@ -6,8 +6,8 @@ def repo(): """Imports TFRT.""" # Attention: tools parse and update these lines. 
- TFRT_COMMIT = "2b29f4d9ec1bcbfaabc1d552c8e7aac9dc2809a1" - TFRT_SHA256 = "c767bf0e2da2a767a8a4d51fc0e1f314f7c46df6097b3ac03ca3612cdb5889c3" + TFRT_COMMIT = "e8d8c19d9314439fb3e1c08936ba0dc6863d1ccc" + TFRT_SHA256 = "775221f0d876c5d5df52f8c6fdd072bc52352c65276908d371fb164888e0c0d4" tf_http_archive( name = "tf_runtime", diff --git a/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl b/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl index feaebb3685d3ba..321212b36339e1 100644 --- a/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl +++ b/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl @@ -6,8 +6,8 @@ def repo(): """Imports TFRT.""" # Attention: tools parse and update these lines. - TFRT_COMMIT = "2b29f4d9ec1bcbfaabc1d552c8e7aac9dc2809a1" - TFRT_SHA256 = "c767bf0e2da2a767a8a4d51fc0e1f314f7c46df6097b3ac03ca3612cdb5889c3" + TFRT_COMMIT = "e8d8c19d9314439fb3e1c08936ba0dc6863d1ccc" + TFRT_SHA256 = "775221f0d876c5d5df52f8c6fdd072bc52352c65276908d371fb164888e0c0d4" tf_http_archive( name = "tf_runtime", From d7027bd713999cdb3def5d365a0bca49b1b0f440 Mon Sep 17 00:00:00 2001 From: Ziyin Huang Date: Fri, 22 Sep 2023 15:59:11 -0700 Subject: [PATCH 182/567] Add a warning for Feature/table config names if not set. This field is required for running embedding lookup on latest tpu hardware. PiperOrigin-RevId: 567738526 --- tensorflow/python/tpu/tpu_embedding_v2_utils.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/tpu/tpu_embedding_v2_utils.py b/tensorflow/python/tpu/tpu_embedding_v2_utils.py index e04ba6760528fc..53fec7c18f619d 100644 --- a/tensorflow/python/tpu/tpu_embedding_v2_utils.py +++ b/tensorflow/python/tpu/tpu_embedding_v2_utils.py @@ -1046,7 +1046,8 @@ def __init__(self, 'mean' the default. 'sqrtn' often achieves good accuracy, in particular with bag-of-words columns. For more information, see `tf.nn.embedding_lookup_sparse`. 
- name: An optional string used to name the table. Useful for debugging. + name: An optional string used to name the table. Must be defined if + running on SparseCore. quantization_config: The simulated quantization config. An instance of `tf.tpu.experimental.embedding.QuantizationConfig`. See the class for more documentation. @@ -1083,6 +1084,12 @@ def __init__(self, f"Argument `combiner` must be one of {accepted_combiners}. " f"Received: {combiner}") + if name is None: + logging.warning( + "Name of the table config must be specified for running on" + " SparseCore. Different table configs must have unique names." + ) + self.vocabulary_size = vocabulary_size self.dim = dim self.initializer = initializer @@ -1224,7 +1231,8 @@ def __init__(self, has to match the shape (for ragged tensor, the input shape and output shape can mismatch). If not provided, the shape can be either provided to the `embedding.build` or auto detected at the runtime. - name: An optional name for the feature, useful for debugging. + name: An optional string used to name the table. Must be defined if + running on SparseCore. Returns: `FeatureConfig`. @@ -1242,6 +1250,11 @@ def __init__(self, raise ValueError( f"Argument `max_sequence_length` must be an int and must be >= 0. " f"Received: {max_sequence_length}") + if name is None: + logging.warning( + "Name of the Feature config must be specified for running on" + " SparseCore. Different feature configs must have unique names." + ) self.table = table self.max_sequence_length = max_sequence_length From 8803725ae8ae13147e306b510e59caf2dbab58bd Mon Sep 17 00:00:00 2001 From: Jieying Luo Date: Fri, 22 Sep 2023 16:08:19 -0700 Subject: [PATCH 183/567] [PJRT C API] Add a custom call C API to the priv of GPU PJRT_Api* and implement it in GPU plugin. The priv of GPU PJRT_Api* has a chained of PJRT_Structure_Base which contains a type and a pointer to next PJRT_Structure_Base. Rename priv to extension_start. 
PiperOrigin-RevId: 567740522 --- third_party/xla/xla/pjrt/c/BUILD | 13 +++++ third_party/xla/xla/pjrt/c/pjrt_c_api.h | 19 +++++-- .../xla/xla/pjrt/c/pjrt_c_api_gpu_extension.h | 52 +++++++++++++++++++ .../xla/xla/pjrt/c/pjrt_c_api_gpu_internal.cc | 29 +++++++++-- .../xla/xla/pjrt/c/pjrt_c_api_gpu_test.cc | 31 +++++++++++ .../xla/xla/pjrt/c/pjrt_c_api_wrapper_impl.h | 5 +- 6 files changed, 140 insertions(+), 9 deletions(-) create mode 100644 third_party/xla/xla/pjrt/c/pjrt_c_api_gpu_extension.h diff --git a/third_party/xla/xla/pjrt/c/BUILD b/third_party/xla/xla/pjrt/c/BUILD index ba1adb05c17794..83c8c354d38231 100644 --- a/third_party/xla/xla/pjrt/c/BUILD +++ b/third_party/xla/xla/pjrt/c/BUILD @@ -34,6 +34,15 @@ cc_library( ], ) +cc_library( + name = "pjrt_c_api_gpu_extension_hdrs", + hdrs = ["pjrt_c_api_gpu_extension.h"], + visibility = ["//visibility:public"], + deps = [ + ":pjrt_c_api_hdrs", + ], +) + cc_library( name = "pjrt_c_api_wrapper_impl", srcs = ["pjrt_c_api_wrapper_impl.cc"], @@ -134,6 +143,7 @@ cc_library( hdrs = ["pjrt_c_api_gpu_internal.h"], visibility = ["//visibility:public"], deps = [ + ":pjrt_c_api_gpu_extension_hdrs", ":pjrt_c_api_hdrs", ":pjrt_c_api_helpers", ":pjrt_c_api_wrapper_impl", @@ -142,6 +152,7 @@ cc_library( "//xla/pjrt/gpu:gpu_helpers", "//xla/pjrt/gpu:se_gpu_pjrt_client", "//xla/python:inspect_sharding", # To register "InspectSharding" custom partitioning handler. 
+ "//xla/service:custom_call_target_registry", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/status", "@com_google_absl//absl/strings:str_format", @@ -204,6 +215,7 @@ xla_cc_test( tags = tf_cuda_tests_tags(), deps = [ ":pjrt_c_api_gpu", + ":pjrt_c_api_gpu_extension_hdrs", ":pjrt_c_api_hdrs", ":pjrt_c_api_helpers", ":pjrt_c_api_test_base", @@ -213,6 +225,7 @@ xla_cc_test( "//xla:statusor", "//xla/pjrt:pjrt_client", "//xla/pjrt:pjrt_common", + "//xla/service:custom_call_target_registry", "//xla/service:gpu_plugin", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/status", diff --git a/third_party/xla/xla/pjrt/c/pjrt_c_api.h b/third_party/xla/xla/pjrt/c/pjrt_c_api.h index 3bbc4cea4827a9..f3064d1fe41b13 100644 --- a/third_party/xla/xla/pjrt/c/pjrt_c_api.h +++ b/third_party/xla/xla/pjrt/c/pjrt_c_api.h @@ -53,7 +53,7 @@ extern "C" { // Changes include: // * Adding a new field to the PJRT_Api or argument structs // * Renaming a method or argument (doesn't affect ABI) -#define PJRT_API_MINOR 30 +#define PJRT_API_MINOR 31 // The plugin should set the major_version and minor_version of // PJRT_Api.pjrt_api_version to be the `PJRT_API_MAJOR` and `PJRT_API_MINOR` in @@ -1901,6 +1901,20 @@ PJRT_DEFINE_STRUCT_TRAITS(PJRT_Compile_Args, executable); // PJRT_Client before execution. typedef PJRT_Error* PJRT_Compile(PJRT_Compile_Args* args); +// -------------------------------- Extension ---------------------------------- + +typedef enum { + PJRT_Structure_Type_Gpu_Custom_Call = 0, +} PJRT_Structure_Type; + +// PJRT_Structure_Base contains a type and a pointer to next +// PJRT_Structure_Base. The framework can go through this chain to find +// structure and identify it with the type. 
+typedef struct PJRT_Structure_Base { + PJRT_Structure_Type type; + const struct PJRT_Structure_Base* next; +} PJRT_Structure_Base; + // -------------------------------- API access --------------------------------- #define _PJRT_API_STRUCT_FIELD(fn_type) fn_type* fn_type @@ -1908,7 +1922,7 @@ typedef PJRT_Error* PJRT_Compile(PJRT_Compile_Args* args); // Please modify PJRT_Api_STRUCT_SIZE if the last field of PJRT_Api is changed. typedef struct { size_t struct_size; - void* priv; + void* extension_start; PJRT_Api_Version pjrt_api_version; @@ -2025,7 +2039,6 @@ const size_t PJRT_Api_STRUCT_SIZE = PJRT_STRUCT_SIZE(PJRT_Api, PJRT_Executable_OutputDimensions); #undef _PJRT_API_STRUCT_FIELD -#undef PJRT_DEFINE_STRUCT_TRAITS #ifdef __cplusplus } diff --git a/third_party/xla/xla/pjrt/c/pjrt_c_api_gpu_extension.h b/third_party/xla/xla/pjrt/c/pjrt_c_api_gpu_extension.h new file mode 100644 index 00000000000000..f86803a2ae38be --- /dev/null +++ b/third_party/xla/xla/pjrt/c/pjrt_c_api_gpu_extension.h @@ -0,0 +1,52 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef XLA_PJRT_C_PJRT_C_API_GPU_EXTENSION_H_ +#define XLA_PJRT_C_PJRT_C_API_GPU_EXTENSION_H_ + +#include + +#include "xla/pjrt/c/pjrt_c_api.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define PJRT_API_GPU_EXTENSION_VERSION 0 + +struct PJRT_Gpu_Register_Custom_Call_Args { + size_t struct_size; + const char* function_name; + size_t function_name_size; + void* custom_call_function; +}; +PJRT_DEFINE_STRUCT_TRAITS(PJRT_Gpu_Register_Custom_Call_Args, + custom_call_function); + +// Registers a custom call. +typedef PJRT_Error* PJRT_Gpu_Register_Custom_Call( + PJRT_Gpu_Register_Custom_Call_Args* args); + +typedef struct PJRT_Gpu_Custom_Call { + PJRT_Structure_Type type; + const void* next; + PJRT_Gpu_Register_Custom_Call* custom_call; +} PJRT_Gpu_Custom_Call; + +#ifdef __cplusplus +} +#endif + +#endif // XLA_PJRT_C_PJRT_C_API_GPU_EXTENSION_H_ diff --git a/third_party/xla/xla/pjrt/c/pjrt_c_api_gpu_internal.cc b/third_party/xla/xla/pjrt/c/pjrt_c_api_gpu_internal.cc index 75a14f42118b4d..e14bf2b20f5768 100644 --- a/third_party/xla/xla/pjrt/c/pjrt_c_api_gpu_internal.cc +++ b/third_party/xla/xla/pjrt/c/pjrt_c_api_gpu_internal.cc @@ -25,17 +25,21 @@ limitations under the License. 
#include "absl/status/status.h" #include "absl/strings/str_format.h" #include "xla/pjrt/c/pjrt_c_api.h" +#include "xla/pjrt/c/pjrt_c_api_gpu_extension.h" #include "xla/pjrt/c/pjrt_c_api_helpers.h" #include "xla/pjrt/c/pjrt_c_api_wrapper_impl.h" #include "xla/pjrt/gpu/gpu_helpers.h" #include "xla/pjrt/gpu/se_gpu_pjrt_client.h" #include "xla/pjrt/pjrt_client.h" #include "xla/pjrt/pjrt_common.h" +#include "xla/service/custom_call_target_registry.h" #include "tsl/platform/errors.h" namespace pjrt { namespace gpu_plugin { +#define PJRT_GPU_PLUGIN_PLATFORM_NAME "CUDA" + PJRT_Error* PJRT_Client_Create(PJRT_Client_Create_Args* args) { PJRT_RETURN_IF_ERROR(CheckMatchingStructSizes( "PJRT_Client_Create_Args", PJRT_Client_Create_Args_STRUCT_SIZE, @@ -116,10 +120,27 @@ PJRT_Error* PJRT_GpuDeviceTopology_Create( "Topology not supported for GPU compilation.")}; } -constexpr PJRT_Api pjrt_api = - pjrt::CreatePjrtApi(pjrt::gpu_plugin::PJRT_Client_Create, - pjrt::gpu_plugin::PJRT_GpuDeviceTopology_Create, - pjrt::PJRT_Plugin_Initialize_NoOp); +PJRT_Error* PJRT_Gpu_Register_Custom_Call( + PJRT_Gpu_Register_Custom_Call_Args* args) { + PJRT_RETURN_IF_ERROR(CheckMatchingStructSizes( + "PJRT_Gpu_Register_Custom_Call_Args", + PJRT_Gpu_Register_Custom_Call_Args_STRUCT_SIZE, args->struct_size)); + std::string function_name(args->function_name, args->function_name_size); + xla::CustomCallTargetRegistry::Global()->Register( + function_name, args->custom_call_function, PJRT_GPU_PLUGIN_PLATFORM_NAME); + return nullptr; +} + +PJRT_Gpu_Custom_Call custom_call{ + /*type=*/PJRT_Structure_Type::PJRT_Structure_Type_Gpu_Custom_Call, + /*next=*/nullptr, + /*custom_call=*/PJRT_Gpu_Register_Custom_Call, +}; + +constexpr PJRT_Api pjrt_api = pjrt::CreatePjrtApi( + pjrt::gpu_plugin::PJRT_Client_Create, + pjrt::gpu_plugin::PJRT_GpuDeviceTopology_Create, + pjrt::PJRT_Plugin_Initialize_NoOp, static_cast(&custom_call)); const PJRT_Api* GetGpuPjrtApi() { return &pjrt_api; } diff --git 
a/third_party/xla/xla/pjrt/c/pjrt_c_api_gpu_test.cc b/third_party/xla/xla/pjrt/c/pjrt_c_api_gpu_test.cc index 63102691defd0c..319ee8cec112db 100644 --- a/third_party/xla/xla/pjrt/c/pjrt_c_api_gpu_test.cc +++ b/third_party/xla/xla/pjrt/c/pjrt_c_api_gpu_test.cc @@ -33,12 +33,14 @@ limitations under the License. #include "absl/time/clock.h" #include "absl/time/time.h" #include "xla/pjrt/c/pjrt_c_api.h" +#include "xla/pjrt/c/pjrt_c_api_gpu_extension.h" #include "xla/pjrt/c/pjrt_c_api_helpers.h" #include "xla/pjrt/c/pjrt_c_api_test.h" #include "xla/pjrt/c/pjrt_c_api_test_base.h" #include "xla/pjrt/c/pjrt_c_api_wrapper_impl.h" #include "xla/pjrt/pjrt_client.h" #include "xla/pjrt/pjrt_common.h" +#include "xla/service/custom_call_target_registry.h" #include "xla/status.h" #include "xla/statusor.h" #include "tsl/platform/status.h" @@ -235,5 +237,34 @@ TEST(PjrtCApiGpuAllocatorTest, InvalidAllocatorOptionsParsing) { api->PJRT_Error_Destroy(&error_destroy_args); } + +void TestCustomCall() {} + +TEST(PjrtCApiGpuPrivTest, CustomCall) { + PJRT_Gpu_Register_Custom_Call_Args args; + args.struct_size = PJRT_Gpu_Register_Custom_Call_Args_STRUCT_SIZE; + std::string function_name = "function_name"; + args.function_name = function_name.c_str(); + args.function_name_size = function_name.size(); + args.custom_call_function = reinterpret_cast(&TestCustomCall); + auto api = GetPjrtApi(); + const PJRT_Structure_Base* next = + reinterpret_cast(api->extension_start); + while (next != nullptr && + next->type != + PJRT_Structure_Type::PJRT_Structure_Type_Gpu_Custom_Call) { + next = next->next; + } + ASSERT_NE(next, nullptr); + + PJRT_Error* error = + reinterpret_cast(next)->custom_call(&args); + + CHECK_EQ(error, nullptr); + void* custom_call = + xla::CustomCallTargetRegistry::Global()->Lookup(function_name, "CUDA"); + EXPECT_EQ(custom_call, reinterpret_cast(&TestCustomCall)); +} + } // namespace } // namespace pjrt diff --git a/third_party/xla/xla/pjrt/c/pjrt_c_api_wrapper_impl.h 
b/third_party/xla/xla/pjrt/c/pjrt_c_api_wrapper_impl.h index 841725c595554a..7003f9c24ebac0 100644 --- a/third_party/xla/xla/pjrt/c/pjrt_c_api_wrapper_impl.h +++ b/third_party/xla/xla/pjrt/c/pjrt_c_api_wrapper_impl.h @@ -399,10 +399,11 @@ PJRT_Error* PJRT_Plugin_Initialize_NoOp(PJRT_Plugin_Initialize_Args* args); constexpr PJRT_Api CreatePjrtApi( PJRT_Client_Create* create_fn, PJRT_TopologyDescription_Create* topology_create_fn, - PJRT_Plugin_Initialize* plugin_initialize_fn) { + PJRT_Plugin_Initialize* plugin_initialize_fn, + void* extension_start = nullptr) { return PJRT_Api{ /*struct_size=*/PJRT_Api_STRUCT_SIZE, - /*priv=*/nullptr, + /*extension_start=*/extension_start, /*pjrt_api_version=*/ PJRT_Api_Version{/*struct_size=*/PJRT_Api_Version_STRUCT_SIZE, From 5c845a9e76a76474302bda77ec9c7f93579e8227 Mon Sep 17 00:00:00 2001 From: Kanglan Tang Date: Fri, 22 Sep 2023 16:08:53 -0700 Subject: [PATCH 184/567] Internal changes only PiperOrigin-RevId: 567740639 --- .../tools/pip_package/build_pip_package.sh | 17 +++++++++++++++++ tensorflow/tools/pip_package/setup.py | 4 ++++ 2 files changed, 21 insertions(+) diff --git a/tensorflow/tools/pip_package/build_pip_package.sh b/tensorflow/tools/pip_package/build_pip_package.sh index ef6f655ba6039b..f7b4a655a67beb 100755 --- a/tensorflow/tools/pip_package/build_pip_package.sh +++ b/tensorflow/tools/pip_package/build_pip_package.sh @@ -210,6 +210,23 @@ function prepare_src() { cp -L \ bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/org_tensorflow/LICENSE \ "${TMPDIR}" + # Check if it is a tpu build + if [[ ${TPU_BUILD} == "1" ]]; then + # Check if libtpu.so exists + if [[ -f "./tensorflow/lib/libtpu.so" ]]; then + if [[ ! 
-L "${RUNFILES}/tensorflow/lib/libtpu.so" ]]; then + mkdir "$(real_path ${RUNFILES}/tensorflow/lib)" + ln -s $(real_path ./tensorflow/lib/libtpu.so) $(real_path ${RUNFILES}/tensorflow/lib/libtpu.so) + echo "Created symlink: $(real_path ./tensorflow/lib/libtpu.so) -> \ + $(real_path ${RUNFILES}/tensorflow/lib/libtpu.so)" + else + echo "Symlink already exists: ${RUNFILES}/tensorflow/lib/libtpu.so" + fi + else + echo "Libtpu.so is not found in $(real_path ./tensorflow/lib/)" + exit 1 + fi + fi cp -LR \ bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/org_tensorflow/tensorflow \ "${TMPDIR}" diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 80b6e28ed1fc1c..a3f46675f70633 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -316,6 +316,10 @@ def find_files(pattern, root): for path in so_lib_paths: matches.extend(['../' + x for x in find_files('*', path) if '.py' not in x]) +# If building a tpu package, bundle libtpu.so as part of the wheel +if '_tpu' in project_name: + matches.append('tensorflow/lib/libtpu.so') + if os.name == 'nt': EXTENSION_NAME = 'python/_pywrap_tensorflow_internal.pyd' else: From 8bffb7a2070abb941a12ea21d6f079a617cdaa58 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 22 Sep 2023 16:42:27 -0700 Subject: [PATCH 185/567] Fix a typo in `array_impl_test_lib`. 
PiperOrigin-RevId: 567747222 --- third_party/xla/xla/python/ifrt/array_impl_test_lib.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/xla/xla/python/ifrt/array_impl_test_lib.cc b/third_party/xla/xla/python/ifrt/array_impl_test_lib.cc index ad9571ac05d3fd..b29a4707e23d4c 100644 --- a/third_party/xla/xla/python/ifrt/array_impl_test_lib.cc +++ b/third_party/xla/xla/python/ifrt/array_impl_test_lib.cc @@ -488,7 +488,7 @@ TEST(ArrayImplTest, ReshardToDifferentDevice) { TF_ASSERT_OK_AND_ASSIGN( auto reshared_array, - array->Reshard(sharding, ArrayCopySemantics::kAlwaysCopy)); + array->Reshard(new_sharding, ArrayCopySemantics::kAlwaysCopy)); std::vector out_data(6); auto future = reshared_array->CopyToHostBuffer( From 5cd0a8fd0ef40b95726cb73ffb6484e4078a3a5e Mon Sep 17 00:00:00 2001 From: Fiona Lang Date: Fri, 22 Sep 2023 16:53:45 -0700 Subject: [PATCH 186/567] Import ops/gradients_impl.py in eager/backprop.py to ensure that registered gradients are available for gradient lookup. Remove nn_grad.py's dependency on eager/backprop.py to remove the cycle that would block this, by moving the nn gradients that depend on backprop to a new file. 
PiperOrigin-RevId: 567749407 --- tensorflow/python/eager/BUILD | 1 + tensorflow/python/eager/backprop.py | 1 + tensorflow/python/ops/BUILD | 17 +- tensorflow/python/ops/gradients_impl.py | 1 - tensorflow/python/ops/nn.py | 1 + .../python/ops/nn_fused_batch_norm_grad.py | 166 ++++++++++++++++ .../python/ops/nn_fused_batchnorm_test.py | 6 +- tensorflow/python/ops/nn_grad.py | 180 ++---------------- tensorflow/python/ops/nn_ops.py | 2 + 9 files changed, 205 insertions(+), 170 deletions(-) create mode 100644 tensorflow/python/ops/nn_fused_batch_norm_grad.py diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD index 2ce65e6ab4530c..e72f54c48fd553 100644 --- a/tensorflow/python/eager/BUILD +++ b/tensorflow/python/eager/BUILD @@ -707,6 +707,7 @@ py_strict_library( "//tensorflow/python/ops:check_ops", "//tensorflow/python/ops:control_flow_util", "//tensorflow/python/ops:default_gradient", + "//tensorflow/python/ops:gradients_impl", "//tensorflow/python/ops:math_ops_gen", "//tensorflow/python/ops:resource_variable_ops", "//tensorflow/python/ops:unconnected_gradients", diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index 16fc829ee6ca20..57593eac09b26d 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -42,6 +42,7 @@ from tensorflow.python.ops import default_gradient from tensorflow.python.ops import gen_array_ops from tensorflow.python.ops import gen_math_ops +from tensorflow.python.ops import gradients_impl # pylint: disable=unused-import from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops.parallel_for import control_flow_ops as pfor_ops from tensorflow.python.ops.unconnected_gradients import UnconnectedGradients diff --git a/tensorflow/python/ops/BUILD b/tensorflow/python/ops/BUILD index cba5c7c60e69ac..ca03c9485561b1 100644 --- a/tensorflow/python/ops/BUILD +++ b/tensorflow/python/ops/BUILD @@ -1627,7 +1627,6 @@ py_strict_library( 
":manip_grad", ":math_grad", ":math_ops", - ":nn_grad", ":optional_grad", ":random_grad", ":rnn_grad", @@ -2182,6 +2181,7 @@ py_strict_library( ":ctc_ops", ":embedding_ops", ":math_ops", + ":nn_fused_batch_norm_grad", ":nn_grad", ":nn_impl", ":nn_impl_distribute", @@ -2197,8 +2197,18 @@ py_strict_library( ":array_ops", ":array_ops_stack", ":math_ops", - ":nn_ops", ":nn_ops_gen", + "//tensorflow/python/framework:dtypes", + "//tensorflow/python/framework:ops", + ], +) + +py_strict_library( + name = "nn_fused_batch_norm_grad", + srcs = ["nn_fused_batch_norm_grad.py"], + deps = [ + ":array_ops", + ":math_ops", "//tensorflow/python/eager:backprop", "//tensorflow/python/framework:dtypes", "//tensorflow/python/framework:ops", @@ -2215,6 +2225,7 @@ py_strict_library( ":check_ops", ":math_ops", ":math_ops_gen", + ":nn_grad", ":nn_ops_gen", ":random_ops", ":stateless_random_ops", @@ -3684,7 +3695,7 @@ cuda_py_strict_test( ":gradient_checker", ":gradients_impl", ":math_ops", - ":nn_grad", + ":nn_fused_batch_norm_grad", ":nn_impl", ":nn_ops", ":nn_ops_gen", diff --git a/tensorflow/python/ops/gradients_impl.py b/tensorflow/python/ops/gradients_impl.py index b47f8d8bf9e595..9812b382093768 100644 --- a/tensorflow/python/ops/gradients_impl.py +++ b/tensorflow/python/ops/gradients_impl.py @@ -30,7 +30,6 @@ from tensorflow.python.ops import manip_grad # pylint: disable=unused-import from tensorflow.python.ops import math_grad # pylint: disable=unused-import from tensorflow.python.ops import math_ops -from tensorflow.python.ops import nn_grad # pylint: disable=unused-import from tensorflow.python.ops import optional_grad # pylint: disable=unused-import from tensorflow.python.ops import random_grad # pylint: disable=unused-import from tensorflow.python.ops import rnn_grad # pylint: disable=unused-import diff --git a/tensorflow/python/ops/nn.py b/tensorflow/python/ops/nn.py index 8aa4a2fd74cef8..753f1708494d63 100644 --- a/tensorflow/python/ops/nn.py +++ b/tensorflow/python/ops/nn.py 
@@ -24,6 +24,7 @@ from tensorflow.python.ops import ctc_ops as _ctc_ops from tensorflow.python.ops import embedding_ops as _embedding_ops from tensorflow.python.ops import nn_grad as _nn_grad +from tensorflow.python.ops import nn_fused_batch_norm_grad as _nn_fused_batch_norm_grad from tensorflow.python.ops import nn_ops as _nn_ops from tensorflow.python.ops.math_ops import sigmoid from tensorflow.python.ops.math_ops import tanh diff --git a/tensorflow/python/ops/nn_fused_batch_norm_grad.py b/tensorflow/python/ops/nn_fused_batch_norm_grad.py new file mode 100644 index 00000000000000..245f65cdf28752 --- /dev/null +++ b/tensorflow/python/ops/nn_fused_batch_norm_grad.py @@ -0,0 +1,166 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Batch norm gradients for operators defined in nn_ops.py.""" + +from tensorflow.python.eager import backprop +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops + + +def _BatchNormGrad(grad_y, + x, + scale, + pop_mean, + pop_var, + epsilon, + data_format, + is_training=True): + """Returns the gradients for the 3 inputs of BatchNorm. + + Args: + grad_y: A `Tensor` of 4 or 5 dimensions for gradient for y. + x: A `Tensor` of 4 or 5 dimensions for x. 
+ scale: A `Tensor` of 1 dimension for scaling. + pop_mean: A `Tensor` of 1 dimension for the population mean. Only used when + is_training=False. + pop_var: A `Tensor` of 1 dimension for the population variance. Only used + when is_training=False. + epsilon: A small float number added to the variance of x. + data_format: The data format for input. Either b"NHWC" or b"NCHW". + is_training: A bool value to indicate the operation is for training + (default) or inference. + + Returns: + A tuple (grad_x, grad_scale, grad_offset), where grad_x is the gradient + for x, grad_scale the gradient for scale, and grad_offset the gradient + for offset. + """ + x_dtype = x.dtype.base_dtype + if x_dtype == dtypes.float16 or x_dtype == dtypes.bfloat16: + # float16 math is too imprecise, so we do the batch norm gradient + # computations in float32. + x = math_ops.cast(x, dtypes.float32) + grad_y = math_ops.cast(grad_y, dtypes.float32) + if is_training: + if data_format == b"NHWC": + keepdims = False + reduce_axis = [0, 1, 2] + elif data_format == b"NDHWC": + keepdims = False + reduce_axis = [0, 1, 2, 3] + elif data_format == b"NCHW": + keepdims = True + reduce_axis = [0, 2, 3] + shape = [1, array_ops.size(scale), 1, 1] + scale = array_ops.reshape(scale, shape) + else: + keepdims = True + reduce_axis = [0, 2, 3, 4] + shape = [1, array_ops.size(scale), 1, 1, 1] + scale = array_ops.reshape(scale, shape) + mean_grad_y = math_ops.reduce_mean(grad_y, reduce_axis, keepdims=keepdims) + mean_x = math_ops.reduce_mean(x, reduce_axis, keepdims=keepdims) + var_x = math_ops.reduce_mean( + math_ops.squared_difference(x, array_ops.stop_gradient(mean_x)), + reduce_axis, + keepdims=keepdims) + grad_y_offset = grad_y - mean_grad_y + x_offset = x - mean_x + mean = math_ops.reduce_mean( + grad_y * x_offset, axis=reduce_axis, keepdims=keepdims) + grad_x = scale * math_ops.rsqrt(var_x + epsilon) * ( + grad_y_offset - math_ops.reciprocal(var_x + epsilon) * mean * x_offset) + grad_scale = 
math_ops.rsqrt(var_x + epsilon) * math_ops.reduce_sum( + grad_y * x_offset, axis=reduce_axis, keepdims=keepdims) + if data_format == b"NCHW" or data_format == b"NCDHW": + grad_scale = array_ops.squeeze(grad_scale) + grad_offset = math_ops.reduce_sum(grad_y, axis=reduce_axis) + return math_ops.cast(grad_x, x_dtype), grad_scale, grad_offset + else: + if data_format == b"NHWC": + reduce_axis = [0, 1, 2] + elif data_format == b"NDHWC": + reduce_axis = [0, 1, 2, 3] + elif data_format == b"NCHW": + reduce_axis = [0, 2, 3] + shape = [1, array_ops.size(pop_mean), 1, 1] + pop_mean = array_ops.reshape(pop_mean, shape) + pop_var = array_ops.reshape(pop_var, shape) + scale = array_ops.reshape(scale, shape) + else: + reduce_axis = [0, 2, 3, 4] + shape = [1, array_ops.size(pop_mean), 1, 1, 1] + pop_mean = array_ops.reshape(pop_mean, shape) + pop_var = array_ops.reshape(pop_var, shape) + scale = array_ops.reshape(scale, shape) + + grad_offset = math_ops.reduce_sum(grad_y, axis=reduce_axis) + var_rsqrt = math_ops.rsqrt(pop_var + epsilon) + grad_scale = math_ops.reduce_sum( + grad_y * (x - pop_mean) * var_rsqrt, axis=reduce_axis) + grad_x = grad_y * scale * var_rsqrt + return math_ops.cast(grad_x, x_dtype), grad_scale, grad_offset + + +@ops.RegisterGradient("FusedBatchNormGrad") +def _FusedBatchNormGradGrad(op: ops.Operation, *grad): + """Returns the gradients for the 3 inputs of FusedBatchNormGrad. + + Args: + op: The FusedBatchNormGradOp for which we need to compute gradients. + *grad: An argument list for tensors of gradients wrt the outputs with + grad[0] as grad_grad_x, grad[1] as grad_grad_scale, grad[2] as + grad_grad_offset. + + Returns: + A tuple (grad_grad_y, grad_x, grad_scale, None, None), where grad_grad_y + is the gradient for grad_y, grad_x the gradient for x, grad_scale the + gradient for scale. 
+ """ + data_format = op.get_attr("data_format") + epsilon = op.get_attr("epsilon") + is_training = op.get_attr("is_training") + grad_y = op.inputs[0] + x = op.inputs[1] + scale = op.inputs[2] + pop_mean = op.inputs[3] + pop_var = op.inputs[4] + grad_grad_x = grad[0] + grad_grad_scale = grad[1] + grad_grad_offset = grad[2] + with backprop.GradientTape() as tape: + tape.watch(grad_y) + tape.watch(x) + tape.watch(scale) + grad_x, grad_scale, grad_offset = _BatchNormGrad( + grad_y, x, scale, pop_mean, pop_var, epsilon, data_format, is_training) + grad_initial = [grad_grad_x, grad_grad_scale, grad_grad_offset] + grad_grad_y, grad_x, grad_scale = tape.gradient( + [grad_x, grad_scale, grad_offset], [grad_y, x, scale], grad_initial) + return grad_grad_y, grad_x, grad_scale, None, None + + +@ops.RegisterGradient("FusedBatchNormGradV2") +def _FusedBatchNormGradGradV2(op: ops.Operation, *grad): + return _FusedBatchNormGradGrad(op, *grad) + + +@ops.RegisterGradient("FusedBatchNormGradV3") +def _FusedBatchNormGradGradV3(op: ops.Operation, *grad): + grad_grad_y, grad_x, grad_scale, _, _ = _FusedBatchNormGradGrad(op, *grad) + return grad_grad_y, grad_x, grad_scale, None, None, None + diff --git a/tensorflow/python/ops/nn_fused_batchnorm_test.py b/tensorflow/python/ops/nn_fused_batchnorm_test.py index 4dd5202f6cdb2c..1131ec377fac18 100644 --- a/tensorflow/python/ops/nn_fused_batchnorm_test.py +++ b/tensorflow/python/ops/nn_fused_batchnorm_test.py @@ -26,7 +26,7 @@ from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import math_ops -from tensorflow.python.ops import nn_grad +from tensorflow.python.ops import nn_fused_batch_norm_grad from tensorflow.python.ops import nn_impl from tensorflow.python.ops import nn_ops from tensorflow.python.platform import test @@ -336,8 +336,8 @@ def _test_grad_grad(self, epsilon = y.op.get_attr('epsilon') data_format = y.op.get_attr('data_format') grad_vals = 
self.evaluate([grad_x, grad_scale, grad_offset]) - grad_internal = nn_grad._BatchNormGrad(grad_y, x, scale, pop_mean, - pop_var, epsilon, data_format) + grad_internal = nn_fused_batch_norm_grad._BatchNormGrad( + grad_y, x, scale, pop_mean, pop_var, epsilon, data_format) grad_internal_vals = self.evaluate(list(grad_internal)) for grad_val, grad_internal_val in zip(grad_vals, grad_internal_vals): self.assertAllClose(grad_val, grad_internal_val, atol=err_tolerance) diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py index 258cdd147ada3e..8260caf0787fd2 100644 --- a/tensorflow/python/ops/nn_grad.py +++ b/tensorflow/python/ops/nn_grad.py @@ -18,14 +18,12 @@ import itertools import operator -from tensorflow.python.eager import backprop from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import array_ops_stack from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import math_ops -from tensorflow.python.ops import nn_ops @ops.RegisterGradient("Conv2DBackpropInput") @@ -153,7 +151,7 @@ def _Conv3DGrad(op: ops.Operation, grad): data_format = op.get_attr("data_format").decode() shape_0, shape_1 = array_ops.shape_n([op.inputs[0], op.inputs[1]]) return [ - nn_ops.conv3d_backprop_input_v2( + gen_nn_ops.conv3d_backprop_input_v2( shape_0, op.inputs[1], grad, @@ -161,7 +159,7 @@ def _Conv3DGrad(op: ops.Operation, grad): strides=op.get_attr("strides"), padding=op.get_attr("padding"), data_format=data_format), - nn_ops.conv3d_backprop_filter_v2( + gen_nn_ops.conv3d_backprop_filter_v2( op.inputs[0], shape_1, grad, @@ -178,7 +176,7 @@ def _Conv3DBackpropInputGrad(op: ops.Operation, grad): data_format = op.get_attr("data_format").decode() return [ None, - nn_ops.conv3d_backprop_filter_v2( + gen_nn_ops.conv3d_backprop_filter_v2( grad, array_ops.shape(op.inputs[1]), op.inputs[2], @@ -186,7 +184,7 @@ def _Conv3DBackpropInputGrad(op: 
ops.Operation, grad): strides=op.get_attr("strides"), padding=op.get_attr("padding"), data_format=data_format), - nn_ops.conv3d( + gen_nn_ops.conv3d( grad, op.inputs[1], dilations=op.get_attr("dilations"), @@ -200,7 +198,7 @@ def _Conv3DBackpropInputGrad(op: ops.Operation, grad): def _Conv3DBackpropFilterGrad(op: ops.Operation, grad): data_format = op.get_attr("data_format").decode() return [ - nn_ops.conv3d_backprop_input_v2( + gen_nn_ops.conv3d_backprop_input_v2( array_ops.shape(op.inputs[0]), grad, op.inputs[2], @@ -208,7 +206,7 @@ def _Conv3DBackpropFilterGrad(op: ops.Operation, grad): strides=op.get_attr("strides"), padding=op.get_attr("padding"), data_format=data_format), None, - nn_ops.conv3d( + gen_nn_ops.conv3d( op.inputs[0], grad, dilations=op.get_attr("dilations"), @@ -526,7 +524,7 @@ def _SoftmaxCrossEntropyWithLogitsGrad(op: ops.Operation, grad_loss, grad_grad): logits = op.inputs[0] if (grad_grad is not None and not getattr(grad_grad, "_is_zeros_tensor", False)): - softmax = nn_ops.softmax(logits) + softmax = gen_nn_ops.softmax(logits) grad += ((grad_grad - array_ops.squeeze( math_ops.matmul( @@ -534,7 +532,7 @@ def _SoftmaxCrossEntropyWithLogitsGrad(op: ops.Operation, grad_loss, grad_grad): array_ops.expand_dims(softmax, 2)), axis=1)) * softmax) - return grad, _BroadcastMul(grad_loss, -nn_ops.log_softmax(logits)) # pylint: disable=invalid-unary-operand-type + return grad, _BroadcastMul(grad_loss, -gen_nn_ops.log_softmax(logits)) # pylint: disable=invalid-unary-operand-type @ops.RegisterGradient("SparseSoftmaxCrossEntropyWithLogits") @@ -554,7 +552,7 @@ def _SparseSoftmaxCrossEntropyWithLogitsGrad(op: ops.Operation, logits = op.inputs[0] if (grad_grad is not None and not getattr(grad_grad, "_is_zeros_tensor", False)): - softmax = nn_ops.softmax(logits) + softmax = gen_nn_ops.softmax(logits) grad += ((grad_grad - array_ops.squeeze( math_ops.matmul( @@ -633,14 +631,14 @@ def _DepthwiseConv2dNativeGrad(op: ops.Operation, grad): 
@ops.RegisterGradient("Dilation2D") def _Dilation2DGrad(op: ops.Operation, grad): return [ - nn_ops.dilation2d_backprop_input(op.inputs[0], op.inputs[1], grad, - op.get_attr("strides"), - op.get_attr("rates"), - op.get_attr("padding")), - nn_ops.dilation2d_backprop_filter(op.inputs[0], op.inputs[1], grad, - op.get_attr("strides"), - op.get_attr("rates"), - op.get_attr("padding")) + gen_nn_ops.dilation2d_backprop_input(op.inputs[0], op.inputs[1], grad, + op.get_attr("strides"), + op.get_attr("rates"), + op.get_attr("padding")), + gen_nn_ops.dilation2d_backprop_filter(op.inputs[0], op.inputs[1], grad, + op.get_attr("strides"), + op.get_attr("rates"), + op.get_attr("padding")) ] @@ -951,150 +949,6 @@ def _FusedBatchNormV3Grad(op: ops.Operation, *grad): return _BaseFusedBatchNormGrad(op, 2, *grad) -def _BatchNormGrad(grad_y, - x, - scale, - pop_mean, - pop_var, - epsilon, - data_format, - is_training=True): - """Returns the gradients for the 3 inputs of BatchNorm. - - Args: - grad_y: A `Tensor` of 4 or 5 dimensions for gradient for y. - x: A `Tensor` of 4 or 5 dimensions for x. - scale: A `Tensor` of 1 dimension for scaling. - pop_mean: A `Tensor` of 1 dimension for the population mean. Only used when - is_training=False. - pop_var: A `Tensor` of 1 dimension for the population variance. Only used - when is_training=False. - epsilon: A small float number added to the variance of x. - data_format: The data format for input. Either b"NHWC" or b"NCHW". - is_training: A bool value to indicate the operation is for training - (default) or inference. - - Returns: - A tuple (grad_x, grad_scale, grad_offset), where grad_x is the gradient - for x, grad_scale the gradient for scale, and grad_offset the gradient - for offset. - """ - x_dtype = x.dtype.base_dtype - if x_dtype == dtypes.float16 or x_dtype == dtypes.bfloat16: - # float16 math is too imprecise, so we do the batch norm gradient - # computations in float32. 
- x = math_ops.cast(x, dtypes.float32) - grad_y = math_ops.cast(grad_y, dtypes.float32) - if is_training: - if data_format == b"NHWC": - keepdims = False - reduce_axis = [0, 1, 2] - elif data_format == b"NDHWC": - keepdims = False - reduce_axis = [0, 1, 2, 3] - elif data_format == b"NCHW": - keepdims = True - reduce_axis = [0, 2, 3] - shape = [1, array_ops.size(scale), 1, 1] - scale = array_ops.reshape(scale, shape) - else: - keepdims = True - reduce_axis = [0, 2, 3, 4] - shape = [1, array_ops.size(scale), 1, 1, 1] - scale = array_ops.reshape(scale, shape) - mean_grad_y = math_ops.reduce_mean(grad_y, reduce_axis, keepdims=keepdims) - mean_x = math_ops.reduce_mean(x, reduce_axis, keepdims=keepdims) - var_x = math_ops.reduce_mean( - math_ops.squared_difference(x, array_ops.stop_gradient(mean_x)), - reduce_axis, - keepdims=keepdims) - grad_y_offset = grad_y - mean_grad_y - x_offset = x - mean_x - mean = math_ops.reduce_mean( - grad_y * x_offset, axis=reduce_axis, keepdims=keepdims) - grad_x = scale * math_ops.rsqrt(var_x + epsilon) * ( - grad_y_offset - math_ops.reciprocal(var_x + epsilon) * mean * x_offset) - grad_scale = math_ops.rsqrt(var_x + epsilon) * math_ops.reduce_sum( - grad_y * x_offset, axis=reduce_axis, keepdims=keepdims) - if data_format == b"NCHW" or data_format == b"NCDHW": - grad_scale = array_ops.squeeze(grad_scale) - grad_offset = math_ops.reduce_sum(grad_y, axis=reduce_axis) - return math_ops.cast(grad_x, x_dtype), grad_scale, grad_offset - else: - if data_format == b"NHWC": - reduce_axis = [0, 1, 2] - elif data_format == b"NDHWC": - reduce_axis = [0, 1, 2, 3] - elif data_format == b"NCHW": - reduce_axis = [0, 2, 3] - shape = [1, array_ops.size(pop_mean), 1, 1] - pop_mean = array_ops.reshape(pop_mean, shape) - pop_var = array_ops.reshape(pop_var, shape) - scale = array_ops.reshape(scale, shape) - else: - reduce_axis = [0, 2, 3, 4] - shape = [1, array_ops.size(pop_mean), 1, 1, 1] - pop_mean = array_ops.reshape(pop_mean, shape) - pop_var = 
array_ops.reshape(pop_var, shape) - scale = array_ops.reshape(scale, shape) - - grad_offset = math_ops.reduce_sum(grad_y, axis=reduce_axis) - var_rsqrt = math_ops.rsqrt(pop_var + epsilon) - grad_scale = math_ops.reduce_sum( - grad_y * (x - pop_mean) * var_rsqrt, axis=reduce_axis) - grad_x = grad_y * scale * var_rsqrt - return math_ops.cast(grad_x, x_dtype), grad_scale, grad_offset - - -@ops.RegisterGradient("FusedBatchNormGrad") -def _FusedBatchNormGradGrad(op: ops.Operation, *grad): - """Returns the gradients for the 3 inputs of FusedBatchNormGrad. - - Args: - op: The FusedBatchNormGradOp for which we need to compute gradients. - *grad: An argument list for tensors of gradients wrt the outputs with - grad[0] as grad_grad_x, grad[1] as grad_grad_scale, grad[2] as - grad_grad_offset. - - Returns: - A tuple (grad_grad_y, grad_x, grad_scale, None, None), where grad_grad_y - is the gradient for grad_y, grad_x the gradient for x, grad_scale the - gradient for scale. - """ - data_format = op.get_attr("data_format") - epsilon = op.get_attr("epsilon") - is_training = op.get_attr("is_training") - grad_y = op.inputs[0] - x = op.inputs[1] - scale = op.inputs[2] - pop_mean = op.inputs[3] - pop_var = op.inputs[4] - grad_grad_x = grad[0] - grad_grad_scale = grad[1] - grad_grad_offset = grad[2] - with backprop.GradientTape() as tape: - tape.watch(grad_y) - tape.watch(x) - tape.watch(scale) - grad_x, grad_scale, grad_offset = _BatchNormGrad( - grad_y, x, scale, pop_mean, pop_var, epsilon, data_format, is_training) - grad_initial = [grad_grad_x, grad_grad_scale, grad_grad_offset] - grad_grad_y, grad_x, grad_scale = tape.gradient( - [grad_x, grad_scale, grad_offset], [grad_y, x, scale], grad_initial) - return grad_grad_y, grad_x, grad_scale, None, None - - -@ops.RegisterGradient("FusedBatchNormGradV2") -def _FusedBatchNormGradGradV2(op: ops.Operation, *grad): - return _FusedBatchNormGradGrad(op, *grad) - - -@ops.RegisterGradient("FusedBatchNormGradV3") -def 
_FusedBatchNormGradGradV3(op: ops.Operation, *grad): - grad_grad_y, grad_x, grad_scale, _, _ = _FusedBatchNormGradGrad(op, *grad) - return grad_grad_y, grad_x, grad_scale, None, None, None - - @ops.RegisterGradient("L2Loss") def _L2LossGrad(op: ops.Operation, grad): """Return the gradients for L2Loss. diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 0146f18a5f8767..b2f624e106bf93 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -187,6 +187,8 @@ from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import math_ops +# Ensure all gradients are registered for nn_ops +from tensorflow.python.ops import nn_grad # pylint: disable=unused-import from tensorflow.python.ops import random_ops from tensorflow.python.ops import stateless_random_ops from tensorflow.python.ops import variables as variables_lib From f236ae3bca673d66d70e443dc8432a3e4e01bae2 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Fri, 22 Sep 2023 18:15:08 -0700 Subject: [PATCH 187/567] [stream_executor] NFC: Clean up xla/stream_executor:kernel build dependencies PiperOrigin-RevId: 567763730 --- third_party/xla/xla/pjrt/gpu/BUILD | 4 +- third_party/xla/xla/stream_executor/BUILD | 44 ++----------------- .../xla/xla/stream_executor/allocator_stats.h | 1 - third_party/xla/xla/stream_executor/dnn.h | 1 - .../xla/xla/stream_executor/host/BUILD | 1 + third_party/xla/xla/stream_executor/kernel.cc | 4 +- third_party/xla/xla/stream_executor/kernel.h | 6 +-- .../stream_executor/stream_executor_pimpl.h | 1 - .../xla/xla/tools/multihost_hlo_runner/BUILD | 1 + 9 files changed, 11 insertions(+), 52 deletions(-) diff --git a/third_party/xla/xla/pjrt/gpu/BUILD b/third_party/xla/xla/pjrt/gpu/BUILD index 12c50d4c918a80..5de730b142b637 100644 --- a/third_party/xla/xla/pjrt/gpu/BUILD +++ b/third_party/xla/xla/pjrt/gpu/BUILD @@ -25,8 +25,8 @@ cc_library( "//xla/client:client_library", 
"//xla/client:local_client", "//xla/service:platform_util", + "//xla/stream_executor", "//xla/stream_executor:device_mem_allocator", - "//xla/stream_executor:kernel", "@com_google_absl//absl/types:span", "@local_tsl//tsl/framework:bfc_allocator", "@local_tsl//tsl/framework:device_id_impl", @@ -63,8 +63,8 @@ cc_library( "//xla/service:executable", "//xla/service:platform_util", "//xla/service/gpu:gpu_executable_run_options", + "//xla/stream_executor", "//xla/stream_executor:device_mem_allocator", - "//xla/stream_executor:device_memory", "//xla/stream_executor:tf_allocator_adapter", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/container:flat_hash_map", diff --git a/third_party/xla/xla/stream_executor/BUILD b/third_party/xla/xla/stream_executor/BUILD index 6783578a4bc150..03f2d6ca7538d3 100644 --- a/third_party/xla/xla/stream_executor/BUILD +++ b/third_party/xla/xla/stream_executor/BUILD @@ -469,6 +469,7 @@ cc_library( "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/functional:any_invocable", + "@com_google_absl//absl/log:check", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/synchronization", @@ -501,6 +502,7 @@ cc_library( ":fft", ":host_or_device_scalar", ":kernel", + ":kernel_spec", ":launch_dim", ":platform", ":plugin_registry", @@ -531,64 +533,25 @@ cc_library( cc_library( name = "kernel", - srcs = [ - "fft.h", - "kernel.cc", - "plugin.h", - "stream.h", - "stream_executor_pimpl.h", - "temporary_device_memory.h", - "temporary_memory_manager.h", - ], + srcs = ["kernel.cc"], hdrs = [ - "blas.h", - "device_description.h", - "device_options.h", - "event.h", "kernel.h", - "kernel_spec.h", - "launch_dim.h", "multi_platform_manager.h", - "platform.h", - "plugin_registry.h", - "stream_executor.h", - "stream_executor_internal.h", - "trace_listener.h", ], visibility = ["//visibility:public"], deps = [ - ":allocator_stats", - ":data_type", - 
":device_description", - ":device_description_proto_cc", ":device_memory", - ":device_options", - ":fft", - ":kernel_cache_config", - ":kernel_spec", - ":launch_dim", ":platform", - ":plugin", - ":plugin_registry", ":stream_executor_headers", "//xla/stream_executor/platform", - "@com_google_absl//absl/algorithm:container", - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/functional:any_invocable", "@com_google_absl//absl/log:check", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/status", "@com_google_absl//absl/strings", - "@com_google_absl//absl/synchronization", - "@com_google_absl//absl/types:optional", "@com_google_absl//absl/types:span", - "@local_tsl//tsl/platform:env", "@local_tsl//tsl/platform:errors", "@local_tsl//tsl/platform:logging", "@local_tsl//tsl/platform:platform_port", "@local_tsl//tsl/platform:status", "@local_tsl//tsl/platform:statusor", - "@local_tsl//tsl/protobuf:dnn_proto_cc", ], ) @@ -788,6 +751,7 @@ cc_library( ":dnn_proto_cc", ":event", ":kernel", + ":kernel_spec", ":launch_dim", ":multi_platform_manager", ":platform", diff --git a/third_party/xla/xla/stream_executor/allocator_stats.h b/third_party/xla/xla/stream_executor/allocator_stats.h index d073d08cb92d5b..1e15d0c51e64b9 100644 --- a/third_party/xla/xla/stream_executor/allocator_stats.h +++ b/third_party/xla/xla/stream_executor/allocator_stats.h @@ -18,7 +18,6 @@ limitations under the License. #include -#include "absl/types/optional.h" #include "xla/stream_executor/platform/port.h" namespace stream_executor { diff --git a/third_party/xla/xla/stream_executor/dnn.h b/third_party/xla/xla/stream_executor/dnn.h index 1cf598e7f37141..68f18f635048bc 100644 --- a/third_party/xla/xla/stream_executor/dnn.h +++ b/third_party/xla/xla/stream_executor/dnn.h @@ -35,7 +35,6 @@ limitations under the License. 
#include #include "google/protobuf/wrappers.pb.h" -#include "absl/types/optional.h" #include "absl/types/span.h" #include "xla/stream_executor/data_type.h" #include "xla/stream_executor/device_description.h" diff --git a/third_party/xla/xla/stream_executor/host/BUILD b/third_party/xla/xla/stream_executor/host/BUILD index 1cea4b7e5485eb..21095c57941840 100644 --- a/third_party/xla/xla/stream_executor/host/BUILD +++ b/third_party/xla/xla/stream_executor/host/BUILD @@ -64,6 +64,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ "//xla/stream_executor:kernel", + "//xla/stream_executor:stream_executor_internal", "@com_google_absl//absl/functional:any_invocable", "@com_google_absl//absl/synchronization", "@local_tsl//tsl/platform:denormal", diff --git a/third_party/xla/xla/stream_executor/kernel.cc b/third_party/xla/xla/stream_executor/kernel.cc index d09f25873c8555..64e6f0191f22f4 100644 --- a/third_party/xla/xla/stream_executor/kernel.cc +++ b/third_party/xla/xla/stream_executor/kernel.cc @@ -19,13 +19,13 @@ limitations under the License. #include "xla/stream_executor/kernel.h" +#include + #include "absl/strings/string_view.h" #include "absl/strings/strip.h" #include "xla/stream_executor/platform.h" -#include "xla/stream_executor/platform/port.h" #include "xla/stream_executor/stream_executor.h" #include "tsl/platform/demangle.h" -#include "tsl/platform/logging.h" namespace stream_executor { diff --git a/third_party/xla/xla/stream_executor/kernel.h b/third_party/xla/xla/stream_executor/kernel.h index 8991fcaa8404bf..61ba6b62f1fd85 100644 --- a/third_party/xla/xla/stream_executor/kernel.h +++ b/third_party/xla/xla/stream_executor/kernel.h @@ -75,20 +75,16 @@ limitations under the License. 
#include #include #include -#include +#include "absl/log/check.h" #include "absl/strings/string_view.h" #include "absl/types/span.h" #include "xla/stream_executor/device_memory.h" #include "xla/stream_executor/kernel_cache_config.h" #include "xla/stream_executor/platform/port.h" -#include "tsl/platform/logging.h" namespace stream_executor { -class DeviceMemoryBase; -template -class DeviceMemory; class StreamExecutor; namespace internal { diff --git a/third_party/xla/xla/stream_executor/stream_executor_pimpl.h b/third_party/xla/xla/stream_executor/stream_executor_pimpl.h index 1a208a303e22ae..c41ef6db37cc61 100644 --- a/third_party/xla/xla/stream_executor/stream_executor_pimpl.h +++ b/third_party/xla/xla/stream_executor/stream_executor_pimpl.h @@ -28,7 +28,6 @@ limitations under the License. #include "absl/base/thread_annotations.h" #include "absl/functional/any_invocable.h" #include "absl/synchronization/mutex.h" -#include "absl/types/optional.h" #include "absl/types/span.h" #include "xla/stream_executor/command_buffer.h" #include "xla/stream_executor/device_memory_allocator.h" diff --git a/third_party/xla/xla/tools/multihost_hlo_runner/BUILD b/third_party/xla/xla/tools/multihost_hlo_runner/BUILD index 01bff3c03b4f2f..dc1723bcce97ab 100644 --- a/third_party/xla/xla/tools/multihost_hlo_runner/BUILD +++ b/third_party/xla/xla/tools/multihost_hlo_runner/BUILD @@ -35,6 +35,7 @@ xla_cc_binary( ":hlo_runner_flags", "//xla:debug_options_flags", "//xla:status", + "//xla/stream_executor", "@com_google_absl//absl/strings", "@local_tsl//tsl/platform:logging", "@local_tsl//tsl/platform:platform_port", From b0f08aa26133f3373d626381a58c04246dd35a11 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 22 Sep 2023 20:27:03 -0700 Subject: [PATCH 188/567] Add convenience method to get memory spaces by kind PiperOrigin-RevId: 567781230 --- third_party/xla/xla/pjrt/BUILD | 7 +++++++ third_party/xla/xla/pjrt/pjrt_client.h | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/third_party/xla/xla/pjrt/BUILD b/third_party/xla/xla/pjrt/BUILD index dac1259700f4ee..bf8cce6944164a 100644 --- a/third_party/xla/xla/pjrt/BUILD +++ b/third_party/xla/xla/pjrt/BUILD @@ -169,6 +169,7 @@ cc_library( hdrs = ["pjrt_client.h"], visibility = ["//visibility:public"], deps = [ + ":pjrt_common", ":pjrt_compiler", ":pjrt_device_description", ":pjrt_executable", @@ -182,13 +183,19 @@ cc_library( "//xla:xla_data_proto_cc", "//xla/client:xla_computation", "//xla/hlo/ir:hlo", + "//xla/service:computation_placer_hdr", "//xla/service:hlo_cost_analysis", "@com_google_absl//absl/base", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:inlined_vector", + "@com_google_absl//absl/functional:any_invocable", + "@com_google_absl//absl/log", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", "@com_google_absl//absl/types:span", "@llvm-project//mlir:IR", "@local_tsl//tsl/framework:allocator", diff --git a/third_party/xla/xla/pjrt/pjrt_client.h b/third_party/xla/xla/pjrt/pjrt_client.h index 5038a3e69dd399..4c32a1b81fd180 100644 --- a/third_party/xla/xla/pjrt/pjrt_client.h +++ b/third_party/xla/xla/pjrt/pjrt_client.h @@ -16,7 +16,9 @@ limitations under the License. #ifndef XLA_PJRT_PJRT_CLIENT_H_ #define XLA_PJRT_PJRT_CLIENT_H_ +#include #include +#include #include #include #include @@ -25,20 +27,31 @@ limitations under the License. 
#include #include "absl/base/attributes.h" +#include "absl/base/thread_annotations.h" #include "absl/container/flat_hash_map.h" #include "absl/container/inlined_vector.h" +#include "absl/functional/any_invocable.h" +#include "absl/log/check.h" +#include "absl/log/log.h" +#include "absl/status/statusor.h" #include "absl/strings/string_view.h" +#include "absl/synchronization/mutex.h" #include "absl/synchronization/notification.h" +#include "absl/time/time.h" #include "absl/types/span.h" #include "mlir/IR/BuiltinOps.h" // from @llvm-project #include "xla/client/xla_computation.h" +#include "xla/layout.h" #include "xla/literal.h" +#include "xla/pjrt/pjrt_common.h" #include "xla/pjrt/pjrt_compiler.h" #include "xla/pjrt/pjrt_device_description.h" #include "xla/pjrt/pjrt_executable.h" #include "xla/pjrt/pjrt_future.h" +#include "xla/service/computation_placer.h" #include "xla/service/hlo_cost_analysis.h" #include "xla/shape.h" +#include "xla/shape_util.h" #include "xla/status.h" #include "xla/statusor.h" #include "xla/util.h" @@ -177,6 +190,11 @@ class PjRtDevice { // Returns the default memory space attached to this device. virtual StatusOr default_memory_space() const = 0; + virtual absl::StatusOr memory_space_by_kind( + absl::string_view memory_space_kind) const { + return Unimplemented("memory_space_by_kind not implemented"); + } + // Returns a platform-specific stream handle that should be used to track when // an externally-managed buffer is ready to use on this device. This is // intended to support dlpack on GPU and is not expected to be implemented for From c2dfd70f7a3fa079dc53857571e4c406b254105c Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 22 Sep 2023 22:10:53 -0700 Subject: [PATCH 189/567] [stream_executor] NFC: Clean up xla/stream_executor:kernel build dependencies PiperOrigin-RevId: 567794680 --- third_party/xla/xla/pjrt/gpu/BUILD | 4 +- third_party/xla/xla/stream_executor/BUILD | 44 +++++++++++++++++-- .../xla/xla/stream_executor/allocator_stats.h | 1 + third_party/xla/xla/stream_executor/dnn.h | 1 + .../xla/xla/stream_executor/host/BUILD | 1 - third_party/xla/xla/stream_executor/kernel.cc | 4 +- third_party/xla/xla/stream_executor/kernel.h | 6 ++- .../stream_executor/stream_executor_pimpl.h | 1 + .../xla/xla/tools/multihost_hlo_runner/BUILD | 1 - 9 files changed, 52 insertions(+), 11 deletions(-) diff --git a/third_party/xla/xla/pjrt/gpu/BUILD b/third_party/xla/xla/pjrt/gpu/BUILD index 5de730b142b637..12c50d4c918a80 100644 --- a/third_party/xla/xla/pjrt/gpu/BUILD +++ b/third_party/xla/xla/pjrt/gpu/BUILD @@ -25,8 +25,8 @@ cc_library( "//xla/client:client_library", "//xla/client:local_client", "//xla/service:platform_util", - "//xla/stream_executor", "//xla/stream_executor:device_mem_allocator", + "//xla/stream_executor:kernel", "@com_google_absl//absl/types:span", "@local_tsl//tsl/framework:bfc_allocator", "@local_tsl//tsl/framework:device_id_impl", @@ -63,8 +63,8 @@ cc_library( "//xla/service:executable", "//xla/service:platform_util", "//xla/service/gpu:gpu_executable_run_options", - "//xla/stream_executor", "//xla/stream_executor:device_mem_allocator", + "//xla/stream_executor:device_memory", "//xla/stream_executor:tf_allocator_adapter", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/container:flat_hash_map", diff --git a/third_party/xla/xla/stream_executor/BUILD b/third_party/xla/xla/stream_executor/BUILD index 03f2d6ca7538d3..6783578a4bc150 100644 --- a/third_party/xla/xla/stream_executor/BUILD +++ b/third_party/xla/xla/stream_executor/BUILD @@ -469,7 +469,6 @@ cc_library( "@com_google_absl//absl/algorithm:container", 
"@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/functional:any_invocable", - "@com_google_absl//absl/log:check", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/synchronization", @@ -502,7 +501,6 @@ cc_library( ":fft", ":host_or_device_scalar", ":kernel", - ":kernel_spec", ":launch_dim", ":platform", ":plugin_registry", @@ -533,25 +531,64 @@ cc_library( cc_library( name = "kernel", - srcs = ["kernel.cc"], + srcs = [ + "fft.h", + "kernel.cc", + "plugin.h", + "stream.h", + "stream_executor_pimpl.h", + "temporary_device_memory.h", + "temporary_memory_manager.h", + ], hdrs = [ + "blas.h", + "device_description.h", + "device_options.h", + "event.h", "kernel.h", + "kernel_spec.h", + "launch_dim.h", "multi_platform_manager.h", + "platform.h", + "plugin_registry.h", + "stream_executor.h", + "stream_executor_internal.h", + "trace_listener.h", ], visibility = ["//visibility:public"], deps = [ + ":allocator_stats", + ":data_type", + ":device_description", + ":device_description_proto_cc", ":device_memory", + ":device_options", + ":fft", + ":kernel_cache_config", + ":kernel_spec", + ":launch_dim", ":platform", + ":plugin", + ":plugin_registry", ":stream_executor_headers", "//xla/stream_executor/platform", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/functional:any_invocable", "@com_google_absl//absl/log:check", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/status", "@com_google_absl//absl/strings", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/types:optional", "@com_google_absl//absl/types:span", + "@local_tsl//tsl/platform:env", "@local_tsl//tsl/platform:errors", "@local_tsl//tsl/platform:logging", "@local_tsl//tsl/platform:platform_port", "@local_tsl//tsl/platform:status", "@local_tsl//tsl/platform:statusor", + "@local_tsl//tsl/protobuf:dnn_proto_cc", ], ) @@ -751,7 +788,6 @@ cc_library( 
":dnn_proto_cc", ":event", ":kernel", - ":kernel_spec", ":launch_dim", ":multi_platform_manager", ":platform", diff --git a/third_party/xla/xla/stream_executor/allocator_stats.h b/third_party/xla/xla/stream_executor/allocator_stats.h index 1e15d0c51e64b9..d073d08cb92d5b 100644 --- a/third_party/xla/xla/stream_executor/allocator_stats.h +++ b/third_party/xla/xla/stream_executor/allocator_stats.h @@ -18,6 +18,7 @@ limitations under the License. #include +#include "absl/types/optional.h" #include "xla/stream_executor/platform/port.h" namespace stream_executor { diff --git a/third_party/xla/xla/stream_executor/dnn.h b/third_party/xla/xla/stream_executor/dnn.h index 68f18f635048bc..1cf598e7f37141 100644 --- a/third_party/xla/xla/stream_executor/dnn.h +++ b/third_party/xla/xla/stream_executor/dnn.h @@ -35,6 +35,7 @@ limitations under the License. #include #include "google/protobuf/wrappers.pb.h" +#include "absl/types/optional.h" #include "absl/types/span.h" #include "xla/stream_executor/data_type.h" #include "xla/stream_executor/device_description.h" diff --git a/third_party/xla/xla/stream_executor/host/BUILD b/third_party/xla/xla/stream_executor/host/BUILD index 21095c57941840..1cea4b7e5485eb 100644 --- a/third_party/xla/xla/stream_executor/host/BUILD +++ b/third_party/xla/xla/stream_executor/host/BUILD @@ -64,7 +64,6 @@ cc_library( visibility = ["//visibility:public"], deps = [ "//xla/stream_executor:kernel", - "//xla/stream_executor:stream_executor_internal", "@com_google_absl//absl/functional:any_invocable", "@com_google_absl//absl/synchronization", "@local_tsl//tsl/platform:denormal", diff --git a/third_party/xla/xla/stream_executor/kernel.cc b/third_party/xla/xla/stream_executor/kernel.cc index 64e6f0191f22f4..d09f25873c8555 100644 --- a/third_party/xla/xla/stream_executor/kernel.cc +++ b/third_party/xla/xla/stream_executor/kernel.cc @@ -19,13 +19,13 @@ limitations under the License. 
#include "xla/stream_executor/kernel.h" -#include - #include "absl/strings/string_view.h" #include "absl/strings/strip.h" #include "xla/stream_executor/platform.h" +#include "xla/stream_executor/platform/port.h" #include "xla/stream_executor/stream_executor.h" #include "tsl/platform/demangle.h" +#include "tsl/platform/logging.h" namespace stream_executor { diff --git a/third_party/xla/xla/stream_executor/kernel.h b/third_party/xla/xla/stream_executor/kernel.h index 61ba6b62f1fd85..8991fcaa8404bf 100644 --- a/third_party/xla/xla/stream_executor/kernel.h +++ b/third_party/xla/xla/stream_executor/kernel.h @@ -75,16 +75,20 @@ limitations under the License. #include #include #include +#include -#include "absl/log/check.h" #include "absl/strings/string_view.h" #include "absl/types/span.h" #include "xla/stream_executor/device_memory.h" #include "xla/stream_executor/kernel_cache_config.h" #include "xla/stream_executor/platform/port.h" +#include "tsl/platform/logging.h" namespace stream_executor { +class DeviceMemoryBase; +template +class DeviceMemory; class StreamExecutor; namespace internal { diff --git a/third_party/xla/xla/stream_executor/stream_executor_pimpl.h b/third_party/xla/xla/stream_executor/stream_executor_pimpl.h index c41ef6db37cc61..1a208a303e22ae 100644 --- a/third_party/xla/xla/stream_executor/stream_executor_pimpl.h +++ b/third_party/xla/xla/stream_executor/stream_executor_pimpl.h @@ -28,6 +28,7 @@ limitations under the License. 
#include "absl/base/thread_annotations.h" #include "absl/functional/any_invocable.h" #include "absl/synchronization/mutex.h" +#include "absl/types/optional.h" #include "absl/types/span.h" #include "xla/stream_executor/command_buffer.h" #include "xla/stream_executor/device_memory_allocator.h" diff --git a/third_party/xla/xla/tools/multihost_hlo_runner/BUILD b/third_party/xla/xla/tools/multihost_hlo_runner/BUILD index dc1723bcce97ab..01bff3c03b4f2f 100644 --- a/third_party/xla/xla/tools/multihost_hlo_runner/BUILD +++ b/third_party/xla/xla/tools/multihost_hlo_runner/BUILD @@ -35,7 +35,6 @@ xla_cc_binary( ":hlo_runner_flags", "//xla:debug_options_flags", "//xla:status", - "//xla/stream_executor", "@com_google_absl//absl/strings", "@local_tsl//tsl/platform:logging", "@local_tsl//tsl/platform:platform_port", From 9a4bde88bf5779ef0f39ef46eda4dc4e6090cf37 Mon Sep 17 00:00:00 2001 From: Jared Junyoung Lim Date: Sat, 23 Sep 2023 00:09:02 -0700 Subject: [PATCH 190/567] Sub supports broadcasting up to 6 dimensions PiperOrigin-RevId: 567812015 --- RELEASE.md | 2 +- tensorflow/compiler/mlir/lite/ir/tfl_ops.cc | 4 +- .../compiler/mlir/lite/tests/legalize-tf.mlir | 25 +- tensorflow/compiler/mlir/lite/tests/ops.mlir | 11 +- tensorflow/lite/kernels/BUILD | 3 +- .../lite/kernels/internal/reference/sub.h | 350 ++++----- tensorflow/lite/kernels/sub_test.cc | 672 +++++++++++++++++- 7 files changed, 825 insertions(+), 242 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 465dc46bed5cf1..5e6cc62a627a62 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -45,7 +45,7 @@ * `tf.lite`: - * `mul_op` supports broadcasting up to 6 dimensions. + * `sub_op` and `mul_op` support broadcasting up to 6 dimensions. 
* The `tflite::SignatureRunner` class, which provides support for named parameters and for multiple named computations within a single TF Lite diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc b/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc index 19fb5b3b4e0d41..939d840f404445 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc @@ -364,7 +364,7 @@ bool VerifySubOpShapeConstraints(SubOp op) { IsQI16Type(element_type)) { return VerifyOperandsHaveSameShapesOrBroadcastableShape( /*op=*/op.getOperation(), /*indices=*/ArrayRef{0, 1}, - /*max_bcast_rank=*/5); + /*max_bcast_rank=*/6); } // Allows QI8 output when the operands have valid shapes, which are @@ -372,7 +372,7 @@ bool VerifySubOpShapeConstraints(SubOp op) { if (IsQI8Type(element_type)) { return VerifyOperandsHaveSameShapesOrBroadcastableShape( /*op=*/op.getOperation(), /*indices=*/ArrayRef{0, 1}, - /*max_bcast_rank=*/4); + /*max_bcast_rank=*/6); } return false; } diff --git a/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir b/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir index 953bd4a455f0dc..444a494f73769a 100644 --- a/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir +++ b/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir @@ -1925,16 +1925,29 @@ func.func @add_with_int32_7d_inputs(%arg0: tensor<1x1x1x1x1x3x1xi32>, %arg1 : te // CHECK: %2 = tfl.add %0, %1 {fused_activation_function = "NONE"} : tensor<1x1x1x1x1x3x4xi32> } -// CHECK-LABEL: testSubWithBroadcastToOps -func.func @testSubWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { - // CHECK: [[CST:%.*]] = arith.constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> - // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) - // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) - // CHECK: tfl.sub [[BCAST]], [[BCAST_1]] {fused_activation_function = "NONE"} : tensor<1x2x3x4x5x6xi32> +func.func 
@test5DSubWithImplicitBroadcast(%arg0: tensor<1x1x1x3x1xi32>, %arg1 : tensor<1x1x1x1x4xi32>) -> tensor<1x1x1x3x4xi32> { + %0 = "tf.Sub"(%arg0, %arg1): (tensor<1x1x1x3x1xi32>, tensor<1x1x1x1x4xi32>) -> tensor<1x1x1x3x4xi32> + func.return %0 : tensor<1x1x1x3x4xi32> +// CHECK-LABEL: test5DSubWithImplicitBroadcast +// CHECK: %0 = tfl.sub(%arg0, %arg1) {fused_activation_function = "NONE"} : (tensor<1x1x1x3x1xi32>, tensor<1x1x1x1x4xi32>) -> tensor<1x1x1x3x4xi32> +} + +func.func @test6DSubWithImplicitBroadcast(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { +// CHECK-LABEL: test6DSubWithImplicitBroadcast +// CHECK: %0 = tfl.sub(%arg0, %arg1) {fused_activation_function = "NONE"} : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> %0 = "tf.Sub"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> func.return %0 : tensor<1x2x3x4x5x6xi32> } +func.func @sub_with_int32_7d_inputs(%arg0: tensor<1x1x1x1x1x3x1xi32>, %arg1 : tensor<1x1x1x1x1x1x4xi32>) -> tensor<1x1x1x1x1x3x4xi32> { + %0 = "tf.Sub"(%arg0, %arg1): (tensor<1x1x1x1x1x3x1xi32>, tensor<1x1x1x1x1x1x4xi32>) -> tensor<1x1x1x1x1x3x4xi32> + func.return %0 : tensor<1x1x1x1x1x3x4xi32> +// CHECK-LABEL: sub_with_int32_7d_inputs +// CHECK: %0 = "tfl.broadcast_to"(%arg0, %cst) : (tensor<1x1x1x1x1x3x1xi32>, tensor<7xi64>) -> tensor<1x1x1x1x1x3x4xi32> +// CHECK: %1 = "tfl.broadcast_to"(%arg1, %cst) : (tensor<1x1x1x1x1x1x4xi32>, tensor<7xi64>) -> tensor<1x1x1x1x1x3x4xi32> +// CHECK: %2 = tfl.sub %0, %1 {fused_activation_function = "NONE"} : tensor<1x1x1x1x1x3x4xi32> +} + func.func @test5DMulWithImplicitBroadcast(%arg0: tensor<1x1x1x3x1xi32>, %arg1 : tensor<1x1x1x1x4xi32>) -> tensor<1x1x1x3x4xi32> { %0 = "tf.Mul"(%arg0, %arg1): (tensor<1x1x1x3x1xi32>, tensor<1x1x1x1x4xi32>) -> tensor<1x1x1x3x4xi32> func.return %0 : tensor<1x1x1x3x4xi32> diff --git a/tensorflow/compiler/mlir/lite/tests/ops.mlir 
b/tensorflow/compiler/mlir/lite/tests/ops.mlir index 28ff3abdc67b65..a2ca1158c5405f 100644 --- a/tensorflow/compiler/mlir/lite/tests/ops.mlir +++ b/tensorflow/compiler/mlir/lite/tests/ops.mlir @@ -411,9 +411,18 @@ func.func @add_with_quantized_i16_broadcasting(tensor<2x2xf32>, tensor<1xf32>) - // ----- +// CHECK-LABEL: sub_with_i32_five_dim_broadcasting +func.func @sub_with_i32_five_dim_broadcasting(tensor<1x1x1x1x1xi32>, tensor<1xi32>) -> tensor<1x1x1x1x1xi32> { +^bb0(%arg0: tensor<1x1x1x1x1xi32>, %arg1: tensor<1xi32>): + // CHECK: tfl.sub(%arg0, %arg1) {fused_activation_function = "RELU6"} + %0 = "tfl.sub"(%arg0, %arg1) {fused_activation_function = "RELU6"} : (tensor<1x1x1x1x1xi32>, tensor<1xi32>) -> tensor<1x1x1x1x1xi32> + func.return %0#0 : tensor<1x1x1x1x1xi32> +} + +// ----- + func.func @sub_with_quantized_i8_five_dim_broadcasting(tensor<1x1x1x1x1xf32>, tensor<1xf32>) -> tensor<1x1x1x1x1x!quant.any> { ^bb0(%arg0: tensor<1x1x1x1x1xf32>, %arg1: tensor<1xf32>): - // expected-error @+1 {{Operands do not have valid shapes}} %0 = "tfl.sub"(%arg0, %arg1) {fused_activation_function = "RELU6"} : (tensor<1x1x1x1x1xf32>, tensor<1xf32>) -> tensor<1x1x1x1x1x!quant.any> func.return %0#0 : tensor<1x1x1x1x1x!quant.any> } diff --git a/tensorflow/lite/kernels/BUILD b/tensorflow/lite/kernels/BUILD index f69a197d0b1eb0..b641ecdeadae14 100644 --- a/tensorflow/lite/kernels/BUILD +++ b/tensorflow/lite/kernels/BUILD @@ -1181,8 +1181,9 @@ cc_test( cc_test( name = "sub_test", - size = "small", + size = "medium", srcs = ["sub_test.cc"], + shard_count = 10, tags = ["tflite_nnapi"], deps = [ ":test_main", diff --git a/tensorflow/lite/kernels/internal/reference/sub.h b/tensorflow/lite/kernels/internal/reference/sub.h index d0ebc95ada0851..50e8ec1ee06f53 100644 --- a/tensorflow/lite/kernels/internal/reference/sub.h +++ b/tensorflow/lite/kernels/internal/reference/sub.h @@ -29,6 +29,9 @@ namespace tflite { namespace reference_ops { +// Maximum dimension supported by the broadcast sub 
operation. +constexpr int kMaxSubBroadcastDim = 6; + inline void SubNonBroadcast(const ArithmeticParams& params, const RuntimeShape& input1_shape, const float* input1_data, @@ -61,144 +64,61 @@ inline void SubNonBroadcast(const ArithmeticParams& params, } } +template +void BroadcastSubRecursiveDimensions( + const ArithmeticParams& params, int dimension, const T* input1_data, + const T* input2_data, T* output_data, size_t* input1_offset_p, + size_t* input2_offset_p, size_t* output_offset, + const NdArrayDesc& desc1, + const NdArrayDesc& desc2, + const int32_t extended_output_shape_dims[kMaxSubBroadcastDim], + F binary_func) { + if (dimension == kMaxSubBroadcastDim - 1) { + for (int c = 0; c < extended_output_shape_dims[dimension]; ++c) { + const T input1_val = input1_data[*input1_offset_p]; + const T input2_val = input2_data[*input2_offset_p]; + output_data[*output_offset] = binary_func(params, input1_val, input2_val); + *input1_offset_p += desc1.strides[dimension]; + *input2_offset_p += desc2.strides[dimension]; + ++(*output_offset); + } + } else { + for (int a = 0; a < extended_output_shape_dims[dimension]; ++a) { + size_t input1_offset_c = *input1_offset_p; + size_t input2_offset_c = *input2_offset_p; + BroadcastSubRecursiveDimensions( + params, dimension + 1, input1_data, input2_data, output_data, + &input1_offset_c, &input2_offset_c, output_offset, desc1, desc2, + extended_output_shape_dims, binary_func); + *input1_offset_p += desc1.strides[dimension]; + *input2_offset_p += desc2.strides[dimension]; + } + } +} + // TODO(b/151345304): We can implement BroadcastSub on buffers of arbitrary // dimensionality if the runtime code does a single loop over one dimension // that handles broadcasting as the base case. The code generator would then // generate max(D1, D2) nested for loops. 
-template -inline void BroadcastSubSlow(const ArithmeticParams& params, - const RuntimeShape& input1_shape, - const float* input1_data, - const RuntimeShape& input2_shape, - const float* input2_data, - const RuntimeShape& output_shape, - float* output_data) { - ruy::profiler::ScopeLabel label("BroadcastSubSlow/float"); - TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N); - TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N); - TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N); - NdArrayDesc desc1; - NdArrayDesc desc2; - NdArrayDesc output_desc; - NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, - &desc2); - CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc); - - // In Tensorflow, the dimensions are canonically named (batch_number, row, - // col, channel), with extents (batches, height, width, depth), with the - // trailing dimension changing most rapidly (channels has the smallest stride, - // typically 1 element). - // - // In generated C code, we store arrays with the dimensions reversed. The - // first dimension has smallest stride. - // - // We name our variables by their Tensorflow convention, but generate C code - // nesting loops such that the innermost loop has the smallest stride for the - // best cache behavior. 
- auto sub_func = [&](int indexes[N]) { - output_data[SubscriptToIndex(output_desc, indexes)] = - ActivationFunctionWithMinMax( - input1_data[SubscriptToIndex(desc1, indexes)] - - input2_data[SubscriptToIndex(desc2, indexes)], - params.float_activation_min, params.float_activation_max); - }; - NDOpsHelper(output_desc, sub_func); -} - -template -inline void BroadcastSubSlow(const ArithmeticParams& params, - const RuntimeShape& input1_shape, - const int32_t* input1_data, - const RuntimeShape& input2_shape, - const int32_t* input2_data, - const RuntimeShape& output_shape, - int32_t* output_data) { - ruy::profiler::ScopeLabel label("BroadcastSubSlow/int32_t"); - TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N); - TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N); - TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N); - NdArrayDesc desc1; - NdArrayDesc desc2; - NdArrayDesc output_desc; - NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, - &desc2); - CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc); - - // In Tensorflow, the dimensions are canonically named (batch_number, row, - // col, channel), with extents (batches, height, width, depth), with the - // trailing dimension changing most rapidly (channels has the smallest stride, - // typically 1 element). - // - // In generated C code, we store arrays with the dimensions reversed. The - // first dimension has smallest stride. - // - // We name our variables by their Tensorflow convention, but generate C code - // nesting loops such that the innermost loop has the smallest stride for the - // best cache behavior. 
- auto sub_func = [&](int indexes[N]) { - output_data[SubscriptToIndex(output_desc, indexes)] = - ActivationFunctionWithMinMax( - input1_data[SubscriptToIndex(desc1, indexes)] - - input2_data[SubscriptToIndex(desc2, indexes)], - params.quantized_activation_min, params.quantized_activation_max); - }; - NDOpsHelper(output_desc, sub_func); -} - -template -void BroadcastSubSlow(const ArithmeticParams& params, - const RuntimeShape& input1_shape, - const int64_t* input1_data, - const RuntimeShape& input2_shape, - const int64_t* input2_data, - const RuntimeShape& output_shape, int64_t* output_data) { - ruy::profiler::ScopeLabel label("BroadcastSubSlow/int64_t"); - TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N); - TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N); - TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N); - NdArrayDesc desc1; - NdArrayDesc desc2; - NdArrayDesc output_desc; - NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, - &desc2); - CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc); - - // In Tensorflow, the dimensions are canonically named (batch_number, row, - // col, channel), with extents (batches, height, width, depth), with the - // trailing dimension changing most rapidly (channels has the smallest stride, - // typically 1 element). - // - // In generated C code, we store arrays with the dimensions reversed. The - // first dimension has smallest stride. - // - // We name our variables by their Tensorflow convention, but generate C code - // nesting loops such that the innermost loop has the smallest stride for the - // best cache behavior. 
- auto sub_func = [&](int indexes[N]) { - output_data[SubscriptToIndex(output_desc, indexes)] = - ActivationFunctionWithMinMax( - input1_data[SubscriptToIndex(desc1, indexes)] - - input2_data[SubscriptToIndex(desc2, indexes)], - params.int64_activation_min, params.int64_activation_max); - }; - NDOpsHelper(output_desc, sub_func); -} - -template +template void BroadcastSubSlow(const ArithmeticParams& params, const RuntimeShape& input1_shape, const T* input1_data, const RuntimeShape& input2_shape, const T* input2_data, const RuntimeShape& output_shape, T* output_data) { - ruy::profiler::ScopeLabel label("BroadcastSubSlow/templated"); - TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N); - TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N); - TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N); - NdArrayDesc desc1; - NdArrayDesc desc2; - NdArrayDesc output_desc; + ruy::profiler::ScopeLabel label("BroadcastSubSlow/T"); + TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), kMaxSubBroadcastDim); + TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), kMaxSubBroadcastDim); + TFLITE_DCHECK_LE(output_shape.DimensionsCount(), kMaxSubBroadcastDim); + NdArrayDesc desc1; + NdArrayDesc desc2; NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2); - CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc); + const RuntimeShape extended_output_shape = + RuntimeShape::ExtendedShape(kMaxSubBroadcastDim, output_shape); + // Cache output shape dimensions. 
+ int32_t extended_output_shape_dims[kMaxSubBroadcastDim]; + std::memcpy(extended_output_shape_dims, extended_output_shape.DimsData(), + sizeof(extended_output_shape_dims)); // In Tensorflow, the dimensions are canonically named (batch_number, row, // col, channel), with extents (batches, height, width, depth), with the @@ -211,17 +131,21 @@ void BroadcastSubSlow(const ArithmeticParams& params, // We name our variables by their Tensorflow convention, but generate C code // nesting loops such that the innermost loop has the smallest stride for the // best cache behavior. - auto sub_func = [&](int indexes[N]) { - output_data[SubscriptToIndex(output_desc, indexes)] = - ActivationFunctionWithMinMax( - input1_data[SubscriptToIndex(desc1, indexes)] - - input2_data[SubscriptToIndex(desc2, indexes)], - params.quantized_activation_min, params.quantized_activation_max); - }; - NDOpsHelper(output_desc, sub_func); + size_t input1_offset = 0; + size_t input2_offset = 0; + size_t output_offset = 0; + BroadcastSubRecursiveDimensions( + params, 0, input1_data, input2_data, output_data, &input1_offset, + &input2_offset, &output_offset, desc1, desc2, extended_output_shape_dims, + [](const ArithmeticParams& params, const T input1_val, + const T input2_val) { + T activation_min, activation_max; + GetActivationParams(params, &activation_min, &activation_max); + return ActivationFunctionWithMinMax(input1_val - input2_val, + activation_min, activation_max); + }); } -template inline void BroadcastSub16POTSlow(const ArithmeticParams& params, const RuntimeShape& input1_shape, const int16_t* input1_data, @@ -230,12 +154,19 @@ inline void BroadcastSub16POTSlow(const ArithmeticParams& params, const RuntimeShape& output_shape, int16_t* output_data) { ruy::profiler::ScopeLabel label("BroadcastSub16POTSlow/int16_t"); - NdArrayDesc desc1; - NdArrayDesc desc2; - NdArrayDesc output_desc; + TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), kMaxSubBroadcastDim); + 
TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), kMaxSubBroadcastDim); + TFLITE_DCHECK_LE(output_shape.DimensionsCount(), kMaxSubBroadcastDim); + NdArrayDesc desc1; + NdArrayDesc desc2; NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2); - CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc); + const RuntimeShape extended_output_shape = + RuntimeShape::ExtendedShape(kMaxSubBroadcastDim, output_shape); + // Cache output shape dimensions. + int32_t extended_output_shape_dims[kMaxSubBroadcastDim]; + std::memcpy(extended_output_shape_dims, extended_output_shape.DimsData(), + sizeof(extended_output_shape_dims)); // In Tensorflow, the dimensions are canonically named (batch_number, row, // col, channel), with extents (batches, height, width, depth), with the @@ -248,24 +179,27 @@ inline void BroadcastSub16POTSlow(const ArithmeticParams& params, // We name our variables by their Tensorflow convention, but generate C code // nesting loops such that the innermost loop has the smallest stride for the // best cache behavior. 
- auto sub_func = [&](int indexes[N]) { - const int32_t input1_val = input1_data[SubscriptToIndex(desc1, indexes)]; - const int32_t input2_val = input2_data[SubscriptToIndex(desc2, indexes)]; - const int32_t scaled_input1_val = - gemmlowp::RoundingDivideByPOT(input1_val, -params.input1_shift); - const int32_t scaled_input2_val = - gemmlowp::RoundingDivideByPOT(input2_val, -params.input2_shift); - const int32_t raw_output = scaled_input1_val - scaled_input2_val; - const int32_t clamped_output = - std::min(params.quantized_activation_max, - std::max(params.quantized_activation_min, raw_output)); - output_data[SubscriptToIndex(output_desc, indexes)] = - static_cast(clamped_output); - }; - NDOpsHelper(output_desc, sub_func); + size_t input1_offset = 0; + size_t input2_offset = 0; + size_t output_offset = 0; + BroadcastSubRecursiveDimensions( + params, 0, input1_data, input2_data, output_data, &input1_offset, + &input2_offset, &output_offset, desc1, desc2, extended_output_shape_dims, + [](const ArithmeticParams& params, const int16_t input1_val, + const int16_t input2_val) { + const int32_t scaled_input1_val = + gemmlowp::RoundingDivideByPOT(input1_val, -params.input1_shift); + const int32_t scaled_input2_val = + gemmlowp::RoundingDivideByPOT(input2_val, -params.input2_shift); + const int32_t raw_output = scaled_input1_val - scaled_input2_val; + const int32_t clamped_output = + std::min(params.quantized_activation_max, + std::max(params.quantized_activation_min, raw_output)); + return static_cast(clamped_output); + }); } -template +template void BroadcastQuantSubSlow(const ArithmeticParams& params, const RuntimeShape& input1_shape, const T* input1_data, @@ -273,15 +207,19 @@ void BroadcastQuantSubSlow(const ArithmeticParams& params, const T* input2_data, const RuntimeShape& output_shape, T* output_data) { ruy::profiler::ScopeLabel label("BroadcastQuantSubSlow/T"); - TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N); - TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), 
N); - TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N); - NdArrayDesc desc1; - NdArrayDesc desc2; - NdArrayDesc output_desc; + TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), kMaxSubBroadcastDim); + TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), kMaxSubBroadcastDim); + TFLITE_DCHECK_LE(output_shape.DimensionsCount(), kMaxSubBroadcastDim); + NdArrayDesc desc1; + NdArrayDesc desc2; NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2); - CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc); + const RuntimeShape extended_output_shape = + RuntimeShape::ExtendedShape(kMaxSubBroadcastDim, output_shape); + // Cache output shape dimensions. + int32_t extended_output_shape_dims[kMaxSubBroadcastDim]; + std::memcpy(extended_output_shape_dims, extended_output_shape.DimsData(), + sizeof(extended_output_shape_dims)); // In Tensorflow, the dimensions are canonically named (batch_number, row, // col, channel), with extents (batches, height, width, depth), with the @@ -294,31 +232,36 @@ void BroadcastQuantSubSlow(const ArithmeticParams& params, // We name our variables by their Tensorflow convention, but generate C code // nesting loops such that the innermost loop has the smallest stride for the // best cache behavior. 
- auto sub_func = [&](int indexes[N]) { - const int32_t input1_val = - params.input1_offset + input1_data[SubscriptToIndex(desc1, indexes)]; - const int32_t input2_val = - params.input2_offset + input2_data[SubscriptToIndex(desc2, indexes)]; - const int32_t shifted_input1_val = input1_val * (1 << params.left_shift); - const int32_t shifted_input2_val = input2_val * (1 << params.left_shift); - const int32_t scaled_input1_val = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - shifted_input1_val, params.input1_multiplier, params.input1_shift); - const int32_t scaled_input2_val = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - shifted_input2_val, params.input2_multiplier, params.input2_shift); - const int32_t raw_sub = scaled_input1_val - scaled_input2_val; - const int32_t raw_output = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - raw_sub, params.output_multiplier, params.output_shift) + - params.output_offset; - const int32_t clamped_output = - std::min(params.quantized_activation_max, - std::max(params.quantized_activation_min, raw_output)); - output_data[SubscriptToIndex(output_desc, indexes)] = - static_cast(clamped_output); - }; - NDOpsHelper(output_desc, sub_func); + size_t input1_offset = 0; + size_t input2_offset = 0; + size_t output_offset = 0; + BroadcastSubRecursiveDimensions( + params, 0, input1_data, input2_data, output_data, &input1_offset, + &input2_offset, &output_offset, desc1, desc2, extended_output_shape_dims, + [](const ArithmeticParams& params, const T input1_val, + const T input2_val) { + const int32_t shifted_input1_val = + (params.input1_offset + input1_val) * (1 << params.left_shift); + const int32_t shifted_input2_val = + (params.input2_offset + input2_val) * (1 << params.left_shift); + const int32_t scaled_input1_val = + MultiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_input1_val, params.input1_multiplier, + params.input1_shift); + const int32_t scaled_input2_val = + MultiplyByQuantizedMultiplierSmallerThanOneExp( + 
shifted_input2_val, params.input2_multiplier, + params.input2_shift); + const int32_t raw_sub = scaled_input1_val - scaled_input2_val; + const int32_t raw_output = + MultiplyByQuantizedMultiplierSmallerThanOneExp( + raw_sub, params.output_multiplier, params.output_shift) + + params.output_offset; + const int32_t clamped_output = + std::min(params.quantized_activation_max, + std::max(params.quantized_activation_min, raw_output)); + return static_cast(clamped_output); + }); } // Element-wise add that can often be used for inner loop of broadcast add as @@ -405,12 +348,16 @@ void Sub(const ArithmeticParams& params, const RuntimeShape& input1_shape, const T* input1_data, const RuntimeShape& input2_shape, const T* input2_data, const RuntimeShape& output_shape, T* output_data) { - NdArrayDesc<4> desc1; - NdArrayDesc<4> desc2; + NdArrayDesc desc1; + NdArrayDesc desc2; NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2); const RuntimeShape extended_output_shape = - RuntimeShape::ExtendedShape(4, output_shape); + RuntimeShape::ExtendedShape(kMaxSubBroadcastDim, output_shape); + // Cache output shape dimensions. + int32_t extended_output_shape_dims[kMaxSubBroadcastDim]; + std::memcpy(extended_output_shape_dims, extended_output_shape.DimsData(), + sizeof(extended_output_shape_dims)); // In Tensorflow, the dimensions are canonically named (batch_number, row, // col, channel), with extents (batches, height, width, depth), with the @@ -423,17 +370,14 @@ void Sub(const ArithmeticParams& params, const RuntimeShape& input1_shape, // We name our variables by their Tensorflow convention, but generate C code // nesting loops such that the innermost loop has the smallest stride for the // best cache behavior. 
- for (int b = 0; b < extended_output_shape.Dims(0); ++b) { - for (int y = 0; y < extended_output_shape.Dims(1); ++y) { - for (int x = 0; x < extended_output_shape.Dims(2); ++x) { - for (int c = 0; c < extended_output_shape.Dims(3); ++c) { - output_data[Offset(extended_output_shape, b, y, x, c)] = - input1_data[SubscriptToIndex(desc1, b, y, x, c)] - - input2_data[SubscriptToIndex(desc2, b, y, x, c)]; - } - } - } - } + size_t input1_offset = 0; + size_t input2_offset = 0; + size_t output_offset = 0; + BroadcastSubRecursiveDimensions( + params, 0, input1_data, input2_data, output_data, &input1_offset, + &input2_offset, &output_offset, desc1, desc2, extended_output_shape_dims, + [](const ArithmeticParams& params, const T input1_val, + const T input2_val) { return input1_val - input2_val; }); } inline void SetActivationMinMax(const ArithmeticParams& params, diff --git a/tensorflow/lite/kernels/sub_test.cc b/tensorflow/lite/kernels/sub_test.cc index 5821fd302aa4e9..88ede0f922aa06 100644 --- a/tensorflow/lite/kernels/sub_test.cc +++ b/tensorflow/lite/kernels/sub_test.cc @@ -14,10 +14,17 @@ limitations under the License. 
==============================================================================*/ #include +#include +#include +#include +#include #include +#include +#include #include #include +#include #include #include "flatbuffers/flatbuffers.h" // from @flatbuffers #include "tensorflow/lite/kernels/test_util.h" @@ -38,6 +45,7 @@ class BaseSubOpModel : public SingleOpModel { output_ = AddOutput(output); SetBuiltinOp(BuiltinOperator_SUB, BuiltinOptions_SubOptions, CreateSubOptions(builder_, activation_type).Union()); + SetBypassDefaultDelegates(); BuildInterpreter({GetShape(input1_), GetShape(input2_)}); } @@ -63,14 +71,10 @@ class IntegerSubOpModel : public BaseSubOpModel { public: using BaseSubOpModel::BaseSubOpModel; - std::vector GetOutput() { return ExtractVector(output_); } -}; - -class Int64SubOpModel : public BaseSubOpModel { - public: - using BaseSubOpModel::BaseSubOpModel; - - std::vector GetOutput() { return ExtractVector(output_); } + template + std::vector GetOutput() { + return ExtractVector(output_); + } }; class QuantizedSubOpModel : public BaseSubOpModel { @@ -250,7 +254,7 @@ TEST(IntegerSubOpModel, NoActivation) { m.PopulateTensor(m.input1(), {-20, 2, 7, 8}); m.PopulateTensor(m.input2(), {1, 2, 3, 5}); ASSERT_EQ(m.Invoke(), kTfLiteOk); - EXPECT_THAT(m.GetOutput(), ElementsAreArray({-21, 0, 4, 3})); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({-21, 0, 4, 3})); } TEST(IntegerSubOpModel, ActivationRELU_N1_TO_1) { @@ -260,7 +264,7 @@ TEST(IntegerSubOpModel, ActivationRELU_N1_TO_1) { m.PopulateTensor(m.input1(), {-20, 2, 7, 8}); m.PopulateTensor(m.input2(), {1, 2, 3, 5}); ASSERT_EQ(m.Invoke(), kTfLiteOk); - EXPECT_THAT(m.GetOutput(), ElementsAreArray({-1, 0, 1, 1})); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({-1, 0, 1, 1})); } TEST(IntegerSubOpModel, VariousInputShapes) { @@ -273,7 +277,7 @@ TEST(IntegerSubOpModel, VariousInputShapes) { m.PopulateTensor(m.input1(), {-20, 2, 7, 8, 11, 20}); m.PopulateTensor(m.input2(), {1, 2, 3, 5, 11, 1}); 
ASSERT_EQ(m.Invoke(), kTfLiteOk); - EXPECT_THAT(m.GetOutput(), ElementsAreArray({-21, 0, 4, 3, 0, 19})) + EXPECT_THAT(m.GetOutput(), ElementsAreArray({-21, 0, 4, 3, 0, 19})) << "With shape number " << i; } } @@ -288,43 +292,43 @@ TEST(IntegerSubOpModel, WithBroadcast) { m.PopulateTensor(m.input1(), {-20, 2, 7, 8, 11, 20}); m.PopulateTensor(m.input2(), {1}); ASSERT_EQ(m.Invoke(), kTfLiteOk); - EXPECT_THAT(m.GetOutput(), + EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear({-21, 1, 6, 7, 10, 19}))) << "With shape number " << i; } } TEST(Int64SubOpModel, NoActivation) { - Int64SubOpModel m({TensorType_INT64, {1, 2, 2, 1}}, - {TensorType_INT64, {1, 2, 2, 1}}, {TensorType_INT64, {}}, - ActivationFunctionType_NONE); + IntegerSubOpModel m({TensorType_INT64, {1, 2, 2, 1}}, + {TensorType_INT64, {1, 2, 2, 1}}, {TensorType_INT64, {}}, + ActivationFunctionType_NONE); m.PopulateTensor(m.input1(), {-20, 2, 7, 8}); m.PopulateTensor(m.input2(), {1, 2, 3, 5}); ASSERT_EQ(m.Invoke(), kTfLiteOk); - EXPECT_THAT(m.GetOutput(), ElementsAreArray({-21, 0, 4, 3})); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({-21, 0, 4, 3})); } TEST(Int64SubOpModel, ActivationRELU_N1_TO_1) { - Int64SubOpModel m({TensorType_INT64, {1, 2, 2, 1}}, - {TensorType_INT64, {1, 2, 2, 1}}, {TensorType_INT64, {}}, - ActivationFunctionType_RELU_N1_TO_1); + IntegerSubOpModel m({TensorType_INT64, {1, 2, 2, 1}}, + {TensorType_INT64, {1, 2, 2, 1}}, {TensorType_INT64, {}}, + ActivationFunctionType_RELU_N1_TO_1); m.PopulateTensor(m.input1(), {-20, 2, 7, 8}); m.PopulateTensor(m.input2(), {1, 2, 3, 5}); ASSERT_EQ(m.Invoke(), kTfLiteOk); - EXPECT_THAT(m.GetOutput(), ElementsAreArray({-1, 0, 1, 1})); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({-1, 0, 1, 1})); } TEST(Int64SubOpModel, VariousInputShapes) { std::vector> test_shapes = { {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; for (int i = 0; i < test_shapes.size(); ++i) { - Int64SubOpModel m({TensorType_INT64, test_shapes[i]}, - {TensorType_INT64, test_shapes[i]}, - 
{TensorType_INT64, {}}, ActivationFunctionType_NONE); + IntegerSubOpModel m({TensorType_INT64, test_shapes[i]}, + {TensorType_INT64, test_shapes[i]}, + {TensorType_INT64, {}}, ActivationFunctionType_NONE); m.PopulateTensor(m.input1(), {-20, 2, 7, 8, 11, 20}); m.PopulateTensor(m.input2(), {1, 2, 3, 5, 11, 1}); ASSERT_EQ(m.Invoke(), kTfLiteOk); - EXPECT_THAT(m.GetOutput(), ElementsAreArray({-21, 0, 4, 3, 0, 19})) + EXPECT_THAT(m.GetOutput(), ElementsAreArray({-21, 0, 4, 3, 0, 19})) << "With shape number " << i; } } @@ -333,13 +337,13 @@ TEST(Int64SubOpModel, WithBroadcast) { std::vector> test_shapes = { {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}, {1, 3, 1, 2, 1}}; for (int i = 0; i < test_shapes.size(); ++i) { - Int64SubOpModel m({TensorType_INT64, test_shapes[i]}, - {TensorType_INT64, {}}, // always a scalar - {TensorType_INT64, {}}, ActivationFunctionType_NONE); + IntegerSubOpModel m({TensorType_INT64, test_shapes[i]}, + {TensorType_INT64, {}}, // always a scalar + {TensorType_INT64, {}}, ActivationFunctionType_NONE); m.PopulateTensor(m.input1(), {-20, 2, 7, 8, 11, 20}); m.PopulateTensor(m.input2(), {1}); ASSERT_EQ(m.Invoke(), kTfLiteOk); - EXPECT_THAT(m.GetOutput(), + EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear({-21, 1, 6, 7, 10, 19}))) << "With shape number " << i; } @@ -592,5 +596,617 @@ TEST(QuantizedSubOpModel, QuantizedTestsReluActivationBroadcastInt16) { } } +constexpr int kDim1 = 2; +constexpr int kDim2 = 3; +constexpr int kDim3 = 4; +constexpr int kDim4 = 5; +constexpr int kDim5 = 6; +constexpr int kDim6 = 7; + +constexpr int kMaxBroadcastDim = 6; + +void TestFloatBroadcast(std::vector input1_shape, + std::vector input2_shape) { + std::array input1_dims; + std::array input2_dims; + std::array output_dims; + std::array input1_strides; + std::array input2_strides; + std::array output_strides; + std::fill(input1_dims.begin(), input1_dims.end(), 1); + std::fill(input2_dims.begin(), input2_dims.end(), 1); + std::fill(output_dims.begin(), 
output_dims.end(), 1); + std::copy(input1_shape.cbegin(), input1_shape.cend(), + input1_dims.end() - input1_shape.size()); + std::copy(input2_shape.cbegin(), input2_shape.cend(), + input2_dims.end() - input2_shape.size()); + + for (size_t i = 0; i < kMaxBroadcastDim; i++) { + if (input1_dims[i] != 1 && input2_dims[i] != 1) { + ASSERT_EQ(input1_dims[i], input2_dims[i]); + } + output_dims[i] = std::max(input1_dims[i], input2_dims[i]); + } + // Compute generalized strides. + size_t input1_stride = 1, input2_stride = 1, output_stride = 1; + for (size_t i = kMaxBroadcastDim; i != 0; i--) { + input1_strides[i - 1] = input1_dims[i - 1] == 1 ? 0 : input1_stride; + input2_strides[i - 1] = input2_dims[i - 1] == 1 ? 0 : input2_stride; + output_strides[i - 1] = output_stride; + input1_stride *= input1_dims[i - 1]; + input2_stride *= input2_dims[i - 1]; + output_stride *= output_dims[i - 1]; + } + const int num_input1_elements = std::accumulate( + input1_dims.begin(), input1_dims.end(), 1, std::multiplies()); + const int num_input2_elements = std::accumulate( + input2_dims.begin(), input2_dims.end(), 1, std::multiplies()); + const int num_output_elements = std::accumulate( + output_dims.begin(), output_dims.end(), 1, std::multiplies()); + std::vector input1(num_input1_elements); + std::vector input2(num_input2_elements); + std::vector output_ref(num_output_elements); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + std::uniform_real_distribution f32dist(0.01f, 1.0f); + + std::generate(input1.begin(), input1.end(), [&]() { return f32dist(rng); }); + std::generate(input2.begin(), input2.end(), [&]() { return f32dist(rng); }); + + // Compute reference results. 
+ for (size_t i = 0; i < output_dims[0]; i++) { + for (size_t j = 0; j < output_dims[1]; j++) { + for (size_t k = 0; k < output_dims[2]; k++) { + for (size_t l = 0; l < output_dims[3]; l++) { + for (size_t m = 0; m < output_dims[4]; m++) { + for (size_t n = 0; n < output_dims[5]; n++) { + output_ref[i * output_strides[0] + j * output_strides[1] + + k * output_strides[2] + l * output_strides[3] + + m * output_strides[4] + n * output_strides[5]] = + input1[i * input1_strides[0] + j * input1_strides[1] + + k * input1_strides[2] + l * input1_strides[3] + + m * input1_strides[4] + n * input1_strides[5]] - + input2[i * input2_strides[0] + j * input2_strides[1] + + k * input2_strides[2] + l * input2_strides[3] + + m * input2_strides[4] + n * input2_strides[5]]; + } + } + } + } + } + } + + FloatSubOpModel m({TensorType_FLOAT32, input1_shape}, + {TensorType_FLOAT32, input2_shape}, + {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE); + m.PopulateTensor(m.input1(), input1); + m.PopulateTensor(m.input2(), input2); + ASSERT_EQ(m.Invoke(), kTfLiteOk); + EXPECT_THAT(m.GetOutput(), testing::ContainerEq(output_ref)); +} + +template +void TestIntegerBroadcast(std::vector input1_shape, + std::vector input2_shape) { + std::array input1_dims; + std::array input2_dims; + std::array output_dims; + std::array input1_strides; + std::array input2_strides; + std::array output_strides; + std::fill(input1_dims.begin(), input1_dims.end(), 1); + std::fill(input2_dims.begin(), input2_dims.end(), 1); + std::fill(output_dims.begin(), output_dims.end(), 1); + std::copy(input1_shape.cbegin(), input1_shape.cend(), + input1_dims.end() - input1_shape.size()); + std::copy(input2_shape.cbegin(), input2_shape.cend(), + input2_dims.end() - input2_shape.size()); + + for (size_t i = 0; i < kMaxBroadcastDim; i++) { + if (input1_dims[i] != 1 && input2_dims[i] != 1) { + ASSERT_EQ(input1_dims[i], input2_dims[i]); + } + output_dims[i] = std::max(input1_dims[i], input2_dims[i]); + } + // Compute generalized 
strides. + size_t input1_stride = 1, input2_stride = 1, output_stride = 1; + for (size_t i = kMaxBroadcastDim; i != 0; i--) { + input1_strides[i - 1] = input1_dims[i - 1] == 1 ? 0 : input1_stride; + input2_strides[i - 1] = input2_dims[i - 1] == 1 ? 0 : input2_stride; + output_strides[i - 1] = output_stride; + input1_stride *= input1_dims[i - 1]; + input2_stride *= input2_dims[i - 1]; + output_stride *= output_dims[i - 1]; + } + const int num_input1_elements = std::accumulate( + input1_dims.begin(), input1_dims.end(), 1, std::multiplies()); + const int num_input2_elements = std::accumulate( + input2_dims.begin(), input2_dims.end(), 1, std::multiplies()); + const int num_output_elements = std::accumulate( + output_dims.begin(), output_dims.end(), 1, std::multiplies()); + std::vector input1(num_input1_elements); + std::vector input2(num_input2_elements); + std::vector output_ref(num_output_elements); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + std::uniform_int_distribution dist(0, 256); + + std::generate(input1.begin(), input1.end(), [&]() { return dist(rng); }); + std::generate(input2.begin(), input2.end(), [&]() { return dist(rng); }); + + // Compute reference results. 
+ for (size_t i = 0; i < output_dims[0]; i++) { + for (size_t j = 0; j < output_dims[1]; j++) { + for (size_t k = 0; k < output_dims[2]; k++) { + for (size_t l = 0; l < output_dims[3]; l++) { + for (size_t m = 0; m < output_dims[4]; m++) { + for (size_t n = 0; n < output_dims[5]; n++) { + output_ref[i * output_strides[0] + j * output_strides[1] + + k * output_strides[2] + l * output_strides[3] + + m * output_strides[4] + n * output_strides[5]] = + input1[i * input1_strides[0] + j * input1_strides[1] + + k * input1_strides[2] + l * input1_strides[3] + + m * input1_strides[4] + n * input1_strides[5]] - + input2[i * input2_strides[0] + j * input2_strides[1] + + k * input2_strides[2] + l * input2_strides[3] + + m * input2_strides[4] + n * input2_strides[5]]; + } + } + } + } + } + } + + IntegerSubOpModel m({GetTensorType(), input1_shape}, + {GetTensorType(), input2_shape}, + {GetTensorType(), {}}, + ActivationFunctionType_NONE); + m.PopulateTensor(m.input1(), input1); + m.PopulateTensor(m.input2(), input2); + ASSERT_EQ(m.Invoke(), kTfLiteOk); + EXPECT_THAT(m.GetOutput(), testing::ContainerEq(output_ref)); +} + +// To improve automatic test sharding (via shard_count in the BUILD file), +// we need to ensure that each individual test case runs in a reasonable time, +// otherwise we end up being limited by the performance of the longest shard. +// Since TestFloat32MultiDimBroadcast has 2^12 iterations, it takes a +// long time (over 30 seconds) to execute all iterations -- too long for a +// single shard. So we split it into a few "subshards" and have a separate +// TEST macro invocation for each subshard.
+ +void TestFloat32MultiDimBroadcast(int selected_subshard, int subshard_count) { + int iteration = 0; + for (uint32_t bm1 = 0; bm1 < (static_cast(1) << kMaxBroadcastDim); + bm1++) { + for (uint32_t bm2 = 0; bm2 < (static_cast(1) << kMaxBroadcastDim); + bm2++) { + if (iteration++ % subshard_count != selected_subshard) { + continue; // This iteration of the loop is not part of this subshard. + } + const bool input1_broadcast_dim1 = bm1 & (static_cast(1) << 0); + const bool input1_broadcast_dim2 = bm1 & (static_cast(1) << 1); + const bool input1_broadcast_dim3 = bm1 & (static_cast(1) << 2); + const bool input1_broadcast_dim4 = bm1 & (static_cast(1) << 3); + const bool input1_broadcast_dim5 = bm1 & (static_cast(1) << 4); + const bool input1_broadcast_dim6 = bm1 & (static_cast(1) << 5); + const bool input2_broadcast_dim1 = bm2 & (static_cast(1) << 0); + const bool input2_broadcast_dim2 = bm2 & (static_cast(1) << 1); + const bool input2_broadcast_dim3 = bm2 & (static_cast(1) << 2); + const bool input2_broadcast_dim4 = bm2 & (static_cast(1) << 3); + const bool input2_broadcast_dim5 = bm2 & (static_cast(1) << 4); + const bool input2_broadcast_dim6 = bm2 & (static_cast(1) << 5); + const int input1_dim1 = input1_broadcast_dim1 ? 1 : kDim1; + const int input1_dim2 = input1_broadcast_dim2 ? 1 : kDim2; + const int input1_dim3 = input1_broadcast_dim3 ? 1 : kDim3; + const int input1_dim4 = input1_broadcast_dim4 ? 1 : kDim4; + const int input1_dim5 = input1_broadcast_dim5 ? 1 : kDim5; + const int input1_dim6 = input1_broadcast_dim6 ? 1 : kDim6; + const int input2_dim1 = input2_broadcast_dim1 ? 1 : kDim1; + const int input2_dim2 = input2_broadcast_dim2 ? 1 : kDim2; + const int input2_dim3 = input2_broadcast_dim3 ? 1 : kDim3; + const int input2_dim4 = input2_broadcast_dim4 ? 1 : kDim4; + const int input2_dim5 = input2_broadcast_dim5 ? 1 : kDim5; + const int input2_dim6 = input2_broadcast_dim6 ? 
1 : kDim6; + std::vector input1_full_shape{input1_dim1, input1_dim2, input1_dim3, + input1_dim4, input1_dim5, input1_dim6}; + std::vector input2_full_shape{input2_dim1, input2_dim2, input2_dim3, + input2_dim4, input2_dim5, input2_dim6}; + for (int input1_dims = 1; input1_dims <= kMaxBroadcastDim; + ++input1_dims) { + for (int input2_dims = 1; input2_dims <= kMaxBroadcastDim; + ++input2_dims) { + std::vector input1_shape(input1_dims), input2_shape(input2_dims); + std::copy(input1_full_shape.end() - input1_dims, + input1_full_shape.end(), input1_shape.data()); + std::copy(input2_full_shape.end() - input2_dims, + input2_full_shape.end(), input2_shape.data()); + TestFloatBroadcast(input1_shape, input2_shape); + } + } + } + } +} + +// Should match the number of TEST or TYPED_TEST invocations for each of +// Float32MultiDimBroadcastSubshard*, +// IntegerMultiDimBroadcastSubshard*, +// Int8QuantizedMultiDimBroadcastSubshard*, and +// Uint8QuantizedMultiDimBroadcastSubshard* below. +constexpr int kMultiDimBroadcastSubshardCount = 10; + +TEST(FloatSubOpModel, Float32MultiDimBroadcastSubshard0) { + TestFloat32MultiDimBroadcast(0, kMultiDimBroadcastSubshardCount); +} +TEST(FloatSubOpModel, Float32MultiDimBroadcastSubshard1) { + TestFloat32MultiDimBroadcast(1, kMultiDimBroadcastSubshardCount); +} +TEST(FloatSubOpModel, Float32MultiDimBroadcastSubshard2) { + TestFloat32MultiDimBroadcast(2, kMultiDimBroadcastSubshardCount); +} +TEST(FloatSubOpModel, Float32MultiDimBroadcastSubshard3) { + TestFloat32MultiDimBroadcast(3, kMultiDimBroadcastSubshardCount); +} +TEST(FloatSubOpModel, Float32MultiDimBroadcastSubshard4) { + TestFloat32MultiDimBroadcast(4, kMultiDimBroadcastSubshardCount); +} +TEST(FloatSubOpModel, Float32MultiDimBroadcastSubshard5) { + TestFloat32MultiDimBroadcast(5, kMultiDimBroadcastSubshardCount); +} +TEST(FloatSubOpModel, Float32MultiDimBroadcastSubshard6) { + TestFloat32MultiDimBroadcast(6, kMultiDimBroadcastSubshardCount); +} +TEST(FloatSubOpModel, 
Float32MultiDimBroadcastSubshard7) { + TestFloat32MultiDimBroadcast(7, kMultiDimBroadcastSubshardCount); +} +TEST(FloatSubOpModel, Float32MultiDimBroadcastSubshard8) { + TestFloat32MultiDimBroadcast(8, kMultiDimBroadcastSubshardCount); +} +TEST(FloatSubOpModel, Float32MultiDimBroadcastSubshard9) { + TestFloat32MultiDimBroadcast(9, kMultiDimBroadcastSubshardCount); +} + +template +class IntegerSubOpTest : public ::testing::Test {}; + +using Int32Or64Types = ::testing::Types; +TYPED_TEST_SUITE(IntegerSubOpTest, Int32Or64Types); + +// To improve automatic test sharding (via shard_count in the BUILD file), +// we need to ensure that each individual test case runs in a reasonable time, +// otherwise we end up being limited by the performance of the longest shard. +// Since TestIntegerMultiDimBroadcast has 2^12 iterations, it takes a +// long time (over 30 seconds) to execute all iterations -- too long for a +// single shard. So we split it into a few "subshards" and have a separate +// TYPED_TEST macro invocation for each subshard. + +template +void TestIntegerMultiDimBroadcast(int selected_subshard, int subshard_count) { + ASSERT_LT(selected_subshard, subshard_count); + int iteration = 0; + for (uint32_t bm1 = 0; bm1 < (static_cast(1) << kMaxBroadcastDim); + bm1++) { + for (uint32_t bm2 = 0; bm2 < (static_cast(1) << kMaxBroadcastDim); + bm2++) { + if (iteration++ % subshard_count != selected_subshard) { + continue; // This iteration of the loop is not part of this subshard. 
+ } + const bool input1_broadcast_dim1 = bm1 & (static_cast(1) << 0); + const bool input1_broadcast_dim2 = bm1 & (static_cast(1) << 1); + const bool input1_broadcast_dim3 = bm1 & (static_cast(1) << 2); + const bool input1_broadcast_dim4 = bm1 & (static_cast(1) << 3); + const bool input1_broadcast_dim5 = bm1 & (static_cast(1) << 4); + const bool input1_broadcast_dim6 = bm1 & (static_cast(1) << 5); + const bool input2_broadcast_dim1 = bm2 & (static_cast(1) << 0); + const bool input2_broadcast_dim2 = bm2 & (static_cast(1) << 1); + const bool input2_broadcast_dim3 = bm2 & (static_cast(1) << 2); + const bool input2_broadcast_dim4 = bm2 & (static_cast(1) << 3); + const bool input2_broadcast_dim5 = bm2 & (static_cast(1) << 4); + const bool input2_broadcast_dim6 = bm2 & (static_cast(1) << 5); + const int input1_dim1 = input1_broadcast_dim1 ? 1 : kDim1; + const int input1_dim2 = input1_broadcast_dim2 ? 1 : kDim2; + const int input1_dim3 = input1_broadcast_dim3 ? 1 : kDim3; + const int input1_dim4 = input1_broadcast_dim4 ? 1 : kDim4; + const int input1_dim5 = input1_broadcast_dim5 ? 1 : kDim5; + const int input1_dim6 = input1_broadcast_dim6 ? 1 : kDim6; + const int input2_dim1 = input2_broadcast_dim1 ? 1 : kDim1; + const int input2_dim2 = input2_broadcast_dim2 ? 1 : kDim2; + const int input2_dim3 = input2_broadcast_dim3 ? 1 : kDim3; + const int input2_dim4 = input2_broadcast_dim4 ? 1 : kDim4; + const int input2_dim5 = input2_broadcast_dim5 ? 1 : kDim5; + const int input2_dim6 = input2_broadcast_dim6 ? 
1 : kDim6; + std::vector input1_full_shape{input1_dim1, input1_dim2, input1_dim3, + input1_dim4, input1_dim5, input1_dim6}; + std::vector input2_full_shape{input2_dim1, input2_dim2, input2_dim3, + input2_dim4, input2_dim5, input2_dim6}; + for (int input1_dims = 1; input1_dims <= kMaxBroadcastDim; + ++input1_dims) { + for (int input2_dims = 1; input2_dims <= kMaxBroadcastDim; + ++input2_dims) { + std::vector input1_shape(input1_dims), input2_shape(input2_dims); + std::copy(input1_full_shape.end() - input1_dims, + input1_full_shape.end(), input1_shape.data()); + std::copy(input2_full_shape.end() - input2_dims, + input2_full_shape.end(), input2_shape.data()); + TestIntegerBroadcast(input1_shape, input2_shape); + } + } + } + } +} + +TYPED_TEST(IntegerSubOpTest, IntegerMultiDimBroadcastSubshard0) { + TestIntegerMultiDimBroadcast(0, kMultiDimBroadcastSubshardCount); +} +TYPED_TEST(IntegerSubOpTest, IntegerMultiDimBroadcastSubshard1) { + TestIntegerMultiDimBroadcast(1, kMultiDimBroadcastSubshardCount); +} +TYPED_TEST(IntegerSubOpTest, IntegerMultiDimBroadcastSubshard2) { + TestIntegerMultiDimBroadcast(2, kMultiDimBroadcastSubshardCount); +} +TYPED_TEST(IntegerSubOpTest, IntegerMultiDimBroadcastSubshard3) { + TestIntegerMultiDimBroadcast(3, kMultiDimBroadcastSubshardCount); +} +TYPED_TEST(IntegerSubOpTest, IntegerMultiDimBroadcastSubshard4) { + TestIntegerMultiDimBroadcast(4, kMultiDimBroadcastSubshardCount); +} +TYPED_TEST(IntegerSubOpTest, IntegerMultiDimBroadcastSubshard5) { + TestIntegerMultiDimBroadcast(5, kMultiDimBroadcastSubshardCount); +} +TYPED_TEST(IntegerSubOpTest, IntegerMultiDimBroadcastSubshard6) { + TestIntegerMultiDimBroadcast(6, kMultiDimBroadcastSubshardCount); +} +TYPED_TEST(IntegerSubOpTest, IntegerMultiDimBroadcastSubshard7) { + TestIntegerMultiDimBroadcast(7, kMultiDimBroadcastSubshardCount); +} +TYPED_TEST(IntegerSubOpTest, IntegerMultiDimBroadcastSubshard8) { + TestIntegerMultiDimBroadcast(8, kMultiDimBroadcastSubshardCount); +} 
+TYPED_TEST(IntegerSubOpTest, IntegerMultiDimBroadcastSubshard9) { + TestIntegerMultiDimBroadcast(9, kMultiDimBroadcastSubshardCount); +} + +template +void TestQuantizedBroadcast(std::vector input1_shape, + std::vector input2_shape) { + std::array input1_dims; + std::array input2_dims; + std::array output_dims; + std::array input1_strides; + std::array input2_strides; + std::array output_strides; + std::fill(input1_dims.begin(), input1_dims.end(), 1); + std::fill(input2_dims.begin(), input2_dims.end(), 1); + std::fill(output_dims.begin(), output_dims.end(), 1); + std::copy(input1_shape.cbegin(), input1_shape.cend(), + input1_dims.end() - input1_shape.size()); + std::copy(input2_shape.cbegin(), input2_shape.cend(), + input2_dims.end() - input2_shape.size()); + + for (size_t i = 0; i < kMaxBroadcastDim; i++) { + if (input1_dims[i] != 1 && input2_dims[i] != 1) { + ASSERT_EQ(input1_dims[i], input2_dims[i]); + } + output_dims[i] = std::max(input1_dims[i], input2_dims[i]); + } + // Compute generalized strides. + size_t input1_stride = 1, input2_stride = 1, output_stride = 1; + for (size_t i = kMaxBroadcastDim; i != 0; i--) { + input1_strides[i - 1] = input1_dims[i - 1] == 1 ? 0 : input1_stride; + input2_strides[i - 1] = input2_dims[i - 1] == 1 ? 
0 : input2_stride; + output_strides[i - 1] = output_stride; + input1_stride *= input1_dims[i - 1]; + input2_stride *= input2_dims[i - 1]; + output_stride *= output_dims[i - 1]; + } + const int num_input1_elements = std::accumulate( + input1_dims.begin(), input1_dims.end(), 1, std::multiplies()); + const int num_input2_elements = std::accumulate( + input2_dims.begin(), input2_dims.end(), 1, std::multiplies()); + const int num_output_elements = std::accumulate( + output_dims.begin(), output_dims.end(), 1, std::multiplies()); + std::vector input1(num_input1_elements); + std::vector input2(num_input2_elements); + std::vector output_ref(num_output_elements); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + + std::uniform_real_distribution dist(-0.5f, 0.5f); + + std::generate(input1.begin(), input1.end(), [&]() { return dist(rng); }); + std::generate(input2.begin(), input2.end(), [&]() { return dist(rng); }); + + QuantizedSubOpModel m( + {GetTensorType(), input1_shape, -0.5f, 0.5f}, + {GetTensorType(), input2_shape, -0.5f, 0.5f}, + {GetTensorType(), {}, -0.5f, 0.5f}, + ActivationFunctionType_NONE); + m.QuantizeAndPopulate(m.input1(), input1); + m.QuantizeAndPopulate(m.input2(), input2); + // Compute reference results. 
+ for (size_t i = 0; i < output_dims[0]; i++) { + for (size_t j = 0; j < output_dims[1]; j++) { + for (size_t k = 0; k < output_dims[2]; k++) { + for (size_t l = 0; l < output_dims[3]; l++) { + for (size_t m = 0; m < output_dims[4]; m++) { + for (size_t n = 0; n < output_dims[5]; n++) { + float x = input1[i * input1_strides[0] + j * input1_strides[1] + + k * input1_strides[2] + l * input1_strides[3] + + m * input1_strides[4] + n * input1_strides[5]]; + float y = input2[i * input2_strides[0] + j * input2_strides[1] + + k * input2_strides[2] + l * input2_strides[3] + + m * input2_strides[4] + n * input2_strides[5]]; + output_ref[i * output_strides[0] + j * output_strides[1] + + k * output_strides[2] + l * output_strides[3] + + m * output_strides[4] + n * output_strides[5]] = x - y; + } + } + } + } + } + } + + for (float& output_value : output_ref) { + output_value = std::max(output_value, -1.0f); + output_value = std::min(output_value, 1.0f); + } + + ASSERT_EQ(m.Invoke(), kTfLiteOk); + std::vector output = m.GetDequantizedOutput(); + for (size_t i = 0; i < output_dims[0]; i++) { + for (size_t j = 0; j < output_dims[1]; j++) { + for (size_t k = 0; k < output_dims[2]; k++) { + for (size_t l = 0; l < output_dims[3]; l++) { + for (size_t m = 0; m < output_dims[4]; m++) { + for (size_t n = 0; n < output_dims[5]; n++) { + const size_t index = + i * output_strides[0] + j * output_strides[1] + + k * output_strides[2] + l * output_strides[3] + + m * output_strides[4] + n * output_strides[5]; + EXPECT_NEAR(output[index], output_ref[index], 0.6f) + << "(i, j, k, l, m, n) = (" << i << ", " << j << ", " << k + << ", " << l << ", " << m << ", " << n << ")"; + } + } + } + } + } + } +} + +template +class QuantizedSubOpTest : public ::testing::Test {}; + +using Int8OrUInt8OrInt16Types = ::testing::Types; +TYPED_TEST_SUITE(QuantizedSubOpTest, Int8OrUInt8OrInt16Types); + +// To improve automatic test sharding (via shard_count in the BUILD file), +// we need to ensure that each 
individual test case runs in a reasonable time, +// otherwise we end up being limited by the performance of the longest shard. +// Since TestQuantizedMultiDimBroadcast has 2^12 iterations, it takes a +// long time (over 30 seconds) to execute all iterations -- too long for a +// single shard. So we split it into a few "subshards" and have a separate +// TEST macro invocation for each subshard. + +template +void TestQuantizedMultiDimBroadcast(int selected_subshard, int subshard_count) { + ASSERT_LT(selected_subshard, subshard_count); + int iteration = 0; + for (uint32_t bm1 = 0; bm1 < (static_cast(1) << kMaxBroadcastDim); + bm1++) { + for (uint32_t bm2 = 0; bm2 < (static_cast(1) << kMaxBroadcastDim); + bm2++) { + if (iteration++ % subshard_count != selected_subshard) { + continue; // This iteration of the loop is not part of this subshard. + } + const bool input1_broadcast_dim1 = bm1 & (static_cast(1) << 0); + const bool input1_broadcast_dim2 = bm1 & (static_cast(1) << 1); + const bool input1_broadcast_dim3 = bm1 & (static_cast(1) << 2); + const bool input1_broadcast_dim4 = bm1 & (static_cast(1) << 3); + const bool input1_broadcast_dim5 = bm1 & (static_cast(1) << 4); + const bool input1_broadcast_dim6 = bm1 & (static_cast(1) << 5); + const bool input2_broadcast_dim1 = bm2 & (static_cast(1) << 0); + const bool input2_broadcast_dim2 = bm2 & (static_cast(1) << 1); + const bool input2_broadcast_dim3 = bm2 & (static_cast(1) << 2); + const bool input2_broadcast_dim4 = bm2 & (static_cast(1) << 3); + const bool input2_broadcast_dim5 = bm2 & (static_cast(1) << 4); + const bool input2_broadcast_dim6 = bm2 & (static_cast(1) << 5); + const int input1_dim1 = input1_broadcast_dim1 ? 1 : kDim1; + const int input1_dim2 = input1_broadcast_dim2 ? 1 : kDim2; + const int input1_dim3 = input1_broadcast_dim3 ? 1 : kDim3; + const int input1_dim4 = input1_broadcast_dim4 ? 1 : kDim4; + const int input1_dim5 = input1_broadcast_dim5 ? 
1 : kDim5; + const int input1_dim6 = input1_broadcast_dim6 ? 1 : kDim6; + const int input2_dim1 = input2_broadcast_dim1 ? 1 : kDim1; + const int input2_dim2 = input2_broadcast_dim2 ? 1 : kDim2; + const int input2_dim3 = input2_broadcast_dim3 ? 1 : kDim3; + const int input2_dim4 = input2_broadcast_dim4 ? 1 : kDim4; + const int input2_dim5 = input2_broadcast_dim5 ? 1 : kDim5; + const int input2_dim6 = input2_broadcast_dim6 ? 1 : kDim6; + std::vector input1_full_shape{input1_dim1, input1_dim2, input1_dim3, + input1_dim4, input1_dim5, input1_dim6}; + std::vector input2_full_shape{input2_dim1, input2_dim2, input2_dim3, + input2_dim4, input2_dim5, input2_dim6}; + for (int input1_dims = 1; input1_dims <= kMaxBroadcastDim; + ++input1_dims) { + for (int input2_dims = 1; input2_dims <= kMaxBroadcastDim; + ++input2_dims) { + std::vector input1_shape(input1_dims), input2_shape(input2_dims); + std::copy(input1_full_shape.end() - input1_dims, + input1_full_shape.end(), input1_shape.data()); + std::copy(input2_full_shape.end() - input2_dims, + input2_full_shape.end(), input2_shape.data()); + TestQuantizedBroadcast(input1_shape, input2_shape); + } + } + } + } +} + +TEST(QuantizedSubOpModel, Int8QuantizedMultiDimBroadcastSubshard0) { + TestQuantizedMultiDimBroadcast(0, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedSubOpModel, Int8QuantizedMultiDimBroadcastSubshard1) { + TestQuantizedMultiDimBroadcast(1, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedSubOpModel, Int8QuantizedMultiDimBroadcastSubshard2) { + TestQuantizedMultiDimBroadcast(2, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedSubOpModel, Int8QuantizedMultiDimBroadcastSubshard3) { + TestQuantizedMultiDimBroadcast(3, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedSubOpModel, Int8QuantizedMultiDimBroadcastSubshard4) { + TestQuantizedMultiDimBroadcast(4, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedSubOpModel, Int8QuantizedMultiDimBroadcastSubshard5) { + TestQuantizedMultiDimBroadcast(5, 
kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedSubOpModel, Int8QuantizedMultiDimBroadcastSubshard6) { + TestQuantizedMultiDimBroadcast(6, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedSubOpModel, Int8QuantizedMultiDimBroadcastSubshard7) { + TestQuantizedMultiDimBroadcast(7, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedSubOpModel, Int8QuantizedMultiDimBroadcastSubshard8) { + TestQuantizedMultiDimBroadcast(8, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedSubOpModel, Int8QuantizedMultiDimBroadcastSubshard9) { + TestQuantizedMultiDimBroadcast(9, kMultiDimBroadcastSubshardCount); +} + +TEST(QuantizedSubOpModel, Uint8QuantizedMultiDimBroadcastSubshard0) { + TestQuantizedMultiDimBroadcast(0, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedSubOpModel, Uint8QuantizedMultiDimBroadcastSubshard1) { + TestQuantizedMultiDimBroadcast(1, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedSubOpModel, Uint8QuantizedMultiDimBroadcastSubshard2) { + TestQuantizedMultiDimBroadcast(2, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedSubOpModel, Uint8QuantizedMultiDimBroadcastSubshard3) { + TestQuantizedMultiDimBroadcast(3, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedSubOpModel, Uint8QuantizedMultiDimBroadcastSubshard4) { + TestQuantizedMultiDimBroadcast(4, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedSubOpModel, Uint8QuantizedMultiDimBroadcastSubshard5) { + TestQuantizedMultiDimBroadcast(5, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedSubOpModel, Uint8QuantizedMultiDimBroadcastSubshard6) { + TestQuantizedMultiDimBroadcast(6, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedSubOpModel, Uint8QuantizedMultiDimBroadcastSubshard7) { + TestQuantizedMultiDimBroadcast(7, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedSubOpModel, Uint8QuantizedMultiDimBroadcastSubshard8) { + TestQuantizedMultiDimBroadcast(8, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedSubOpModel, Uint8QuantizedMultiDimBroadcastSubshard9) { + 
TestQuantizedMultiDimBroadcast(9, kMultiDimBroadcastSubshardCount); +} + } // namespace } // namespace tflite From 1b257eecced46265a7f3782c0cfaf70a32d73abd Mon Sep 17 00:00:00 2001 From: Jared Junyoung Lim Date: Sat, 23 Sep 2023 01:57:49 -0700 Subject: [PATCH 191/567] Optimize BroadcastSub functions in TFLite reference kernel. It removes redundant dimensions, compresses compressible dimensions, and handles broadcasting. Also vectorize int32 element-wise subtraction. PiperOrigin-RevId: 567825531 --- tensorflow/lite/kernels/internal/common.h | 4 +- .../lite/kernels/internal/reference/sub.h | 396 ++++++++++-------- 2 files changed, 221 insertions(+), 179 deletions(-) diff --git a/tensorflow/lite/kernels/internal/common.h b/tensorflow/lite/kernels/internal/common.h index 3e95c54def7aa8..14d859917522f7 100644 --- a/tensorflow/lite/kernels/internal/common.h +++ b/tensorflow/lite/kernels/internal/common.h @@ -17,6 +17,7 @@ limitations under the License. #include #include +#include #include "tensorflow/lite/kernels/internal/runtime_shape.h" #ifndef ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK @@ -60,8 +61,7 @@ bool ReduceDimensionsForBroadcast(const RuntimeShape& input1_shape, const size_t num_input2_dims = input2_shape.DimensionsCount(); const int32_t* input1_dims = input1_shape.DimsData(); const int32_t* input2_dims = input2_shape.DimsData(); - const size_t num_common_dims = - (num_input1_dims < num_input2_dims) ? 
num_input1_dims : num_input2_dims; + const size_t num_common_dims = std::min(num_input1_dims, num_input2_dims); for (size_t i = 1; i <= num_common_dims; i++) { const size_t input1_dim = input1_dims[num_input1_dims - i]; const size_t input2_dim = input2_dims[num_input2_dims - i]; diff --git a/tensorflow/lite/kernels/internal/reference/sub.h b/tensorflow/lite/kernels/internal/reference/sub.h index 50e8ec1ee06f53..862bee149a5a69 100644 --- a/tensorflow/lite/kernels/internal/reference/sub.h +++ b/tensorflow/lite/kernels/internal/reference/sub.h @@ -18,6 +18,7 @@ limitations under the License. #include #include +#include #include #include "ruy/profiler/instrumentation.h" // from @ruy @@ -29,96 +30,179 @@ namespace tflite { namespace reference_ops { -// Maximum dimension supported by the broadcast sub operation. -constexpr int kMaxSubBroadcastDim = 6; +template +struct SubImpl { + template + static void BroadcastInput1(const ArithmeticParams& params, + const T* input1_data, const T* input2_data, + T* output_data, size_t size, F binary_func) { + for (int c = 0; c < size; ++c) { + output_data[c] = binary_func(input1_data[0], input2_data[c], params); + } + } -inline void SubNonBroadcast(const ArithmeticParams& params, - const RuntimeShape& input1_shape, - const float* input1_data, - const RuntimeShape& input2_shape, - const float* input2_data, - const RuntimeShape& output_shape, - float* output_data) { - const int flat_size = - MatchingElementsSize(input1_shape, input2_shape, output_shape); - for (int i = 0; i < flat_size; ++i) { - output_data[i] = ActivationFunctionWithMinMax( - input1_data[i] - input2_data[i], params.float_activation_min, - params.float_activation_max); + template + static void BroadcastInput2(const ArithmeticParams& params, + const T* input1_data, const T* input2_data, + T* output_data, size_t size, F binary_func) { + for (int c = 0; c < size; ++c) { + output_data[c] = binary_func(input1_data[c], input2_data[0], params); + } } -} -inline void 
SubNonBroadcast(const ArithmeticParams& params, - const RuntimeShape& input1_shape, - const int32_t* input1_data, - const RuntimeShape& input2_shape, - const int32_t* input2_data, - const RuntimeShape& output_shape, - int32_t* output_data) { - const int flat_size = - MatchingElementsSize(input1_shape, input2_shape, output_shape); - for (int i = 0; i < flat_size; ++i) { - output_data[i] = ActivationFunctionWithMinMax( - input1_data[i] - input2_data[i], params.quantized_activation_min, - params.quantized_activation_max); + template + static void ElementWise(const ArithmeticParams& params, const T* input1_data, + const T* input2_data, T* output_data, size_t size, + F binary_func) { + for (int c = 0; c < size; ++c) { + output_data[c] = binary_func(input1_data[c], input2_data[c], params); + } } -} +}; + +template <> +struct SubImpl { + template + static void BroadcastInput1(const ArithmeticParams& params, + const int32_t* input1_data, + const int32_t* input2_data, int32_t* output_data, + size_t size, F binary_func) { + size_t c = 0; + int32_t activation_min, activation_max; + GetActivationParams(params, &activation_min, &activation_max); +#ifdef USE_NEON + const int32x4_t vmax = vdupq_n_s32(activation_max); + const int32x4_t vmin = vdupq_n_s32(activation_min); + const int32x4_t va = vdupq_n_s32(input1_data[0]); + for (; c + 4 <= size; c += 4) { + const int32x4_t vb = vld1q_s32(&input2_data[c]); + int32x4_t vres = vsubq_s32(va, vb); + vres = vmaxq_s32(vmin, vres); + vres = vminq_s32(vmax, vres); + vst1q_s32(&output_data[c], vres); + } +#endif + for (; c < size; ++c) { + output_data[c] = binary_func(input1_data[0], input2_data[c], params); + } + } + + template + static void BroadcastInput2(const ArithmeticParams& params, + const int32_t* input1_data, + const int32_t* input2_data, int32_t* output_data, + size_t size, F binary_func) { + size_t c = 0; + int32_t activation_min, activation_max; + GetActivationParams(params, &activation_min, &activation_max); +#ifdef USE_NEON + 
const int32x4_t vmax = vdupq_n_s32(activation_max); + const int32x4_t vmin = vdupq_n_s32(activation_min); + const int32x4_t vb = vdupq_n_s32(input2_data[0]); + for (; c + 4 <= size; c += 4) { + const int32x4_t va = vld1q_s32(&input1_data[c]); + int32x4_t vres = vsubq_s32(va, vb); + vres = vmaxq_s32(vmin, vres); + vres = vminq_s32(vmax, vres); + vst1q_s32(&output_data[c], vres); + } +#endif + for (; c < size; ++c) { + output_data[c] = binary_func(input1_data[c], input2_data[0], params); + } + } + + template + static void ElementWise(const ArithmeticParams& params, + const int32_t* input1_data, + const int32_t* input2_data, int32_t* output_data, + size_t size, F binary_func) { + size_t c = 0; + int32_t activation_min, activation_max; + GetActivationParams(params, &activation_min, &activation_max); +#ifdef USE_NEON + int32x4_t vmax = vdupq_n_s32(activation_max); + int32x4_t vmin = vdupq_n_s32(activation_min); + for (; c + 4 <= size; c += 4) { + const int32x4_t va = vld1q_s32(&input1_data[c]); + const int32x4_t vb = vld1q_s32(&input2_data[c]); + int32x4_t vres = vsubq_s32(va, vb); + vres = vmaxq_s32(vmin, vres); + vres = vminq_s32(vmax, vres); + vst1q_s32(&output_data[c], vres); + } +#endif + for (; c < size; ++c) { + output_data[c] = binary_func(input1_data[c], input2_data[c], params); + } + } +}; template -void BroadcastSubRecursiveDimensions( - const ArithmeticParams& params, int dimension, const T* input1_data, +inline void BroadcastSubRecursiveDimensions( + int dimension, const ArithmeticParams& params, const T* input1_data, const T* input2_data, T* output_data, size_t* input1_offset_p, size_t* input2_offset_p, size_t* output_offset, - const NdArrayDesc& desc1, - const NdArrayDesc& desc2, - const int32_t extended_output_shape_dims[kMaxSubBroadcastDim], - F binary_func) { - if (dimension == kMaxSubBroadcastDim - 1) { - for (int c = 0; c < extended_output_shape_dims[dimension]; ++c) { - const T input1_val = input1_data[*input1_offset_p]; - const T input2_val = 
input2_data[*input2_offset_p]; - output_data[*output_offset] = binary_func(params, input1_val, input2_val); - *input1_offset_p += desc1.strides[dimension]; - *input2_offset_p += desc2.strides[dimension]; - ++(*output_offset); - } - } else { - for (int a = 0; a < extended_output_shape_dims[dimension]; ++a) { + size_t* compressed_input1_stride, size_t* compressed_input2_stride, + size_t* compressed_output_shape, F binary_func) { + if (dimension > 0) { + for (int c = 0; c < compressed_output_shape[dimension]; ++c) { size_t input1_offset_c = *input1_offset_p; size_t input2_offset_c = *input2_offset_p; BroadcastSubRecursiveDimensions( - params, dimension + 1, input1_data, input2_data, output_data, - &input1_offset_c, &input2_offset_c, output_offset, desc1, desc2, - extended_output_shape_dims, binary_func); - *input1_offset_p += desc1.strides[dimension]; - *input2_offset_p += desc2.strides[dimension]; + dimension - 1, params, input1_data, input2_data, output_data, + &input1_offset_c, &input2_offset_c, output_offset, + compressed_input1_stride, compressed_input2_stride, + compressed_output_shape, binary_func); + *input1_offset_p += compressed_input1_stride[dimension]; + *input2_offset_p += compressed_input2_stride[dimension]; + } + } else { + TFLITE_DCHECK(dimension == 0); + bool input1_is_broadcast = compressed_input1_stride[dimension] == 0; + bool input2_is_broadcast = compressed_input2_stride[dimension] == 0; + TFLITE_DCHECK(!(input1_is_broadcast && input2_is_broadcast)); + const T* input1_data_ptr = input1_data + *input1_offset_p; + const T* input2_data_ptr = input2_data + *input2_offset_p; + T* output_data_ptr = output_data + *output_offset; + if (input1_is_broadcast) { + // input1 is broadcast. + SubImpl::BroadcastInput1( + params, input1_data_ptr, input2_data_ptr, output_data_ptr, + compressed_output_shape[dimension], binary_func); + *input2_offset_p += compressed_output_shape[dimension]; + } else if (input2_is_broadcast) { + // input2 is broadcast. 
+ SubImpl::BroadcastInput2( + params, input1_data_ptr, input2_data_ptr, output_data_ptr, + compressed_output_shape[dimension], binary_func); + *input1_offset_p += compressed_output_shape[dimension]; + } else { + // Add element-wise. + SubImpl::ElementWise(params, input1_data_ptr, input2_data_ptr, + output_data_ptr, + compressed_output_shape[dimension], binary_func); + *input1_offset_p += compressed_output_shape[dimension]; + *input2_offset_p += compressed_output_shape[dimension]; } + *output_offset += compressed_output_shape[dimension]; } } -// TODO(b/151345304): We can implement BroadcastSub on buffers of arbitrary -// dimensionality if the runtime code does a single loop over one dimension -// that handles broadcasting as the base case. The code generator would then -// generate max(D1, D2) nested for loops. -template -void BroadcastSubSlow(const ArithmeticParams& params, - const RuntimeShape& input1_shape, const T* input1_data, - const RuntimeShape& input2_shape, const T* input2_data, - const RuntimeShape& output_shape, T* output_data) { - ruy::profiler::ScopeLabel label("BroadcastSubSlow/T"); - TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), kMaxSubBroadcastDim); - TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), kMaxSubBroadcastDim); - TFLITE_DCHECK_LE(output_shape.DimensionsCount(), kMaxSubBroadcastDim); - NdArrayDesc desc1; - NdArrayDesc desc2; - NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, - &desc2); - const RuntimeShape extended_output_shape = - RuntimeShape::ExtendedShape(kMaxSubBroadcastDim, output_shape); - // Cache output shape dimensions. - int32_t extended_output_shape_dims[kMaxSubBroadcastDim]; - std::memcpy(extended_output_shape_dims, extended_output_shape.DimsData(), - sizeof(extended_output_shape_dims)); +// TODO: b/296510380 - we may be able to factor out this to common.h for all +// binary arithmetic ops (add, sub, mul). 
+template +inline void BroadcastSubCommon(const ArithmeticParams& params, + const RuntimeShape& input1_shape, + const T* input1_data, + const RuntimeShape& input2_shape, + const T* input2_data, + const RuntimeShape& output_shape, T* output_data, + F binary_func) { + constexpr int kMaxBroadcastDim = 6; + TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), kMaxBroadcastDim); + TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), kMaxBroadcastDim); + TFLITE_DCHECK_LE(output_shape.DimensionsCount(), kMaxBroadcastDim); // In Tensorflow, the dimensions are canonically named (batch_number, row, // col, channel), with extents (batches, height, width, depth), with the @@ -131,18 +215,57 @@ void BroadcastSubSlow(const ArithmeticParams& params, // We name our variables by their Tensorflow convention, but generate C code // nesting loops such that the innermost loop has the smallest stride for the // best cache behavior. + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + + size_t compressed_input1_stride[kMaxBroadcastDim]; + size_t compressed_input2_stride[kMaxBroadcastDim]; + size_t compressed_output_shape[kMaxBroadcastDim]; + bool broadcastable_shape = ReduceDimensionsForBroadcast( + input1_shape, input2_shape, compressed_input1_stride, + compressed_input2_stride, compressed_output_shape); + // Skip broadcasting for degenerate shapes. 
+ if (!broadcastable_shape) { + return; + } + size_t input1_offset = 0; size_t input2_offset = 0; size_t output_offset = 0; BroadcastSubRecursiveDimensions( - params, 0, input1_data, input2_data, output_data, &input1_offset, - &input2_offset, &output_offset, desc1, desc2, extended_output_shape_dims, - [](const ArithmeticParams& params, const T input1_val, - const T input2_val) { + kMaxBroadcastDim - 1, params, input1_data, input2_data, output_data, + &input1_offset, &input2_offset, &output_offset, compressed_input1_stride, + compressed_input2_stride, compressed_output_shape, binary_func); +} + +// TODO(b/151345304): We can implement BroadcastSub on buffers of arbitrary +// dimensionality if the runtime code does a single loop over one dimension +// that handles broadcasting as the base case. The code generator would then +// generate max(D1, D2) nested for loops. +template +void BroadcastSubSlow(const ArithmeticParams& params, + const RuntimeShape& input1_shape, const T* input1_data, + const RuntimeShape& input2_shape, const T* input2_data, + const RuntimeShape& output_shape, T* output_data) { + ruy::profiler::ScopeLabel label("BroadcastSubSlow/T"); + BroadcastSubCommon( + params, input1_shape, input1_data, input2_shape, input2_data, + output_shape, output_data, + [](T input1_val, T input2_val, const ArithmeticParams& params) { T activation_min, activation_max; GetActivationParams(params, &activation_min, &activation_max); - return ActivationFunctionWithMinMax(input1_val - input2_val, - activation_min, activation_max); + return ActivationFunctionWithMinMax(input1_val - input2_val, + activation_min, activation_max); }); } @@ -154,39 +277,11 @@ inline void BroadcastSub16POTSlow(const ArithmeticParams& params, const RuntimeShape& output_shape, int16_t* output_data) { ruy::profiler::ScopeLabel label("BroadcastSub16POTSlow/int16_t"); - TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), kMaxSubBroadcastDim); - TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), 
kMaxSubBroadcastDim); - TFLITE_DCHECK_LE(output_shape.DimensionsCount(), kMaxSubBroadcastDim); - NdArrayDesc desc1; - NdArrayDesc desc2; - NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, - &desc2); - const RuntimeShape extended_output_shape = - RuntimeShape::ExtendedShape(kMaxSubBroadcastDim, output_shape); - // Cache output shape dimensions. - int32_t extended_output_shape_dims[kMaxSubBroadcastDim]; - std::memcpy(extended_output_shape_dims, extended_output_shape.DimsData(), - sizeof(extended_output_shape_dims)); - - // In Tensorflow, the dimensions are canonically named (batch_number, row, - // col, channel), with extents (batches, height, width, depth), with the - // trailing dimension changing most rapidly (channels has the smallest stride, - // typically 1 element). - // - // In generated C code, we store arrays with the dimensions reversed. The - // first dimension has smallest stride. - // - // We name our variables by their Tensorflow convention, but generate C code - // nesting loops such that the innermost loop has the smallest stride for the - // best cache behavior. 
- size_t input1_offset = 0; - size_t input2_offset = 0; - size_t output_offset = 0; - BroadcastSubRecursiveDimensions( - params, 0, input1_data, input2_data, output_data, &input1_offset, - &input2_offset, &output_offset, desc1, desc2, extended_output_shape_dims, - [](const ArithmeticParams& params, const int16_t input1_val, - const int16_t input2_val) { + BroadcastSubCommon( + params, input1_shape, input1_data, input2_shape, input2_data, + output_shape, output_data, + [](int16_t input1_val, int16_t input2_val, + const ArithmeticParams& params) { const int32_t scaled_input1_val = gemmlowp::RoundingDivideByPOT(input1_val, -params.input1_shift); const int32_t scaled_input2_val = @@ -207,39 +302,10 @@ void BroadcastQuantSubSlow(const ArithmeticParams& params, const T* input2_data, const RuntimeShape& output_shape, T* output_data) { ruy::profiler::ScopeLabel label("BroadcastQuantSubSlow/T"); - TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), kMaxSubBroadcastDim); - TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), kMaxSubBroadcastDim); - TFLITE_DCHECK_LE(output_shape.DimensionsCount(), kMaxSubBroadcastDim); - NdArrayDesc desc1; - NdArrayDesc desc2; - NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, - &desc2); - const RuntimeShape extended_output_shape = - RuntimeShape::ExtendedShape(kMaxSubBroadcastDim, output_shape); - // Cache output shape dimensions. - int32_t extended_output_shape_dims[kMaxSubBroadcastDim]; - std::memcpy(extended_output_shape_dims, extended_output_shape.DimsData(), - sizeof(extended_output_shape_dims)); - - // In Tensorflow, the dimensions are canonically named (batch_number, row, - // col, channel), with extents (batches, height, width, depth), with the - // trailing dimension changing most rapidly (channels has the smallest stride, - // typically 1 element). - // - // In generated C code, we store arrays with the dimensions reversed. The - // first dimension has smallest stride. 
- // - // We name our variables by their Tensorflow convention, but generate C code - // nesting loops such that the innermost loop has the smallest stride for the - // best cache behavior. - size_t input1_offset = 0; - size_t input2_offset = 0; - size_t output_offset = 0; - BroadcastSubRecursiveDimensions( - params, 0, input1_data, input2_data, output_data, &input1_offset, - &input2_offset, &output_offset, desc1, desc2, extended_output_shape_dims, - [](const ArithmeticParams& params, const T input1_val, - const T input2_val) { + BroadcastSubCommon( + params, input1_shape, input1_data, input2_shape, input2_data, + output_shape, output_data, + [](T input1_val, T input2_val, const ArithmeticParams& params) { const int32_t shifted_input1_val = (params.input1_offset + input1_val) * (1 << params.left_shift); const int32_t shifted_input2_val = @@ -348,36 +414,12 @@ void Sub(const ArithmeticParams& params, const RuntimeShape& input1_shape, const T* input1_data, const RuntimeShape& input2_shape, const T* input2_data, const RuntimeShape& output_shape, T* output_data) { - NdArrayDesc desc1; - NdArrayDesc desc2; - NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, - &desc2); - const RuntimeShape extended_output_shape = - RuntimeShape::ExtendedShape(kMaxSubBroadcastDim, output_shape); - // Cache output shape dimensions. - int32_t extended_output_shape_dims[kMaxSubBroadcastDim]; - std::memcpy(extended_output_shape_dims, extended_output_shape.DimsData(), - sizeof(extended_output_shape_dims)); - - // In Tensorflow, the dimensions are canonically named (batch_number, row, - // col, channel), with extents (batches, height, width, depth), with the - // trailing dimension changing most rapidly (channels has the smallest stride, - // typically 1 element). - // - // In generated C code, we store arrays with the dimensions reversed. The - // first dimension has smallest stride. 
- // - // We name our variables by their Tensorflow convention, but generate C code - // nesting loops such that the innermost loop has the smallest stride for the - // best cache behavior. - size_t input1_offset = 0; - size_t input2_offset = 0; - size_t output_offset = 0; - BroadcastSubRecursiveDimensions( - params, 0, input1_data, input2_data, output_data, &input1_offset, - &input2_offset, &output_offset, desc1, desc2, extended_output_shape_dims, - [](const ArithmeticParams& params, const T input1_val, - const T input2_val) { return input1_val - input2_val; }); + BroadcastSubCommon( + params, input1_shape, input1_data, input2_shape, input2_data, + output_shape, output_data, + [](T input1_val, T input2_val, const ArithmeticParams& params) { + return input1_val - input2_val; + }); } inline void SetActivationMinMax(const ArithmeticParams& params, From c8fafddb7a53d779aa842ce075a2a0bea84711ee Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 23 Sep 2023 02:01:58 -0700 Subject: [PATCH 192/567] compat: Update forward compatibility horizon to 2023-09-23 PiperOrigin-RevId: 567826030 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 4145a22481f391..ba7e2ef349203f 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -29,7 +29,7 @@ # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2023, 9, 22) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2023, 9, 23) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From 35f4b9497cd316842d2d433b38be5ad28cb0e997 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Sat, 23 Sep 2023 02:04:35 -0700 Subject: [PATCH 193/567] Update GraphDef version to 1628. PiperOrigin-RevId: 567826408 --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 470b11e06459b2..5419c15d3aec47 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 1627 // Updated: 2023/9/22 +#define TF_GRAPH_DEF_VERSION 1628 // Updated: 2023/9/23 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From bb5c4abf1078ec2fde15f34052f7ba7d9251ab09 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Sat, 23 Sep 2023 07:51:38 -0700 Subject: [PATCH 194/567] [XLA] Fix build A bad dep added in 198144efc4235162423939b2256505765f78e145 broke the build. PiperOrigin-RevId: 567864100 --- third_party/xla/xla/BUILD | 1 - third_party/xla/xla/comparison_util.h | 1 - 2 files changed, 2 deletions(-) diff --git a/third_party/xla/xla/BUILD b/third_party/xla/xla/BUILD index 184e312a72ede1..1b728600ea6a37 100644 --- a/third_party/xla/xla/BUILD +++ b/third_party/xla/xla/BUILD @@ -144,7 +144,6 @@ cc_library( ":xla_data_proto_cc", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/container:flat_hash_map", - "@com_google_absl//absl/log:check", "@com_google_absl//absl/strings", "@local_tsl//tsl/platform:float8", "@local_tsl//tsl/platform:logging", diff --git a/third_party/xla/xla/comparison_util.h b/third_party/xla/xla/comparison_util.h index 364e503ff7c0c1..0c11edf302d464 100644 --- a/third_party/xla/xla/comparison_util.h +++ b/third_party/xla/xla/comparison_util.h @@ -22,7 +22,6 @@ limitations under the License. 
#include #include -#include "absl/log/check.h" #include "absl/strings/string_view.h" #include "xla/primitive_util.h" #include "xla/statusor.h" From 51cc9dc4d9131e832bfc858dc59159e63ea8f116 Mon Sep 17 00:00:00 2001 From: Ramesh Sampath Date: Sat, 23 Sep 2023 11:33:30 -0700 Subject: [PATCH 195/567] Adds back `allow_multiple_exports` which is a no-op to maintain `keras_export` compatibility with TF 2.13 and TF Nightly. PiperOrigin-RevId: 567886699 --- tensorflow/python/tools/api/generator2/extractor/extractor.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/python/tools/api/generator2/extractor/extractor.py b/tensorflow/python/tools/api/generator2/extractor/extractor.py index 6e859ffef4aae5..ccdd13099d13c4 100644 --- a/tensorflow/python/tools/api/generator2/extractor/extractor.py +++ b/tensorflow/python/tools/api/generator2/extractor/extractor.py @@ -335,6 +335,9 @@ def _add_exported_symbol(self, node: ast.Call, symbol_name: str) -> None: v1_apis = tuple( f'{self._api_name}.{v}' for v in self._literal_value(kw.value) ) + elif kw.arg == 'allow_multiple_exports': + # no-op kept for backward comapatibility of `tf-keras` with TF 2.13 + pass else: raise BadExportError( f'{self._current_file}:{node.lineno} export called' From 39447256a55bf5cd69907ae261eaa6ac98f80a1c Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Sat, 23 Sep 2023 12:01:50 -0700 Subject: [PATCH 196/567] Internal Code Change PiperOrigin-RevId: 567889304 --- tensorflow/compiler/mlir/tfrt/ir/mlrt/BUILD | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/mlir/tfrt/ir/mlrt/BUILD b/tensorflow/compiler/mlir/tfrt/ir/mlrt/BUILD index 2001bfc7186ca9..3e7d5db414a542 100644 --- a/tensorflow/compiler/mlir/tfrt/ir/mlrt/BUILD +++ b/tensorflow/compiler/mlir/tfrt/ir/mlrt/BUILD @@ -16,7 +16,6 @@ td_library( # copybara:uncomment "//learning/brain/tfrt/mlir:__subpackages__", "//learning/infra/mira/distributed:__subpackages__", "//tensorflow/compiler/mlir/tfrt:__subpackages__", - "//tensorflow/core/tfrt/mlrt:__subpackages__", ], deps = [ "@llvm-project//mlir:OpBaseTdFiles", @@ -91,7 +90,7 @@ td_library( ], includes = ["."], visibility = [ - "//tensorflow/core/tfrt/mlrt:__subpackages__", + "//visibility:private", # Only private by automation, not intent. Owner may accept CLs adding visibility. See go/scheuklappen#explicit-private. ], deps = [ ":mlrt_td_files", @@ -161,7 +160,6 @@ cc_library( # copybara:uncomment "//learning/brain/experimental/tfrt/mlrt/application/tensorflow/tests:__subpackages__", # copybara:uncomment "//learning/infra/mira/distributed:__subpackages__", "//tensorflow/compiler/mlir/tfrt:__subpackages__", - "//tensorflow/core/tfrt/mlrt:__subpackages__", ], deps = [ ":mlrt_ops", @@ -180,10 +178,7 @@ cc_library( name = "tf_mlrt_tpu_ops", srcs = ["tf_mlrt_tpu_ops.cc"], hdrs = ["tf_mlrt_tpu_ops.h"], - visibility = [ - "//tensorflow/compiler/mlir/tfrt/transforms/mlrt:__subpackages__", - "//tensorflow/core/tfrt/mlrt:__subpackages__", - ], + visibility = ["//tensorflow/compiler/mlir/tfrt/transforms/mlrt:__subpackages__"], deps = [ ":mlrt_ops", ":tf_mlrt_ops", From e1538f18a95d9fed68a12a5aa2d46d1148588809 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Sat, 23 Sep 2023 18:48:03 -0700 Subject: [PATCH 197/567] Remove absl logging includes that are breaking build. PiperOrigin-RevId: 567927507 --- third_party/xla/xla/pjrt/BUILD | 2 -- third_party/xla/xla/pjrt/pjrt_client.h | 2 -- 2 files changed, 4 deletions(-) diff --git a/third_party/xla/xla/pjrt/BUILD b/third_party/xla/xla/pjrt/BUILD index bf8cce6944164a..8666c726b89af4 100644 --- a/third_party/xla/xla/pjrt/BUILD +++ b/third_party/xla/xla/pjrt/BUILD @@ -190,8 +190,6 @@ cc_library( "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:inlined_vector", "@com_google_absl//absl/functional:any_invocable", - "@com_google_absl//absl/log", - "@com_google_absl//absl/log:check", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", "@com_google_absl//absl/synchronization", diff --git a/third_party/xla/xla/pjrt/pjrt_client.h b/third_party/xla/xla/pjrt/pjrt_client.h index 4c32a1b81fd180..c4b73edea0529b 100644 --- a/third_party/xla/xla/pjrt/pjrt_client.h +++ b/third_party/xla/xla/pjrt/pjrt_client.h @@ -31,8 +31,6 @@ limitations under the License. #include "absl/container/flat_hash_map.h" #include "absl/container/inlined_vector.h" #include "absl/functional/any_invocable.h" -#include "absl/log/check.h" -#include "absl/log/log.h" #include "absl/status/statusor.h" #include "absl/strings/string_view.h" #include "absl/synchronization/mutex.h" From 359cf4552e205f0fb9cba966932273f1df3c8fd0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 23 Sep 2023 18:52:04 -0700 Subject: [PATCH 198/567] Having live range dcheck respect threads to avoid false positive. 
PiperOrigin-RevId: 567927880 --- third_party/xla/xla/service/copy_insertion.cc | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/third_party/xla/xla/service/copy_insertion.cc b/third_party/xla/xla/service/copy_insertion.cc index 77e10c3f21508e..a49fb6914dc566 100644 --- a/third_party/xla/xla/service/copy_insertion.cc +++ b/third_party/xla/xla/service/copy_insertion.cc @@ -1188,7 +1188,8 @@ class CopyRemover { }; CopyRemover(const HloModule& module, const HloAliasAnalysis& alias_analysis, - HloOrdering* ordering, bool check_live_range_ordering) + HloOrdering* ordering, bool check_live_range_ordering, + const absl::flat_hash_set& execution_threads) : dataflow_(alias_analysis.dataflow_analysis()), ordering_(ordering) { // Construct a list for each HLO buffer in the alias analysis. Maintain a // map from HloValue to the respective list element representing that @@ -1203,6 +1204,14 @@ class CopyRemover { continue; } if (check_live_range_ordering) { + // Skip checking if execution thread is not included. + auto should_skip_value = [&execution_threads](const HloValue* value) { + return value->defining_instruction()->parent() != nullptr && + !HloInstruction::IsThreadIncluded(value->defining_instruction() + ->parent() + ->execution_thread(), + execution_threads); + }; // Verify values contained in the buffer are strictly ordered. This // should always be the case after adding copies to eliminate // interference. Specifically, the addition of the control flow edges @@ -1213,8 +1222,11 @@ class CopyRemover { // Token values have no representation and cannot interfere. 
continue; } + if (should_skip_value(value_a)) { + continue; + } for (const HloValue* value_b : buffer.values()) { - if (value_a != value_b) { + if (!should_skip_value(value_b) && value_a != value_b) { DCHECK(ordering_->LiveRangeStrictlyBefore( *value_a, *value_b, dataflow_, /*use_is_always_before_def_in_same_instr=*/true) || @@ -2111,7 +2123,7 @@ Status CopyInsertion::RemoveUnnecessaryCopies( TF_ASSIGN_OR_RETURN(std::unique_ptr alias_analysis, HloAliasAnalysis::Run(module, can_share_buffer_)); CopyRemover copy_remover(*module, *alias_analysis, ordering.get(), - check_live_range_ordering); + check_live_range_ordering, execution_threads); if (VLOG_IS_ON(3)) { LOG(INFO) << "Removing unnecessary copies in " << module->name(); LOG(INFO) << "Buffer values, in dependency order: "; From c7eaebc0792ca5748bf85ec35c10f79851b78370 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 24 Sep 2023 02:02:26 -0700 Subject: [PATCH 199/567] compat: Update forward compatibility horizon to 2023-09-24 PiperOrigin-RevId: 567978384 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index ba7e2ef349203f..0ee9625866060c 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -29,7 +29,7 @@ # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2023, 9, 23) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2023, 9, 24) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From fa4274041302f039f47527af1863e6574e73a42c Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Sun, 24 Sep 2023 02:03:15 -0700 Subject: [PATCH 200/567] Update GraphDef version to 1629. PiperOrigin-RevId: 567978540 --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 5419c15d3aec47..aae48690687289 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 1628 // Updated: 2023/9/23 +#define TF_GRAPH_DEF_VERSION 1629 // Updated: 2023/9/24 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From bbb1f8e55b8c738b20a1d06b792c43157ac8c1cd Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 24 Sep 2023 10:40:59 -0700 Subject: [PATCH 201/567] [xla:gpu] Set cublas workspace after calling cublasSetStream We need to set workspace because new memory is allocated every time cublas calls is captured in cuda graph: https://docs.nvidia.com/cuda/cublas/index.html#cuda-graphs-support We need to call it after set stream because cublasSetStream resets workspace: https://docs.nvidia.com/cuda/cublas/index.html#cuda-graphs-support PiperOrigin-RevId: 568032523 --- .../xla/xla/stream_executor/cuda/cuda_blas.cc | 39 ------------------- .../xla/xla/stream_executor/cuda/cuda_blas.h | 5 --- .../xla/stream_executor/gpu/gpu_executor.h | 2 - 3 files changed, 46 deletions(-) diff --git a/third_party/xla/xla/stream_executor/cuda/cuda_blas.cc b/third_party/xla/xla/stream_executor/cuda/cuda_blas.cc index b56fb1236b45f5..d9d5451eb912a7 100644 --- a/third_party/xla/xla/stream_executor/cuda/cuda_blas.cc +++ b/third_party/xla/xla/stream_executor/cuda/cuda_blas.cc @@ -20,7 +20,6 @@ limitations under the License. 
#include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" -#include "absl/synchronization/mutex.h" #include "Eigen/Core" // from @eigen_archive #include "third_party/gpus/cuda/include/cublas_v2.h" #include "third_party/gpus/cuda/include/cuda.h" @@ -199,19 +198,6 @@ bool CUDABlas::Init() { } #endif // CUDA_VERSION >= 11000 - // Initialize cuBLAS workspace memory on device. The workspace size is - // determined by the GPU architecture: - // https://docs.nvidia.com/cuda/cublas/index.html#cublassetworkspace - absl::MutexLock lock(&mu_); - uint64_t workspace_size = - parent_->cc_major() >= 9 ? 1 << 25 /*32 MiB*/ : 1 << 22 /*4 MiB*/; - workspace_ = parent_->Allocate(workspace_size, /*memory_space=*/0); - - if (workspace_.is_null()) { - LOG(ERROR) << "Failed to allocate workspace memory"; - return false; - } - return true; } @@ -228,9 +214,6 @@ CUDABlas::CUDABlas(gpu::GpuExecutor *parent) CUDABlas::~CUDABlas() { if (blas_ != nullptr) { gpu::ScopedActivateExecutorContext sac{parent_}; - if (!workspace_.is_null()) { - parent_->Deallocate(&workspace_); - } cublasDestroy(blas_); } } @@ -249,24 +232,6 @@ bool CUDABlas::SetStream(Stream *stream) { return true; } -bool CUDABlas::SetWorkspace() { - CHECK(blas_ != nullptr); - gpu::ScopedActivateExecutorContext sac{parent_}; - - if (workspace_.is_null()) { - LOG(ERROR) << "cuBLAS workspace is not allocated"; - return false; - } - - cublasStatus_t ret = - cublasSetWorkspace(blas_, workspace_.opaque(), workspace_.size()); - if (ret != CUBLAS_STATUS_SUCCESS) { - LOG(ERROR) << "failed to set workspace for cuBLAS calls: " << ToString(ret); - return false; - } - return true; -} - cudaStream_t CUDABlas::CUDAStream(Stream *stream) { CHECK(stream != nullptr); CHECK(AsGpuStreamValue(stream) != nullptr); @@ -394,10 +359,6 @@ tsl::Status CUDABlas::DoBlasInternalImpl(FuncT cublas_func, Stream *stream, return tsl::errors::Internal("Failed setting stream"); } - if (!SetWorkspace()) { - return tsl::errors::Internal("Failed setting 
workspace"); - } - ScopedCublasMathMode math_mode{blas_}; #if CUBLAS_VER_MAJOR >= 11 if (math_type == CUBLAS_TF32_TENSOR_OP_MATH && diff --git a/third_party/xla/xla/stream_executor/cuda/cuda_blas.h b/third_party/xla/xla/stream_executor/cuda/cuda_blas.h index 1e98ab1787d240..8869b284ce487a 100644 --- a/third_party/xla/xla/stream_executor/cuda/cuda_blas.h +++ b/third_party/xla/xla/stream_executor/cuda/cuda_blas.h @@ -25,7 +25,6 @@ limitations under the License. #include "third_party/gpus/cuda/include/cublas_v2.h" #include "xla/stream_executor/blas.h" #include "xla/stream_executor/cuda/cuda_blas_lt.h" -#include "xla/stream_executor/device_memory.h" #include "xla/stream_executor/platform/port.h" #include "xla/stream_executor/plugin_registry.h" @@ -71,8 +70,6 @@ class CUDABlas : public blas::BlasSupport { // invoked before calling into cuBLAS. bool SetStream(Stream *stream) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); - bool SetWorkspace() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); - // Returns the underlying CUDA stream. cudaStream_t CUDAStream(Stream *stream); @@ -124,8 +121,6 @@ class CUDABlas : public blas::BlasSupport { BlasLt blas_lt_; - DeviceMemoryBase workspace_ ABSL_GUARDED_BY(mu_); - SE_DISALLOW_COPY_AND_ASSIGN(CUDABlas); }; diff --git a/third_party/xla/xla/stream_executor/gpu/gpu_executor.h b/third_party/xla/xla/stream_executor/gpu/gpu_executor.h index 06b2c014e88629..f5ed9093451926 100644 --- a/third_party/xla/xla/stream_executor/gpu/gpu_executor.h +++ b/third_party/xla/xla/stream_executor/gpu/gpu_executor.h @@ -284,8 +284,6 @@ class GpuExecutor : public internal::StreamExecutorInterface { return it->second; } - int cc_major() const { return cc_major_; } - private: // Host callback landing routine invoked by CUDA. 
// data: User-provided callback provided to HostCallback() above, captured From 03813792494acc2512659e17773ec01969f9e633 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Sun, 24 Sep 2023 21:31:44 -0700 Subject: [PATCH 202/567] [XLA] Fix build for graph dumper for OSS builds The stream_executor target does not bring in the symbol definitions for PiperOrigin-RevId: 568101334 --- third_party/xla/xla/service/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/third_party/xla/xla/service/BUILD b/third_party/xla/xla/service/BUILD index b32c8fb37c070f..5cd69b9ca4f2fd 100644 --- a/third_party/xla/xla/service/BUILD +++ b/third_party/xla/xla/service/BUILD @@ -5274,6 +5274,7 @@ cc_library( "//xla/service/gpu:backend_configs_cc", "//xla/service/gpu:cublas_cudnn", "//xla/stream_executor", + "//xla/stream_executor:dnn", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/strings", From a984b89903194afde4cafab4658d1af8b5c5fc15 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 24 Sep 2023 22:29:04 -0700 Subject: [PATCH 203/567] Internal, build update for lexan. 
PiperOrigin-RevId: 568111192 --- third_party/xla/third_party/tsl/tsl/tsl.bzl | 45 +++++++++++++++------ 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/third_party/xla/third_party/tsl/tsl/tsl.bzl b/third_party/xla/third_party/tsl/tsl/tsl.bzl index adc0cbf00c39c4..e1c2b364fbca78 100644 --- a/third_party/xla/third_party/tsl/tsl/tsl.bzl +++ b/third_party/xla/third_party/tsl/tsl/tsl.bzl @@ -1,18 +1,19 @@ """Provides build configuration for TSL""" +load("@bazel_skylib//lib:new_sets.bzl", "sets") load( "@local_config_cuda//cuda:build_defs.bzl", "if_cuda", ) load( - "//tsl/platform:rules_cc.bzl", - "cc_binary", - "cc_library", - "cc_shared_library", + "//third_party/compute_library:build_defs.bzl", + "if_enable_acl", ) load( - "@local_config_tensorrt//:build_defs.bzl", - "if_tensorrt", + "//third_party/mkl_dnn:build_defs.bzl", + "if_mkldnn_aarch64_acl", + "if_mkldnn_aarch64_acl_openmp", + "if_mkldnn_openmp", ) load( "@local_config_rocm//rocm:build_defs.bzl", @@ -26,16 +27,15 @@ load( "onednn_v3_define", ) load( - "//third_party/mkl_dnn:build_defs.bzl", - "if_mkldnn_aarch64_acl", - "if_mkldnn_aarch64_acl_openmp", - "if_mkldnn_openmp", + "//tsl/platform:rules_cc.bzl", + "cc_binary", + "cc_library", + "cc_shared_library", ) load( - "//third_party/compute_library:build_defs.bzl", - "if_enable_acl", + "@local_config_tensorrt//:build_defs.bzl", + "if_tensorrt", ) -load("@bazel_skylib//lib:new_sets.bzl", "sets") two_gpu_tags = ["requires-gpu-nvidia:2", "notap", "manual", "no_pip"] @@ -198,6 +198,14 @@ def if_with_tpu_support(if_true, if_false = []): def get_win_copts(is_external = False): WINDOWS_COPTS = [ + # copybara:uncomment_begin(no MSVC flags in google) + # "-DPLATFORM_WINDOWS", + # "-DEIGEN_HAS_C99_MATH", + # "-DTENSORFLOW_USE_EIGEN_THREADPOOL", + # "-DEIGEN_AVOID_STL_ARRAY", + # "-Iexternal/gemmlowp", + # "-DNOGDI", + # copybara:uncomment_end_and_comment_begin "/DPLATFORM_WINDOWS", "/DEIGEN_HAS_C99_MATH", "/DTENSORFLOW_USE_EIGEN_THREADPOOL", @@ -212,13 
+220,24 @@ def get_win_copts(is_external = False): # "/EHs-c-", "/wd4577", "/DNOGDI", + # copybara:comment_end # Also see build:windows lines in tensorflow/opensource_only/.bazelrc # where we set some other options globally. ] + if is_external: + # copybara:uncomment_begin(no MSVC flags in google) + # return WINDOWS_COPTS + ["-UTF_COMPILE_LIBRARY"] + # copybara:uncomment_end_and_comment_begin return WINDOWS_COPTS + ["/UTF_COMPILE_LIBRARY"] + # copybara:comment_end + else: + # copybara:uncomment_begin(no MSVC flags in google) + # return WINDOWS_COPTS + ["-DTF_COMPILE_LIBRARY"] + # copybara:uncomment_end_and_comment_begin return WINDOWS_COPTS + ["/DTF_COMPILE_LIBRARY"] + # copybara:comment_end def tsl_copts( android_optimization_level_override = "-O2", From 535288070845a6fb54e6d2a5b1b25549c80fedd2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 25 Sep 2023 00:19:53 -0700 Subject: [PATCH 204/567] Update TFRT dependency to use revision http://github.com/tensorflow/runtime/commit/549bf94d9643e0a1a9ea71949fde9e0a21dd30ea. PiperOrigin-RevId: 568131952 --- third_party/tf_runtime/workspace.bzl | 4 ++-- third_party/xla/third_party/tf_runtime/workspace.bzl | 4 ++-- .../xla/third_party/tsl/third_party/tf_runtime/workspace.bzl | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/third_party/tf_runtime/workspace.bzl b/third_party/tf_runtime/workspace.bzl index 321212b36339e1..66c8f974bc5a69 100644 --- a/third_party/tf_runtime/workspace.bzl +++ b/third_party/tf_runtime/workspace.bzl @@ -6,8 +6,8 @@ def repo(): """Imports TFRT.""" # Attention: tools parse and update these lines. 
- TFRT_COMMIT = "e8d8c19d9314439fb3e1c08936ba0dc6863d1ccc" - TFRT_SHA256 = "775221f0d876c5d5df52f8c6fdd072bc52352c65276908d371fb164888e0c0d4" + TFRT_COMMIT = "549bf94d9643e0a1a9ea71949fde9e0a21dd30ea" + TFRT_SHA256 = "d8550a2abb57a78bd786947104d71f7735c96ed68b672e9afe2c45f143488ade" tf_http_archive( name = "tf_runtime", diff --git a/third_party/xla/third_party/tf_runtime/workspace.bzl b/third_party/xla/third_party/tf_runtime/workspace.bzl index 321212b36339e1..66c8f974bc5a69 100644 --- a/third_party/xla/third_party/tf_runtime/workspace.bzl +++ b/third_party/xla/third_party/tf_runtime/workspace.bzl @@ -6,8 +6,8 @@ def repo(): """Imports TFRT.""" # Attention: tools parse and update these lines. - TFRT_COMMIT = "e8d8c19d9314439fb3e1c08936ba0dc6863d1ccc" - TFRT_SHA256 = "775221f0d876c5d5df52f8c6fdd072bc52352c65276908d371fb164888e0c0d4" + TFRT_COMMIT = "549bf94d9643e0a1a9ea71949fde9e0a21dd30ea" + TFRT_SHA256 = "d8550a2abb57a78bd786947104d71f7735c96ed68b672e9afe2c45f143488ade" tf_http_archive( name = "tf_runtime", diff --git a/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl b/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl index 321212b36339e1..66c8f974bc5a69 100644 --- a/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl +++ b/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl @@ -6,8 +6,8 @@ def repo(): """Imports TFRT.""" # Attention: tools parse and update these lines. 
- TFRT_COMMIT = "e8d8c19d9314439fb3e1c08936ba0dc6863d1ccc" - TFRT_SHA256 = "775221f0d876c5d5df52f8c6fdd072bc52352c65276908d371fb164888e0c0d4" + TFRT_COMMIT = "549bf94d9643e0a1a9ea71949fde9e0a21dd30ea" + TFRT_SHA256 = "d8550a2abb57a78bd786947104d71f7735c96ed68b672e9afe2c45f143488ade" tf_http_archive( name = "tf_runtime", From 014b270e98a2a0b045f7e9e8c7a917bf3aad898d Mon Sep 17 00:00:00 2001 From: sushreebarsa <84765720+sushreebarsa@users.noreply.github.com> Date: Mon, 25 Sep 2023 12:59:06 +0530 Subject: [PATCH 205/567] Corrected several typos Documentation has been updated with correct words. Please have a look at this and do the needful Thank you! --- tensorflow/python/debug/cli/debugger_cli_common_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/debug/cli/debugger_cli_common_test.py b/tensorflow/python/debug/cli/debugger_cli_common_test.py index 0bef5ab78ce1e6..fad304c271e1db 100644 --- a/tensorflow/python/debug/cli/debugger_cli_common_test.py +++ b/tensorflow/python/debug/cli/debugger_cli_common_test.py @@ -700,7 +700,7 @@ def testWrappingWithAttrCutoff(self): self.assertEqual("shorter wavelength", out.annotations[3]) self.assertFalse(4 in out.annotations) - # Chec that the non-row-index field is present in output. + # Check that the non-row-index field is present in output. 
self.assertEqual("foo", out.annotations["metadata"]) self.assertEqual(new_line_indices, [0, 1, 3]) From c0c7efcff5bc6bffd2d0668f777196509d6971e8 Mon Sep 17 00:00:00 2001 From: sushreebarsa <84765720+sushreebarsa@users.noreply.github.com> Date: Mon, 25 Sep 2023 13:06:48 +0530 Subject: [PATCH 206/567] Update event_file_writer.py --- tensorflow/python/summary/writer/event_file_writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/summary/writer/event_file_writer.py b/tensorflow/python/summary/writer/event_file_writer.py index 09479c2cbbb284..7865c4d845ee2f 100644 --- a/tensorflow/python/summary/writer/event_file_writer.py +++ b/tensorflow/python/summary/writer/event_file_writer.py @@ -137,7 +137,7 @@ def flush(self): disk. """ if not self._closed: - # Request a flush operation by enqueing a sentinel and then waiting for + # Request a flush operation by enqueuing a sentinel and then waiting for # the writer thread to mark the flush as complete. self._flush_complete.clear() self._try_put(self._flush_sentinel) From 99605b91f178a68d1eb8aa6bfb8bd9164a43d255 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 25 Sep 2023 01:50:06 -0700 Subject: [PATCH 207/567] Internal Code Change PiperOrigin-RevId: 568147634 --- tensorflow/compiler/mlir/tfrt/ir/BUILD | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/compiler/mlir/tfrt/ir/BUILD b/tensorflow/compiler/mlir/tfrt/ir/BUILD index 8e1a0ef9ae750a..80257d4812ecd3 100644 --- a/tensorflow/compiler/mlir/tfrt/ir/BUILD +++ b/tensorflow/compiler/mlir/tfrt/ir/BUILD @@ -70,7 +70,6 @@ cc_library( visibility = [ "//tensorflow/compiler/mlir/tfrt:__subpackages__", "//tensorflow/core/runtime_fallback:internal", - "//tensorflow/core/tfrt/mlrt/application/tensorflow/tests:__subpackages__", ], deps = [ ":tfrt_fallback_common", From bf95ed963c863bfea33de7afad1dc47cafeac1d8 Mon Sep 17 00:00:00 2001 From: sushreebarsa <84765720+sushreebarsa@users.noreply.github.com> Date: Mon, 25 Sep 2023 14:31:21 +0530 Subject: [PATCH 208/567] Update summary.py --- tensorflow/python/summary/summary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/summary/summary.py b/tensorflow/python/summary/summary.py index da3783b7336ed7..161456a7aecae0 100644 --- a/tensorflow/python/summary/summary.py +++ b/tensorflow/python/summary/summary.py @@ -97,7 +97,7 @@ def scalar(name, tensor, collections=None, family=None): TF 2.0](https://www.tensorflow.org/tensorboard/migrate#in_tf_1x) for concrete steps for migration. `tf.summary.scalar` can also log training metrics in Keras, you can check [Logging training metrics in - Keras](https://www.tensorflow.org/tensorboard/scalars_and_keras) for detials. + Keras](https://www.tensorflow.org/tensorboard/scalars_and_keras) for details. #### How to Map Arguments From ac05bfd3f4d0733eeb4211f9cbee1d0313aba75a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 25 Sep 2023 02:01:58 -0700 Subject: [PATCH 209/567] Update GraphDef version to 1630. 
PiperOrigin-RevId: 568149848 --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index aae48690687289..3f5ae3ed744546 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 1629 // Updated: 2023/9/24 +#define TF_GRAPH_DEF_VERSION 1630 // Updated: 2023/9/25 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From 92213dbdb2a8c56b045aac745f466f9d15767435 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 25 Sep 2023 02:02:03 -0700 Subject: [PATCH 210/567] compat: Update forward compatibility horizon to 2023-09-25 PiperOrigin-RevId: 568149868 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 0ee9625866060c..dbb8b5b32bce92 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -29,7 +29,7 @@ # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2023, 9, 24) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2023, 9, 25) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From 2d3445b9b9e01470094174ef83f2d7d700f02259 Mon Sep 17 00:00:00 2001 From: sushreebarsa <84765720+sushreebarsa@users.noreply.github.com> Date: Mon, 25 Sep 2023 14:47:19 +0530 Subject: [PATCH 211/567] Update run_models.py --- tensorflow/python/compiler/tensorrt/model_tests/run_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compiler/tensorrt/model_tests/run_models.py b/tensorflow/python/compiler/tensorrt/model_tests/run_models.py index 8af5b35af9d03b..e1e8556a61aedb 100644 --- a/tensorflow/python/compiler/tensorrt/model_tests/run_models.py +++ b/tensorflow/python/compiler/tensorrt/model_tests/run_models.py @@ -105,7 +105,7 @@ flags.DEFINE_enum("output_format", "CSV", ["CSV", "JSON"], "Output format of analysis results.") -DEFAUL_TRT_CONVERT_PARAMS = trt.DEFAULT_TRT_CONVERSION_PARAMS +DEFAULT_TRT_CONVERT_PARAMS = trt.DEFAULT_TRT_CONVERSION_PARAMS # pylint: disable=bad-whitespace From 7ff47e53fb2b3d9567f362dcd70066add7c7721c Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 25 Sep 2023 03:46:16 -0700 Subject: [PATCH 212/567] Integrate LLVM at llvm/llvm-project@f7bf99fb529f Updates LLVM usage to match [f7bf99fb529f](https://github.com/llvm/llvm-project/commit/f7bf99fb529f) PiperOrigin-RevId: 568169974 --- third_party/llvm/workspace.bzl | 4 ++-- .../mlir/backends/cpu/transforms/sparse_rewrite_passes.cc | 8 ++++---- third_party/xla/xla/translate/hlo_to_mhlo/hlo_utils.h | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/third_party/llvm/workspace.bzl b/third_party/llvm/workspace.bzl index f24443e14ea083..59331ea1df5291 100644 --- a/third_party/llvm/workspace.bzl +++ b/third_party/llvm/workspace.bzl @@ -4,8 +4,8 @@ load("//third_party:repo.bzl", "tf_http_archive") def repo(name): """Imports LLVM.""" - LLVM_COMMIT = "46d5d264fc66a017bbd0182b2b5fcc0f3f23d3be" - LLVM_SHA256 = "f31d546ecdcd07971f7f8f5f6f83ee2e101bf677975fe96d6ba837628eba7e24" + LLVM_COMMIT = "f7bf99fb529f9993548bf6fac0b5523c791f2416" + LLVM_SHA256 = "567656a6da8e3608a81859dcc57e4279ae468936769177d66a775d6800003434" tf_http_archive( name = name, diff --git a/third_party/xla/xla/mlir/backends/cpu/transforms/sparse_rewrite_passes.cc b/third_party/xla/xla/mlir/backends/cpu/transforms/sparse_rewrite_passes.cc index ce2c58cc647f6f..3402f867a3915f 100644 --- a/third_party/xla/xla/mlir/backends/cpu/transforms/sparse_rewrite_passes.cc +++ b/third_party/xla/xla/mlir/backends/cpu/transforms/sparse_rewrite_passes.cc @@ -242,8 +242,8 @@ struct SparseDynSliceCallRewriter { auto srcEnc = retTp.getEncoding().cast(); auto sliceEnc = sparse_tensor::SparseTensorEncodingAttr::get( - ctx, srcEnc.getLvlTypes(), srcEnc.getDimToLvl(), srcEnc.getPosWidth(), - srcEnc.getCrdWidth(), slice_attrs); + ctx, srcEnc.getLvlTypes(), srcEnc.getDimToLvl(), srcEnc.getLvlToDim(), + srcEnc.getPosWidth(), srcEnc.getCrdWidth(), slice_attrs); auto sliceTp = RankedTensorType::get(retTp.getShape(), retTp.getElementType(), sliceEnc); @@ -351,8 +351,8 @@ struct 
SparseSliceCallRewriter { retTp.getEncoding().cast(); // TODO(peiming): add a getSliceEncodingFrom into MLIR upstream. auto sliceEnc = sparse_tensor::SparseTensorEncodingAttr::get( - ctx, srcEnc.getLvlTypes(), srcEnc.getDimToLvl(), srcEnc.getPosWidth(), - srcEnc.getCrdWidth(), slice_attrs); + ctx, srcEnc.getLvlTypes(), srcEnc.getDimToLvl(), srcEnc.getLvlToDim(), + srcEnc.getPosWidth(), srcEnc.getCrdWidth(), slice_attrs); auto sliceTp = RankedTensorType::get(retTp.getShape(), retTp.getElementType(), sliceEnc); auto slice = rewriter.create( diff --git a/third_party/xla/xla/translate/hlo_to_mhlo/hlo_utils.h b/third_party/xla/xla/translate/hlo_to_mhlo/hlo_utils.h index 9ca495016aeb93..5cc3fc571d281d 100644 --- a/third_party/xla/xla/translate/hlo_to_mhlo/hlo_utils.h +++ b/third_party/xla/xla/translate/hlo_to_mhlo/hlo_utils.h @@ -127,8 +127,8 @@ static StatusOr ConvertTensorShapeToType(const Shape& xla_ty, auto id_map = mlir::AffineMap::getPermutationMap(major_to_minor, builder.getContext()); // TODO(atondwal): support sizes other than 32 when XLA does - encoding = SparseTensorEncodingAttr::get(builder.getContext(), dlts, - id_map, 32, 32); + encoding = SparseTensorEncodingAttr::get( + builder.getContext(), dlts, id_map, mlir::AffineMap(), 32, 32); } } return TypeT::get(shape, element_type_or.value(), encoding); From 9a0c0322dccc8a84a190a76ce9506eacd4d9c456 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 25 Sep 2023 04:27:31 -0700 Subject: [PATCH 213/567] Update TFRT dependency to use revision http://github.com/tensorflow/runtime/commit/cf730a32058241ae24fd12fe0a9a0258faa0ca89. 
PiperOrigin-RevId: 568177684 --- third_party/tf_runtime/workspace.bzl | 4 ++-- third_party/xla/third_party/tf_runtime/workspace.bzl | 4 ++-- .../xla/third_party/tsl/third_party/tf_runtime/workspace.bzl | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/third_party/tf_runtime/workspace.bzl b/third_party/tf_runtime/workspace.bzl index 66c8f974bc5a69..94cc1beb5b2f9c 100644 --- a/third_party/tf_runtime/workspace.bzl +++ b/third_party/tf_runtime/workspace.bzl @@ -6,8 +6,8 @@ def repo(): """Imports TFRT.""" # Attention: tools parse and update these lines. - TFRT_COMMIT = "549bf94d9643e0a1a9ea71949fde9e0a21dd30ea" - TFRT_SHA256 = "d8550a2abb57a78bd786947104d71f7735c96ed68b672e9afe2c45f143488ade" + TFRT_COMMIT = "cf730a32058241ae24fd12fe0a9a0258faa0ca89" + TFRT_SHA256 = "7fcc709032d16aa5f2cf04e0c4cbcdf9e0dbafb8377e141d2c0074f24ede4066" tf_http_archive( name = "tf_runtime", diff --git a/third_party/xla/third_party/tf_runtime/workspace.bzl b/third_party/xla/third_party/tf_runtime/workspace.bzl index 66c8f974bc5a69..94cc1beb5b2f9c 100644 --- a/third_party/xla/third_party/tf_runtime/workspace.bzl +++ b/third_party/xla/third_party/tf_runtime/workspace.bzl @@ -6,8 +6,8 @@ def repo(): """Imports TFRT.""" # Attention: tools parse and update these lines. 
- TFRT_COMMIT = "549bf94d9643e0a1a9ea71949fde9e0a21dd30ea" - TFRT_SHA256 = "d8550a2abb57a78bd786947104d71f7735c96ed68b672e9afe2c45f143488ade" + TFRT_COMMIT = "cf730a32058241ae24fd12fe0a9a0258faa0ca89" + TFRT_SHA256 = "7fcc709032d16aa5f2cf04e0c4cbcdf9e0dbafb8377e141d2c0074f24ede4066" tf_http_archive( name = "tf_runtime", diff --git a/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl b/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl index 66c8f974bc5a69..94cc1beb5b2f9c 100644 --- a/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl +++ b/third_party/xla/third_party/tsl/third_party/tf_runtime/workspace.bzl @@ -6,8 +6,8 @@ def repo(): """Imports TFRT.""" # Attention: tools parse and update these lines. - TFRT_COMMIT = "549bf94d9643e0a1a9ea71949fde9e0a21dd30ea" - TFRT_SHA256 = "d8550a2abb57a78bd786947104d71f7735c96ed68b672e9afe2c45f143488ade" + TFRT_COMMIT = "cf730a32058241ae24fd12fe0a9a0258faa0ca89" + TFRT_SHA256 = "7fcc709032d16aa5f2cf04e0c4cbcdf9e0dbafb8377e141d2c0074f24ede4066" tf_http_archive( name = "tf_runtime", From 5edb674773776a44e24a73bcab94a38d7e15dab4 Mon Sep 17 00:00:00 2001 From: Mason Chang Date: Mon, 25 Sep 2023 07:52:57 -0700 Subject: [PATCH 214/567] Make a TF Dialect to Executor API, which is used in the Bridge Phase 1. We'll separate out the current implementation in bridge.cc which currently always runs as part of the Bridge. 
PiperOrigin-RevId: 568218119 --- tensorflow/compiler/mlir/tf2xla/api/v2/BUILD | 22 ++++++++++ .../tf2xla/api/v2/tf_dialect_to_executor.cc | 34 +++++++++++++++ .../tf2xla/api/v2/tf_dialect_to_executor.h | 42 +++++++++++++++++++ .../api/v2/tf_dialect_to_executor_test.cc | 37 ++++++++++++++++ 4 files changed, 135 insertions(+) create mode 100644 tensorflow/compiler/mlir/tf2xla/api/v2/tf_dialect_to_executor.cc create mode 100644 tensorflow/compiler/mlir/tf2xla/api/v2/tf_dialect_to_executor.h create mode 100644 tensorflow/compiler/mlir/tf2xla/api/v2/tf_dialect_to_executor_test.cc diff --git a/tensorflow/compiler/mlir/tf2xla/api/v2/BUILD b/tensorflow/compiler/mlir/tf2xla/api/v2/BUILD index 0ff72d68d418c6..67a5fba6ccccf0 100644 --- a/tensorflow/compiler/mlir/tf2xla/api/v2/BUILD +++ b/tensorflow/compiler/mlir/tf2xla/api/v2/BUILD @@ -116,3 +116,25 @@ tf_cc_test( "@local_tsl//tsl/lib/core:status_test_util", ], ) + +cc_library( + name = "tf_dialect_to_executor", + srcs = ["tf_dialect_to_executor.cc"], + hdrs = ["tf_dialect_to_executor.h"], + deps = [ + "//tensorflow/core/platform:status", + "@llvm-project//mlir:IR", + "@local_tsl//tsl/platform:status", + ], +) + +tf_cc_test( + name = "tf_dialect_to_executor_test", + srcs = ["tf_dialect_to_executor_test.cc"], + deps = [ + ":tf_dialect_to_executor", + "@com_google_googletest//:gtest_main", + "@llvm-project//mlir:IR", + "@local_tsl//tsl/lib/core:status_test_util", + ], +) diff --git a/tensorflow/compiler/mlir/tf2xla/api/v2/tf_dialect_to_executor.cc b/tensorflow/compiler/mlir/tf2xla/api/v2/tf_dialect_to_executor.cc new file mode 100644 index 00000000000000..df04bfed17fd43 --- /dev/null +++ b/tensorflow/compiler/mlir/tf2xla/api/v2/tf_dialect_to_executor.cc @@ -0,0 +1,34 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/mlir/tf2xla/api/v2/tf_dialect_to_executor.h" + +#include "mlir/IR/BuiltinOps.h" // from @llvm-project +#include "tensorflow/core/platform/status.h" +#include "tsl/platform/status.h" + +namespace tensorflow { +namespace tf2xla { +namespace v2 { + +using mlir::ModuleOp; + +tensorflow::Status ExportFromTensorflowDialectToExecutor(ModuleOp module) { + return tsl::OkStatus(); +} + +} // namespace v2 +} // namespace tf2xla +} // namespace tensorflow diff --git a/tensorflow/compiler/mlir/tf2xla/api/v2/tf_dialect_to_executor.h b/tensorflow/compiler/mlir/tf2xla/api/v2/tf_dialect_to_executor.h new file mode 100644 index 00000000000000..16b9ad252b7c2f --- /dev/null +++ b/tensorflow/compiler/mlir/tf2xla/api/v2/tf_dialect_to_executor.h @@ -0,0 +1,42 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_MLIR_TF2XLA_API_V2_TF_DIALECT_TO_EXECUTOR_H_ +#define TENSORFLOW_COMPILER_MLIR_TF2XLA_API_V2_TF_DIALECT_TO_EXECUTOR_H_ + +#include "mlir/IR/BuiltinOps.h" // from @llvm-project +#include "tensorflow/core/platform/status.h" + +namespace tensorflow { +namespace tf2xla { +namespace v2 { + +// Given the input Module op that's in the Tensorflow Dialect, convert the MLIR +// module in place to the Tensorflow Executor Dialect. Returns an OK Status if +// success, otherwise failure with an error message. +// The Tensorflow Executor Dialect is required to export an MLIR module to a +// Tensorflow GraphDef. This API will add control dependencies and verify that +// the conversion was successful. +// +// Input: A MLIR Module in the Tensorflow Dialect with no +// `tf_device.cluster_func` ops. +// Output: A MLIR module in the Tensorflow Executor Dialect. +tensorflow::Status ExportFromTensorflowDialectToExecutor(mlir::ModuleOp module); + +} // namespace v2 +} // namespace tf2xla +} // namespace tensorflow + +#endif // TENSORFLOW_COMPILER_MLIR_TF2XLA_API_V2_TF_DIALECT_TO_EXECUTOR_H_ diff --git a/tensorflow/compiler/mlir/tf2xla/api/v2/tf_dialect_to_executor_test.cc b/tensorflow/compiler/mlir/tf2xla/api/v2/tf_dialect_to_executor_test.cc new file mode 100644 index 00000000000000..e43e82098066a8 --- /dev/null +++ b/tensorflow/compiler/mlir/tf2xla/api/v2/tf_dialect_to_executor_test.cc @@ -0,0 +1,37 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/mlir/tf2xla/api/v2/tf_dialect_to_executor.h" + +#include <gtest/gtest.h> +#include "mlir/IR/BuiltinOps.h" // from @llvm-project +#include "tsl/lib/core/status_test_util.h" + +namespace tensorflow { +namespace tf2xla { +namespace v2 { +namespace { + +using mlir::ModuleOp; + +TEST(TensorflowDialectToExecutor, ConvertsToExecutor) { + ModuleOp module; + TF_ASSERT_OK(ExportFromTensorflowDialectToExecutor(module)); +} + +} // namespace +} // namespace v2 +} // namespace tf2xla +} // namespace tensorflow From b2a70ab4944fc4c54e8b6bf1b3bfcfc28d2abaad Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 25 Sep 2023 09:17:16 -0700 Subject: [PATCH 215/567] [XLA:GPU] Allow fusing binary elementwise operations where exactly one operand is a splat constant into normalization diamonds. 
PiperOrigin-RevId: 568238994 --- third_party/xla/xla/service/gpu/BUILD | 1 + .../xla/xla/service/gpu/ir_emitter_triton.cc | 12 +- .../ir_emitter_triton_parametrized_test.cc | 402 ++++++++++++++++++ .../service/gpu/softmax_rewriter_triton.cc | 67 ++- .../gpu/softmax_rewriter_triton_test.cc | 320 ++++++++++++++ 5 files changed, 789 insertions(+), 13 deletions(-) diff --git a/third_party/xla/xla/service/gpu/BUILD b/third_party/xla/xla/service/gpu/BUILD index faf60063e796dc..9d988927395b17 100644 --- a/third_party/xla/xla/service/gpu/BUILD +++ b/third_party/xla/xla/service/gpu/BUILD @@ -1326,6 +1326,7 @@ cc_library( "//xla:util", "//xla:xla_data_proto_cc", "//xla/hlo/ir:hlo", + "//xla/hlo/utils:hlo_query", "//xla/service:hlo_pass", "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/container:flat_hash_map", diff --git a/third_party/xla/xla/service/gpu/ir_emitter_triton.cc b/third_party/xla/xla/service/gpu/ir_emitter_triton.cc index 9b7fbf675625ae..bb257683e203e2 100644 --- a/third_party/xla/xla/service/gpu/ir_emitter_triton.cc +++ b/third_party/xla/xla/service/gpu/ir_emitter_triton.cc @@ -598,8 +598,16 @@ StatusOr EmitReduce(ImplicitLocOpBuilder& b, b.setInsertionPointAfter(reduction); } - return Cast(b, reduction.getResult().front(), - TritonType(b, hlo_reduce.shape().element_type())); + Value result = reduction.getResult().front(); + + // We want to return a tensor of float32, but the ReturnReduceOp produces an + // f32 constant when reducing a single dim. To convert to a tensor we splat + // the result. 
+ if (!reduction.getResult().front().dyn_cast<TensorType>()) { + result = Splat(b, result, {}); + } + + return Cast(b, result, TritonType(b, hlo_reduce.shape().element_type())); } // Emit sequence of instructions using compatible tiling ordered producers diff --git a/third_party/xla/xla/service/gpu/ir_emitter_triton_parametrized_test.cc b/third_party/xla/xla/service/gpu/ir_emitter_triton_parametrized_test.cc index 09af76884ead91..9be7fdbede4e83 100644 --- a/third_party/xla/xla/service/gpu/ir_emitter_triton_parametrized_test.cc +++ b/third_party/xla/xla/service/gpu/ir_emitter_triton_parametrized_test.cc @@ -1893,6 +1893,408 @@ ENTRY main { EXPECT_TRUE(RunAndCompare(hlo_text, ErrorSpec(/*aabs=*/0, /*arel=*/0))); } +TEST_P(TritonSoftmaxTest, CanFuseAndEmitRMSNormDiamond) { + PrimitiveType data_type = GetParam(); + + if (data_type == F16) { + GTEST_SKIP() << "rsqrt op does not support F16."; + } else if (data_type == BF16 && !GetCudaComputeCapability().IsAtLeast( + se::CudaComputeCapability::AMPERE)) { + GTEST_SKIP() << R"(No BF16 before Ampere. 
Pre-Ampere BF16 behavior is tested + in CanFuseAndEmitFirstSoftmaxDiamond, and in SoftmaxRewriterTritonTest.)"; + } + + const std::string hlo_text_template = R"( +HloModule rms_norm +add_computation { + arg_0 = $0[] parameter(0) + arg_1 = $0[] parameter(1) + ROOT add.1 = $0[] add(arg_0, arg_1) +} +ENTRY main.30 { + param_0 = $0[10,10,10,128]{3,2,1,0} parameter(0) + multiply_param = $0[10,10,10,128]{3,2,1,0} multiply(param_0, param_0) + constant_0 = $0[] constant(0) + reduce = $0[10,10,10]{2,1,0} reduce(multiply_param, constant_0), dimensions={3}, to_apply=add_computation + constant_1 = $0[] constant(0.333333343) + splat = $0[10,10,10]{2,1,0} broadcast(constant_1), dimensions={} + multiply_splat = $0[10,10,10]{2,1,0} multiply(reduce, splat) + epsilon = $0[] constant(1e-06) + splat_epsilon = $0[10,10,10]{2,1,0} broadcast(epsilon), dimensions={} + add = $0[10,10,10]{2,1,0} add(multiply_splat, splat_epsilon) + rsqrt = $0[10,10,10]{2,1,0} rsqrt(add) + broadcast = $0[10,10,10,128]{3,2,1,0} broadcast(rsqrt), dimensions={0,1,2} + ROOT multiply = $0[10,10,10,128]{3,2,1,0} multiply(param_0, broadcast) +} +)"; + + const std::string hlo_text = absl::Substitute( + hlo_text_template, primitive_util::LowercasePrimitiveTypeName(data_type)); + + const std::string hlo_ref_template = R"( +; CHECK: ENTRY +; CHECK: %[[P0:.*]] = $0[10,10,10,128]{3,2,1,0} parameter(0) +; CHECK: ROOT +; CHECK-SAME: fusion(%[[P0]]) +; CHECK-SAME: kind=kCustom +; CHECK-SAME: __triton_softmax +)"; + + const std::string hlo_ref = absl::Substitute( + hlo_ref_template, primitive_util::LowercasePrimitiveTypeName(data_type)); + + MatchOptimizedHlo(hlo_text, hlo_ref); + + float tolerance; + switch (data_type) { + case F32: + tolerance = 1e-6; + break; + case F16: + tolerance = 2e-4; + break; + case BF16: + tolerance = 4e-2; + break; + default: + ABSL_UNREACHABLE(); + } + EXPECT_TRUE(RunAndCompare(hlo_text, + ErrorSpec(/*aabs=*/tolerance, /*arel=*/tolerance))); +} + +TEST_P( + TritonSoftmaxTest, + 
CanFuseAndEmitBinaryElementwiseWhereTheFirstOperandIsASplatConstantBetweenDiamonds) { // NOLINT(whitespace/line_length) + PrimitiveType data_type = GetParam(); + + if (data_type == BF16 && !GetCudaComputeCapability().IsAtLeast( + se::CudaComputeCapability::AMPERE)) { + GTEST_SKIP() << R"(No BF16 before Ampere. Pre-Ampere BF16 behavior is tested + in CanFuseAndEmitFirstSoftmaxDiamond, and in SoftmaxRewriterTritonTest.)"; + } + + const std::string hlo_text_template = R"( +HloModule fusible_diamonds +add_computation { + arg_0.1 = $0[] parameter(0) + arg_1.1 = $0[] parameter(1) + ROOT add = $0[] add(arg_0.1, arg_1.1) +} +ENTRY main { + param_0 = $0[127,125]{1,0} parameter(0) + constant_neg_inf = $0[] constant(-inf) + reduce = $0[127]{0} reduce(param_0, constant_neg_inf), dimensions={1}, to_apply=add_computation + broadcast = $0[127,125]{1,0} broadcast(reduce), dimensions={0} + subtract = $0[127,125]{1,0} subtract(param_0, broadcast) + constant = $0[] constant(0.333333343) + broadcast_splat = $0[127,125]{1,0} broadcast(constant), dimensions={} + multiply = $0[127,125]{1,0} multiply(broadcast_splat, subtract) + constant_zero = $0[] constant(0) + second_reduce = $0[127]{0} reduce(multiply, constant_zero), dimensions={1}, to_apply=add_computation + second_broadcast = $0[127,125]{1,0} broadcast(second_reduce), dimensions={0} + ROOT second_subtract = $0[127,125]{1,0} subtract(multiply, second_broadcast) +} +)"; + + const std::string hlo_text = absl::Substitute( + hlo_text_template, primitive_util::LowercasePrimitiveTypeName(data_type)); + + const std::string hlo_ref_template = R"( +; CHECK: ENTRY +; CHECK: %[[P0:.*]] = $0[127,125]{1,0} parameter(0) +; CHECK: ROOT +; CHECK-SAME: fusion(%[[P0]]) +; CHECK-SAME: kind=kCustom +; CHECK-SAME: __triton_softmax +)"; + + const std::string hlo_ref = absl::Substitute( + hlo_ref_template, primitive_util::LowercasePrimitiveTypeName(data_type)); + + MatchOptimizedHlo(hlo_text, hlo_ref); + + float tolerance; + switch (data_type) { + case 
F32: + tolerance = 1e-6; + break; + case F16: + tolerance = 2e-4; + break; + case BF16: + tolerance = 2e-2; + break; + default: + ABSL_UNREACHABLE(); + } + EXPECT_TRUE(RunAndCompare(hlo_text, + ErrorSpec(/*aabs=*/tolerance, /*arel=*/tolerance))); +} + +TEST_P( + TritonSoftmaxTest, + CanFuseAndEmitBinaryElementwiseWhereTheSecondOperandIsASplatConstantBetweenDiamonds) { // NOLINT(whitespace/line_length) + PrimitiveType data_type = GetParam(); + + if (data_type == BF16 && !GetCudaComputeCapability().IsAtLeast( + se::CudaComputeCapability::AMPERE)) { + GTEST_SKIP() << R"(No BF16 before Ampere. Pre-Ampere BF16 behavior is tested + in CanFuseAndEmitFirstSoftmaxDiamond, and in SoftmaxRewriterTritonTest.)"; + } + + const std::string hlo_text_template = R"( +HloModule fusible_diamonds +add_computation { + arg_0.1 = $0[] parameter(0) + arg_1.1 = $0[] parameter(1) + ROOT add = $0[] add(arg_0.1, arg_1.1) +} +ENTRY main { + param_0 = $0[127,125]{1,0} parameter(0) + constant_neg_inf = $0[] constant(-inf) + reduce = $0[127]{0} reduce(param_0, constant_neg_inf), dimensions={1}, to_apply=add_computation + broadcast = $0[127,125]{1,0} broadcast(reduce), dimensions={0} + subtract = $0[127,125]{1,0} subtract(param_0, broadcast) + constant = $0[] constant(0.333333343) + broadcast_splat = $0[127,125]{1,0} broadcast(constant), dimensions={} + multiply = $0[127,125]{1,0} multiply(subtract, broadcast_splat) + constant_zero = $0[] constant(0) + second_reduce = $0[127]{0} reduce(multiply, constant_zero), dimensions={1}, to_apply=add_computation + second_broadcast = $0[127,125]{1,0} broadcast(second_reduce), dimensions={0} + ROOT second_subtract = $0[127,125]{1,0} subtract(multiply, second_broadcast) +} +)"; + + const std::string hlo_text = absl::Substitute( + hlo_text_template, primitive_util::LowercasePrimitiveTypeName(data_type)); + + const std::string hlo_ref_template = R"( +; CHECK: ENTRY +; CHECK: %[[P0:.*]] = $0[127,125]{1,0} parameter(0) +; CHECK: ROOT +; CHECK-SAME: fusion(%[[P0]]) 
+; CHECK-SAME: kind=kCustom +; CHECK-SAME: __triton_softmax +)"; + + const std::string hlo_ref = absl::Substitute( + hlo_ref_template, primitive_util::LowercasePrimitiveTypeName(data_type)); + + MatchOptimizedHlo(hlo_text, hlo_ref); + + float tolerance; + switch (data_type) { + case F32: + tolerance = 1e-6; + break; + case F16: + tolerance = 2e-4; + break; + case BF16: + tolerance = 2e-2; + break; + default: + ABSL_UNREACHABLE(); + } + EXPECT_TRUE(RunAndCompare(hlo_text, + ErrorSpec(/*aabs=*/tolerance, /*arel=*/tolerance))); +} + +TEST_P( + TritonSoftmaxTest, + CanFuseAndEmitBinaryElementwiseWhereTheFirstOperandIsASplatConstantWithinDiamond) { // NOLINT(whitespace/line_length) + PrimitiveType data_type = GetParam(); + + if (data_type == BF16 && !GetCudaComputeCapability().IsAtLeast( + se::CudaComputeCapability::AMPERE)) { + GTEST_SKIP() << R"(No BF16 before Ampere. Pre-Ampere BF16 behavior is tested + in CanFuseAndEmitFirstSoftmaxDiamond, and in SoftmaxRewriterTritonTest.)"; + } + + const std::string hlo_text_template = R"( +HloModule fusible_diamond +max_computation { + arg_0 = $0[] parameter(0) + arg_1 = $0[] parameter(1) + ROOT maximum = $0[] maximum(arg_0, arg_1) +} +ENTRY main { + param_0 = $0[127,125]{1,0} parameter(0) + constant_neg_inf = $0[] constant(-inf) + reduce = $0[127]{0} reduce(param_0, constant_neg_inf), dimensions={1}, to_apply=max_computation + constant = $0[] constant(0.333333343) + broadcast_splat = $0[127]{0} broadcast(constant), dimensions={} + multiply = $0[127]{0} multiply(broadcast_splat, reduce) + broadcast = $0[127,125]{1,0} broadcast(multiply), dimensions={0} + ROOT subtract = $0[127,125]{1,0} subtract(param_0, broadcast) +} +)"; + + const std::string hlo_text = absl::Substitute( + hlo_text_template, primitive_util::LowercasePrimitiveTypeName(data_type)); + + const std::string hlo_ref_template = R"( +; CHECK: ENTRY +; CHECK: %[[P0:.*]] = $0[127,125]{1,0} parameter(0) +; CHECK: ROOT +; CHECK-SAME: fusion(%[[P0]]) +; CHECK-SAME: 
kind=kCustom +; CHECK-SAME: __triton_softmax +)"; + + const std::string hlo_ref = absl::Substitute( + hlo_ref_template, primitive_util::LowercasePrimitiveTypeName(data_type)); + + MatchOptimizedHlo(hlo_text, hlo_ref); + + float tolerance; + switch (data_type) { + case F32: + tolerance = 1e-6; + break; + case F16: + tolerance = 2e-4; + break; + case BF16: + tolerance = 2e-2; + break; + default: + ABSL_UNREACHABLE(); + } + EXPECT_TRUE(RunAndCompare(hlo_text, + ErrorSpec(/*aabs=*/tolerance, /*arel=*/tolerance))); +} + +TEST_P( + TritonSoftmaxTest, + CanFuseAndEmitBinaryElementwiseConsumerWhereTheFirstOperandIsASplatConstantIntoDiamond) { // NOLINT(whitespace/line_length) + PrimitiveType data_type = GetParam(); + + if (data_type == BF16 && !GetCudaComputeCapability().IsAtLeast( + se::CudaComputeCapability::AMPERE)) { + GTEST_SKIP() << R"(No BF16 before Ampere. Pre-Ampere BF16 behavior is tested + in CanFuseAndEmitFirstSoftmaxDiamond, and in SoftmaxRewriterTritonTest.)"; + } + + const std::string hlo_text_template = R"( +HloModule fusible_diamond +add_computation { + arg_0.1 = $0[] parameter(0) + arg_1.1 = $0[] parameter(1) + ROOT add = $0[] add(arg_0.1, arg_1.1) +} +ENTRY main { + param_0 = $0[127,125]{1,0} parameter(0) + constant_neg_inf = $0[] constant(-inf) + reduce = $0[127]{0} reduce(param_0, constant_neg_inf), dimensions={1}, to_apply=add_computation + broadcast = $0[127,125]{1,0} broadcast(reduce), dimensions={0} + subtract = $0[127,125]{1,0} subtract(param_0, broadcast) + constant = $0[] constant(0.333333343) + broadcast_splat = $0[127,125]{1,0} broadcast(constant), dimensions={} + ROOT multiply = $0[127,125]{1,0} multiply(broadcast_splat, subtract) +} +)"; + + const std::string hlo_text = absl::Substitute( + hlo_text_template, primitive_util::LowercasePrimitiveTypeName(data_type)); + + const std::string hlo_ref_template = R"( +; CHECK: ENTRY +; CHECK: %[[P0:.*]] = $0[127,125]{1,0} parameter(0) +; CHECK: ROOT +; CHECK-SAME: fusion(%[[P0]]) +; CHECK-SAME: 
kind=kCustom +; CHECK-SAME: __triton_softmax +)"; + const std::string hlo_ref = absl::Substitute( + hlo_ref_template, primitive_util::LowercasePrimitiveTypeName(data_type)); + + MatchOptimizedHlo(hlo_text, hlo_ref); + + float tolerance; + switch (data_type) { + case F32: + tolerance = 1e-6; + break; + case F16: + tolerance = 2e-4; + break; + case BF16: + tolerance = 2e-2; + break; + default: + ABSL_UNREACHABLE(); + } + EXPECT_TRUE(RunAndCompare(hlo_text, + ErrorSpec(/*aabs=*/tolerance, /*arel=*/tolerance))); +} + +TEST_P( + TritonSoftmaxTest, + CanFuseAndEmitBinaryElementwiseProducerWhereTheFirstOperandIsASplatConstantIntoDiamond) { // NOLINT(whitespace/line_length) + PrimitiveType data_type = GetParam(); + + if (data_type == BF16 && !GetCudaComputeCapability().IsAtLeast( + se::CudaComputeCapability::AMPERE)) { + GTEST_SKIP() << R"(No BF16 before Ampere. Pre-Ampere BF16 behavior is tested + in CanFuseAndEmitFirstSoftmaxDiamond, and in SoftmaxRewriterTritonTest.)"; + } + + const std::string hlo_text_template = R"( +HloModule fusible_diamond +add_computation { + arg_0.1 = $0[] parameter(0) + arg_1.1 = $0[] parameter(1) + ROOT add = $0[] add(arg_0.1, arg_1.1) +} +ENTRY main { + + param_0 = $0[127,125]{1,0} parameter(0) + constant = $0[] constant(0.333333343) + broadcast_splat = $0[127,125]{1,0} broadcast(constant), dimensions={} + multiply = $0[127,125]{1,0} multiply(broadcast_splat, param_0) + constant_neg_inf = $0[] constant(-inf) + reduce = $0[127]{0} reduce(multiply, constant_neg_inf), dimensions={1}, to_apply=add_computation + broadcast = $0[127,125]{1,0} broadcast(reduce), dimensions={0} + ROOT subtract = $0[127,125]{1,0} subtract(multiply, broadcast) +} +)"; + + const std::string hlo_text = absl::Substitute( + hlo_text_template, primitive_util::LowercasePrimitiveTypeName(data_type)); + + const std::string hlo_ref_template = R"( +; CHECK: ENTRY +; CHECK: %[[P0:.*]] = $0[127,125]{1,0} parameter(0) +; CHECK: ROOT +; CHECK-SAME: fusion(%[[P0]]) +; CHECK-SAME: 
kind=kCustom +; CHECK-SAME: __triton_softmax +)"; + const std::string hlo_ref = absl::Substitute( + hlo_ref_template, primitive_util::LowercasePrimitiveTypeName(data_type)); + + MatchOptimizedHlo(hlo_text, hlo_ref); + + float tolerance; + switch (data_type) { + case F32: + tolerance = 1e-6; + break; + case F16: + tolerance = 2e-4; + break; + case BF16: + tolerance = 2e-2; + break; + default: + ABSL_UNREACHABLE(); + } + EXPECT_TRUE(RunAndCompare(hlo_text, + ErrorSpec(/*aabs=*/tolerance, /*arel=*/tolerance))); +} + INSTANTIATE_TEST_SUITE_P(TritonSoftmaxTestSuite, TritonSoftmaxTest, ::testing::Values(F32, F16, BF16)); diff --git a/third_party/xla/xla/service/gpu/softmax_rewriter_triton.cc b/third_party/xla/xla/service/gpu/softmax_rewriter_triton.cc index ebc87378a7751b..ce3dcacebc23e2 100644 --- a/third_party/xla/xla/service/gpu/softmax_rewriter_triton.cc +++ b/third_party/xla/xla/service/gpu/softmax_rewriter_triton.cc @@ -28,6 +28,7 @@ limitations under the License. #include "xla/hlo/ir/hlo_instruction.h" #include "xla/hlo/ir/hlo_module.h" #include "xla/hlo/ir/hlo_opcode.h" +#include "xla/hlo/utils/hlo_query.h" #include "xla/layout_util.h" #include "xla/service/gpu/backend_configs.pb.h" #include "xla/service/gpu/gemm_rewriter_triton.h" @@ -123,6 +124,26 @@ bool BitcastIsTilingNoop(HloInstruction* bitcast, last_dimension(bitcast->operand(0)) == last_dimension(bitcast))); } +inline bool HasOneUse(const HloInstruction* instr) { + return instr->user_count() == 1; +} + +using hlo_query::IsBroadcastOfScalarConstant; + +// Chooses which operand to use for fusion processing. Taking in a unary or +// binary instruction, returns the first non-splat operand. If none is +// present, returns any operand. 
+HloInstruction* ChooseOperandForFusionProcessing(HloInstruction* instr) { + CHECK_GT(instr->operand_count(), 0); + CHECK_LE(instr->operand_count(), 2); + + if (instr->operand_count() > 1 && + IsBroadcastOfScalarConstant(*instr->operand(0))) { + return instr->mutable_operand(1); + } + return instr->mutable_operand(0); +} + bool IsTriviallyFusible(HloInstruction* instr, const GpuVersion& gpu_version, int num_allowed_users = 1) { // Checks whether an op is trivially fusible. An op is said to be trivially @@ -144,8 +165,36 @@ bool IsTriviallyFusible(HloInstruction* instr, const GpuVersion& gpu_version, return IsTritonSupportedInstruction(instr, gpu_version); } - if (instr->IsElementwiseBinary() && instr->operand(0) == instr->operand(1)) { - return IsTritonSupportedInstruction(instr, gpu_version); + // Elementwise binary ops are trivially fusible if the operands are the same, + // or if exactly one of the operands is a splat constant with a single user. + if (instr->IsElementwiseBinary()) { + const HloInstruction* operand_0 = instr->operand(0); + const HloInstruction* operand_1 = instr->operand(1); + + // Elementwise binary ops should be fused if both operands are the same and + // if the operand is triton supported. + if (operand_0 == operand_1) { + return IsTritonSupportedInstruction(instr, gpu_version); + } + + // If either operand is a splat constant with multiple users, we should not + // fuse. + bool operand_0_is_shared_splat_constant = + IsBroadcastOfScalarConstant(*operand_0) && !HasOneUse(operand_0); + bool operand_1_is_shared_splat_constant = + IsBroadcastOfScalarConstant(*operand_1) && !HasOneUse(operand_1); + + if (operand_0_is_shared_splat_constant || + operand_1_is_shared_splat_constant) { + return false; + } + + // For simplicity we only fuse elementwise binary ops with splat operands + // if they contain one non-splat operand. 
+ if (IsBroadcastOfScalarConstant(*operand_0) ^ + IsBroadcastOfScalarConstant(*operand_1)) { + return IsTritonSupportedInstruction(instr, gpu_version); + } } return false; @@ -155,7 +204,7 @@ bool TrivialEdge(HloInstruction** producer, HloInstruction* consumer, HloOpcode opcode, const GpuVersion& gpu_version) { while (consumer->opcode() != opcode) { if (IsTriviallyFusible(consumer, gpu_version)) { - consumer = consumer->mutable_operand(0); + consumer = ChooseOperandForFusionProcessing(consumer); } else { return false; } @@ -189,10 +238,6 @@ bool IsTriviallyConnectedProducerOf(HloInstruction* producer, return false; } -inline bool HasOneUse(const HloInstruction* instr) { - return instr->user_count() == 1; -} - bool IsTritonSupportedComputation(const HloComputation* computation, const GpuVersion& gpu_version) { for (const HloInstruction* instr : computation->instructions()) { @@ -264,7 +309,7 @@ std::optional MatchesTritonCompatibleClosedReductionDiamond( } while (IsTriviallyFusible(producer, gpu_version)) { - producer = producer->mutable_operand(0); + producer = ChooseOperandForFusionProcessing(producer); } if (!HasDefaultLayout(producer->shape()) || @@ -288,9 +333,9 @@ HloInstruction* FindFirstNonFusibleDiamondProducer( HloInstruction* diamond_producer, const GpuVersion& gpu_version) { if (IsTriviallyFusible(diamond_producer, gpu_version, /*num_allowed_users=*/2)) { - diamond_producer = diamond_producer->mutable_operand(0); + diamond_producer = ChooseOperandForFusionProcessing(diamond_producer); while (IsTriviallyFusible(diamond_producer, gpu_version)) { - diamond_producer = diamond_producer->mutable_operand(0); + diamond_producer = ChooseOperandForFusionProcessing(diamond_producer); } } @@ -393,7 +438,7 @@ SoftmaxRewriterTriton::FindAllFusibleDiamondChains( [](HloInstruction* diamond_root) { HloInstruction* instr = diamond_root->mutable_operand(1); while (instr->opcode() != HloOpcode::kReduce) { - instr = instr->mutable_operand(0); + instr = 
ChooseOperandForFusionProcessing(instr); } int operand_rank = instr->operand(0)->shape().rank(); diff --git a/third_party/xla/xla/service/gpu/softmax_rewriter_triton_test.cc b/third_party/xla/xla/service/gpu/softmax_rewriter_triton_test.cc index 13d98d1823c4c8..c4c506064f983d 100644 --- a/third_party/xla/xla/service/gpu/softmax_rewriter_triton_test.cc +++ b/third_party/xla/xla/service/gpu/softmax_rewriter_triton_test.cc @@ -1404,6 +1404,326 @@ ENTRY main { GmockMatch(m::Fusion(m::Parameter()))); } +TEST_F( + SoftmaxRewriterTritonTest, + DoesNotFuseIntermediateBinaryElementwiseWithBothSplatOperandsIntoDiamond) { + const std::string hlo_string = R"( +HloModule nonfusible_splat +max_computation { + arg_0 = f32[] parameter(0) + arg_1 = f32[] parameter(1) + ROOT maximum = f32[] maximum(arg_0, arg_1) +} +ENTRY main { + constant_0 = f32[] constant(0.333333343) + splat_0 = f32[127,125]{1,0} broadcast(constant_0), dimensions={} + constant_1 = f32[] constant(0.66666) + splat_1 = f32[127,125]{1,0} broadcast(constant_1), dimensions={} + param_0 = f32[127,125]{1,0} parameter(0) + multiply_splats = f32[127,125]{1,0} multiply(splat_0, splat_1) + multiply_splat_param = f32[127,125]{1,0} multiply(multiply_splats, param_0) + constant_neg_inf = f32[] constant(-inf) + reduce = f32[127]{0} reduce(multiply_splat_param, constant_neg_inf), dimensions={1}, to_apply=max_computation + broadcast = f32[127,125]{1,0} broadcast(reduce), dimensions={0} + ROOT subtract = f32[127,125]{1,0} subtract(param_0, broadcast) +} +)"; + + auto module = ParseAndReturnVerifiedModule(hlo_string).value(); + EXPECT_FALSE( + SoftmaxRewriterTritonMatchAndRewrite(gpu_version_, module.get()).value()); +} + +TEST_F( + SoftmaxRewriterTritonTest, + DoesNotFuseIntermediateBinaryElementwiseWithSameSplatOperandsIntoDiamond) { + const std::string hlo_string = R"( +HloModule nonfusible_splat_diamond +max_computation { + arg_0 = f32[] parameter(0) + arg_1 = f32[] parameter(1) + ROOT maximum = f32[] maximum(arg_0, arg_1) +} 
+ENTRY main { + constant_0 = f32[] constant(0.333333343) + splat = f32[127,125]{1,0} broadcast(constant_0), dimensions={} + param_0 = f32[127,125]{1,0} parameter(0) + multiply = f32[127,125]{1,0} multiply(splat, splat) + add = f32[127,125]{1,0} add(param_0, multiply) + constant_neg_inf = f32[] constant(-inf) + reduce = f32[127]{0} reduce(add, constant_neg_inf), dimensions={1}, to_apply=max_computation + broadcast = f32[127,125]{1,0} broadcast(reduce), dimensions={0} + ROOT subtract = f32[127,125]{1,0} subtract(param_0, broadcast) +} +)"; + + auto module = ParseAndReturnVerifiedModule(hlo_string).value(); + SoftmaxRewriterTriton fusion_rewriter(gpu_version_); + EXPECT_FALSE( + SoftmaxRewriterTritonMatchAndRewrite(gpu_version_, module.get()).value()); +} + +TEST_P(SoftmaxRewriterTritonTest, CanFuseRMSNormDiamond) { + PrimitiveType data_type = GetParam(); + const std::string hlo_string_template = R"( +HloModule rms_norm +add_computation { + arg_0 = $0[] parameter(0) + arg_1 = $0[] parameter(1) + ROOT add.1 = $0[] add(arg_0, arg_1) +} +ENTRY main.30 { + param_0 = $0[10,10,10,128]{3,2,1,0} parameter(0) + multiply_param = $0[10,10,10,128]{3,2,1,0} multiply(param_0, param_0) + constant_0 = $0[] constant(0) + reduce = $0[10,10,10]{2,1,0} reduce(multiply_param, constant_0), dimensions={3}, to_apply=add_computation + constant_1 = $0[] constant(0.333333343) + splat = $0[10,10,10]{2,1,0} broadcast(constant_1), dimensions={} + multiply_splat = $0[10,10,10]{2,1,0} multiply(reduce, splat) + epsilon = $0[] constant(1e-06) + splat_epsilon = $0[10,10,10]{2,1,0} broadcast(epsilon), dimensions={} + add = $0[10,10,10]{2,1,0} add(multiply_splat, splat_epsilon) + rsqrt = $0[10,10,10]{2,1,0} rsqrt(add) + broadcast = $0[10,10,10,128]{3,2,1,0} broadcast(rsqrt), dimensions={0,1,2} + ROOT multiply = $0[10,10,10,128]{3,2,1,0} multiply(param_0, broadcast) +} +)"; + const std::string hlo_string = + absl::Substitute(hlo_string_template, + primitive_util::LowercasePrimitiveTypeName(data_type)); + 
+ auto module = ParseAndReturnVerifiedModule(hlo_string).value(); + + switch (data_type) { + case F32: + case BF16: + EXPECT_TRUE( + SoftmaxRewriterTritonMatchAndRewrite(gpu_version_, module.get()) + .value()); + EXPECT_TRUE(verifier().Run(module.get()).status().ok()); + EXPECT_THAT(module->entry_computation()->root_instruction(), + GmockMatch(m::Fusion(m::Parameter()))); + break; + case F16: + // Triton does not support F16 rsqrt. + EXPECT_FALSE( + SoftmaxRewriterTritonMatchAndRewrite(gpu_version_, module.get()) + .value()); + break; + default: + ABSL_UNREACHABLE(); + } +} + +TEST_P( + SoftmaxRewriterTritonTest, + CanFuseAndEmitBinaryElementwiseWhereTheFirstOperandIsASplatConstantBetweenDiamonds) { // NOLINT(whitespace/line_length) + PrimitiveType data_type = GetParam(); + const std::string hlo_string_template = R"( +HloModule fusible_diamonds +add_computation { + arg_0.1 = $0[] parameter(0) + arg_1.1 = $0[] parameter(1) + ROOT add = $0[] add(arg_0.1, arg_1.1) +} +ENTRY main { + param_0 = $0[127,125]{1,0} parameter(0) + constant_neg_inf = $0[] constant(-inf) + reduce = $0[127]{0} reduce(param_0, constant_neg_inf), dimensions={1}, to_apply=add_computation + broadcast = $0[127,125]{1,0} broadcast(reduce), dimensions={0} + subtract = $0[127,125]{1,0} subtract(param_0, broadcast) + constant = $0[] constant(0.333333343) + broadcast_splat = $0[127,125]{1,0} broadcast(constant), dimensions={} + multiply = $0[127,125]{1,0} multiply(broadcast_splat, subtract) + constant_zero = $0[] constant(0) + second_reduce = $0[127]{0} reduce(multiply, constant_zero), dimensions={1}, to_apply=add_computation + second_broadcast = $0[127,125]{1,0} broadcast(second_reduce), dimensions={0} + ROOT second_subtract = $0[127,125]{1,0} subtract(multiply, second_broadcast) +} +)"; + const std::string hlo_string = + absl::Substitute(hlo_string_template, + primitive_util::LowercasePrimitiveTypeName(data_type)); + + auto module = ParseAndReturnVerifiedModule(hlo_string).value(); + EXPECT_TRUE( + 
SoftmaxRewriterTritonMatchAndRewrite(gpu_version_, module.get()).value()); + EXPECT_TRUE(verifier().Run(module.get()).status().ok()); + VLOG(2) << module->ToString(); + EXPECT_THAT(module->entry_computation()->root_instruction(), + GmockMatch(m::Fusion(m::Parameter()))); +} + +TEST_P( + SoftmaxRewriterTritonTest, + CanFuseAndEmitBinaryElementwiseWhereTheSecondOperandIsASplatConstantBetweenDiamonds) { // NOLINT(whitespace/line_length) + PrimitiveType data_type = GetParam(); + const std::string hlo_string_template = R"( +HloModule fusible_diamonds +add_computation { + arg_0.1 = $0[] parameter(0) + arg_1.1 = $0[] parameter(1) + ROOT add = $0[] add(arg_0.1, arg_1.1) +} +ENTRY main { + param_0 = $0[127,125]{1,0} parameter(0) + constant_neg_inf = $0[] constant(-inf) + reduce = $0[127]{0} reduce(param_0, constant_neg_inf), dimensions={1}, to_apply=add_computation + broadcast = $0[127,125]{1,0} broadcast(reduce), dimensions={0} + subtract = $0[127,125]{1,0} subtract(param_0, broadcast) + constant = $0[] constant(0.333333343) + broadcast_splat = $0[127,125]{1,0} broadcast(constant), dimensions={} + multiply = $0[127,125]{1,0} multiply(subtract, broadcast_splat) + constant_zero = $0[] constant(0) + second_reduce = $0[127]{0} reduce(multiply, constant_zero), dimensions={1}, to_apply=add_computation + second_broadcast = $0[127,125]{1,0} broadcast(second_reduce), dimensions={0} + ROOT second_subtract = $0[127,125]{1,0} subtract(multiply, second_broadcast) +} +)"; + const std::string hlo_string = + absl::Substitute(hlo_string_template, + primitive_util::LowercasePrimitiveTypeName(data_type)); + + auto module = ParseAndReturnVerifiedModule(hlo_string).value(); + EXPECT_TRUE( + SoftmaxRewriterTritonMatchAndRewrite(gpu_version_, module.get()).value()); + EXPECT_TRUE(verifier().Run(module.get()).status().ok()); + VLOG(2) << module->ToString(); + EXPECT_THAT(module->entry_computation()->root_instruction(), + GmockMatch(m::Fusion(m::Parameter()))); +} + +TEST_P( + 
SoftmaxRewriterTritonTest, + + CanFuseBinaryElementwiseWhereTheFirstOperandIsASplatConstantWithinDiamond) { + PrimitiveType data_type = GetParam(); + const std::string hlo_string_template = R"( +HloModule fusible_diamond +max_computation { + arg_0 = $0[] parameter(0) + arg_1 = $0[] parameter(1) + ROOT maximum = $0[] maximum(arg_0, arg_1) +} +ENTRY main { + param_0 = $0[127,125]{1,0} parameter(0) + constant_neg_inf = $0[] constant(-inf) + reduce = $0[127]{0} reduce(param_0, constant_neg_inf), dimensions={1}, to_apply=max_computation + constant = $0[] constant(0.333333343) + broadcast_splat = $0[127]{0} broadcast(constant), dimensions={} + multiply = $0[127]{0} multiply(broadcast_splat, reduce) + broadcast = $0[127,125]{1,0} broadcast(multiply), dimensions={0} + ROOT subtract = $0[127,125]{1,0} subtract(param_0, broadcast) +} +)"; + const std::string hlo_string = + absl::Substitute(hlo_string_template, + primitive_util::LowercasePrimitiveTypeName(data_type)); + + auto module = ParseAndReturnVerifiedModule(hlo_string).value(); + EXPECT_TRUE( + SoftmaxRewriterTritonMatchAndRewrite(gpu_version_, module.get()).value()); + EXPECT_TRUE(verifier().Run(module.get()).status().ok()); + VLOG(2) << module->ToString(); + EXPECT_THAT(module->entry_computation()->root_instruction(), + GmockMatch(m::Fusion(m::Parameter()))); +} + +TEST_P(SoftmaxRewriterTritonTest, + CanFuseBinaryElementwiseConsumerWhereTheFirstOperandIsASplatConstant) { + PrimitiveType data_type = GetParam(); + const std::string hlo_string_template = R"( +HloModule fusible_diamond +add_computation { + arg_0.1 = $0[] parameter(0) + arg_1.1 = $0[] parameter(1) + ROOT add = $0[] add(arg_0.1, arg_1.1) +} +ENTRY main { + param_0 = $0[127,125]{1,0} parameter(0) + constant_neg_inf = $0[] constant(-inf) + reduce = $0[127]{0} reduce(param_0, constant_neg_inf), dimensions={1}, to_apply=add_computation + broadcast = $0[127,125]{1,0} broadcast(reduce), dimensions={0} + subtract = $0[127,125]{1,0} subtract(param_0, broadcast) + 
constant = $0[] constant(0.333333343) + broadcast_splat = $0[127,125]{1,0} broadcast(constant), dimensions={} + ROOT multiply = $0[127,125]{1,0} multiply(broadcast_splat, subtract) +} +)"; + const std::string hlo_string = + absl::Substitute(hlo_string_template, + primitive_util::LowercasePrimitiveTypeName(data_type)); + + auto module = ParseAndReturnVerifiedModule(hlo_string).value(); + EXPECT_TRUE( + SoftmaxRewriterTritonMatchAndRewrite(gpu_version_, module.get()).value()); + EXPECT_TRUE(verifier().Run(module.get()).status().ok()); + VLOG(2) << module->ToString(); + EXPECT_THAT(module->entry_computation()->root_instruction(), + GmockMatch(m::Fusion(m::Parameter()))); +} + +TEST_F( + SoftmaxRewriterTritonTest, + DoesNotFuseBinaryElementwiseOperationWhereOneOperandIsASharedSplatProducer) { // NOLINT(whitespace/line_length) + const std::string hlo_string = R"( +HloModule nonfusible_diamond +add_computation { + arg_0.1 = f32[] parameter(0) + arg_1.1 = f32[] parameter(1) + ROOT add = f32[] add(arg_0.1, arg_1.1) +} +ENTRY main { + param_0 = f32[127,125]{1,0} parameter(0) + constant.2 = f32[] constant(0.333333343) + broadcast_splat = f32[127,125]{1,0} broadcast(constant.2), dimensions={} + param_1 = f32[127,125]{1,0} parameter(1) + multiply_splat = f32[127,125]{1,0} multiply(broadcast_splat, param_1) + multiply = f32[127,125]{1,0} multiply(param_0, broadcast_splat) + constant_neg_inf = f32[] constant(-inf) + reduce = f32[127]{0} reduce(multiply, constant_neg_inf), dimensions={1}, to_apply=add_computation + broadcast = f32[127,125]{1,0} broadcast(reduce), dimensions={0} + ROOT subtract = f32[127,125]{1,0} subtract(param_0, broadcast) +} +)"; + + auto module = ParseAndReturnVerifiedModule(hlo_string).value(); + EXPECT_FALSE( + SoftmaxRewriterTritonMatchAndRewrite(gpu_version_, module.get()).value()); +} + +TEST_F( + SoftmaxRewriterTritonTest, + DoesNotFuseBinaryElementwiseOperationWhereFirstOperandIsASplatAndSecondOperandIsASharedSplatProducer) { // 
NOLINT(whitespace/line_length) + const std::string hlo_string = R"( +HloModule nonfusible_diamond +add_computation { + arg_0.1 = f32[] parameter(0) + arg_1.1 = f32[] parameter(1) + ROOT add = f32[] add(arg_0.1, arg_1.1) +} +ENTRY main { + param_0 = f32[127,125]{1,0} parameter(0) + constant_2 = f32[] constant(0.333333343) + broadcast_splat_shared = f32[127,125]{1,0} broadcast(constant_2), dimensions={} + param_1 = f32[127,125]{1,0} parameter(1) + multiply_splat_shared = f32[127,125]{1,0} multiply(broadcast_splat_shared, param_1) + constant_3 = f32[] constant(0.5) + broadcast_splat = f32[127,125]{1,0} broadcast(constant_3), dimensions={} + multiply_splat = f32[127,125]{1,0} multiply(broadcast_splat, broadcast_splat_shared) + multiply = f32[127,125]{1,0} multiply(param_0, multiply_splat) + constant_neg_inf = f32[] constant(-inf) + reduce = f32[127]{0} reduce(multiply, constant_neg_inf), dimensions={1}, to_apply=add_computation + broadcast = f32[127,125]{1,0} broadcast(reduce), dimensions={0} + ROOT subtract = f32[127,125]{1,0} subtract(param_0, broadcast) +} +)"; + + auto module = ParseAndReturnVerifiedModule(hlo_string).value(); + EXPECT_FALSE( + SoftmaxRewriterTritonMatchAndRewrite(gpu_version_, module.get()).value()); +} + INSTANTIATE_TEST_SUITE_P(SoftmaxRewriterTritonTestSuite, SoftmaxRewriterTritonTest, ::testing::Values(F32, F16, BF16)); From 1ee265c2092b3dc631281965c3aa8967279a3e8e Mon Sep 17 00:00:00 2001 From: Michael Hudgins Date: Mon, 25 Sep 2023 09:18:32 -0700 Subject: [PATCH 216/567] Update the remote GCC toolchains to the newest container and defines to match the upgrade from CUDA 11.8 to 12.2 PiperOrigin-RevId: 568239285 --- .../tools/toolchains/remote_config/configs.bzl | 15 +++++++-------- .../tools/toolchains/remote_config/configs.bzl | 15 +++++++-------- .../tools/toolchains/remote_config/configs.bzl | 15 +++++++-------- 3 files changed, 21 insertions(+), 24 deletions(-) diff --git a/tensorflow/tools/toolchains/remote_config/configs.bzl 
b/tensorflow/tools/toolchains/remote_config/configs.bzl index 6e37be2447c0ec..497b8e94c7334d 100644 --- a/tensorflow/tools/toolchains/remote_config/configs.bzl +++ b/tensorflow/tools/toolchains/remote_config/configs.bzl @@ -580,11 +580,10 @@ def initialize_rbe_configs(): sigbuild_tf_configs( name_container_map = { - "sigbuild-r2.14": "docker://gcr.io/tensorflow-sigs/build@sha256:c03809a6b4008b430bf241efce78cdcd92c7bc41d11d0ba57216e97d813ac282", - "sigbuild-r2.14-python3.8": "docker://gcr.io/tensorflow-sigs/build@sha256:c46d275e5bc760b7af465dc063629b234cfa34aabf0c7fe30581effc0b99648a", - "sigbuild-r2.14-python3.9": "docker://gcr.io/tensorflow-sigs/build@sha256:c03809a6b4008b430bf241efce78cdcd92c7bc41d11d0ba57216e97d813ac282", - "sigbuild-r2.14-python3.10": "docker://gcr.io/tensorflow-sigs/build@sha256:06b3a97ef247dbb00a9c6d8315e4e035d891ae2f18088de254f15d6ecedadfb9", - "sigbuild-r2.14-python3.11": "docker://gcr.io/tensorflow-sigs/build@sha256:00dc9e13130727dcdeb54ca77423e317a79aae84d5783c05b38b7bbdf753f0f6", + "sigbuild-r2.14": "docker://gcr.io/tensorflow-sigs/build@sha256:edf0324686ffbf488ed0a6b7d3a29ecc6f7b7ad9c04b2e1a527a6a47c0dc1b4b", + "sigbuild-r2.14-python3.9": "docker://gcr.io/tensorflow-sigs/build@sha256:edf0324686ffbf488ed0a6b7d3a29ecc6f7b7ad9c04b2e1a527a6a47c0dc1b4b", + "sigbuild-r2.14-python3.10": "docker://gcr.io/tensorflow-sigs/build@sha256:5b06acad335d5c24aa2f63d39d9de230affd04aa982dda0242eeb893b9dae363", + "sigbuild-r2.14-python3.11": "docker://gcr.io/tensorflow-sigs/build@sha256:80d991d5cf0ac710b568c71c7790270691749afa19de49d7c1a121ba3f92cd58", }, # Unclear why LIBC is set to 2.19 here, and yet manylinux2010 is 2.12 # and manylinux2014 is 2.17. 
@@ -608,13 +607,13 @@ def initialize_rbe_configs(): "TENSORRT_INSTALL_PATH": "/usr/lib/x86_64-linux-gnu", "TF_CUDA_CLANG": "0", "TF_CUDA_COMPUTE_CAPABILITIES": "3.5,6.0", - "TF_CUDA_VERSION": "11.8", - "TF_CUDNN_VERSION": "8.6", + "TF_CUDA_VERSION": "12.2", + "TF_CUDNN_VERSION": "8.9", "TF_ENABLE_XLA": "1", "TF_NEED_CUDA": "1", "TF_NEED_TENSORRT": "1", "TF_SYSROOT": "/dt9", - "TF_TENSORRT_VERSION": "8.4", + "TF_TENSORRT_VERSION": "8.6", }, ) diff --git a/third_party/xla/third_party/tsl/tools/toolchains/remote_config/configs.bzl b/third_party/xla/third_party/tsl/tools/toolchains/remote_config/configs.bzl index 666e51de5ffb71..cd829d1a88c42c 100644 --- a/third_party/xla/third_party/tsl/tools/toolchains/remote_config/configs.bzl +++ b/third_party/xla/third_party/tsl/tools/toolchains/remote_config/configs.bzl @@ -580,11 +580,10 @@ def initialize_rbe_configs(): sigbuild_tf_configs( name_container_map = { - "sigbuild-r2.14": "docker://gcr.io/tensorflow-sigs/build@sha256:c03809a6b4008b430bf241efce78cdcd92c7bc41d11d0ba57216e97d813ac282", - "sigbuild-r2.14-python3.8": "docker://gcr.io/tensorflow-sigs/build@sha256:c46d275e5bc760b7af465dc063629b234cfa34aabf0c7fe30581effc0b99648a", - "sigbuild-r2.14-python3.9": "docker://gcr.io/tensorflow-sigs/build@sha256:c03809a6b4008b430bf241efce78cdcd92c7bc41d11d0ba57216e97d813ac282", - "sigbuild-r2.14-python3.10": "docker://gcr.io/tensorflow-sigs/build@sha256:06b3a97ef247dbb00a9c6d8315e4e035d891ae2f18088de254f15d6ecedadfb9", - "sigbuild-r2.14-python3.11": "docker://gcr.io/tensorflow-sigs/build@sha256:00dc9e13130727dcdeb54ca77423e317a79aae84d5783c05b38b7bbdf753f0f6", + "sigbuild-r2.14": "docker://gcr.io/tensorflow-sigs/build@sha256:edf0324686ffbf488ed0a6b7d3a29ecc6f7b7ad9c04b2e1a527a6a47c0dc1b4b", + "sigbuild-r2.14-python3.9": "docker://gcr.io/tensorflow-sigs/build@sha256:edf0324686ffbf488ed0a6b7d3a29ecc6f7b7ad9c04b2e1a527a6a47c0dc1b4b", + "sigbuild-r2.14-python3.10": 
"docker://gcr.io/tensorflow-sigs/build@sha256:5b06acad335d5c24aa2f63d39d9de230affd04aa982dda0242eeb893b9dae363", + "sigbuild-r2.14-python3.11": "docker://gcr.io/tensorflow-sigs/build@sha256:80d991d5cf0ac710b568c71c7790270691749afa19de49d7c1a121ba3f92cd58", }, # Unclear why LIBC is set to 2.19 here, and yet manylinux2010 is 2.12 # and manylinux2014 is 2.17. @@ -608,13 +607,13 @@ def initialize_rbe_configs(): "TENSORRT_INSTALL_PATH": "/usr/lib/x86_64-linux-gnu", "TF_CUDA_CLANG": "0", "TF_CUDA_COMPUTE_CAPABILITIES": "3.5,6.0", - "TF_CUDA_VERSION": "11.8", - "TF_CUDNN_VERSION": "8.6", + "TF_CUDA_VERSION": "12.2", + "TF_CUDNN_VERSION": "8.9", "TF_ENABLE_XLA": "1", "TF_NEED_CUDA": "1", "TF_NEED_TENSORRT": "1", "TF_SYSROOT": "/dt9", - "TF_TENSORRT_VERSION": "8.4", + "TF_TENSORRT_VERSION": "8.6", }, ) diff --git a/third_party/xla/tools/toolchains/remote_config/configs.bzl b/third_party/xla/tools/toolchains/remote_config/configs.bzl index 666e51de5ffb71..cd829d1a88c42c 100644 --- a/third_party/xla/tools/toolchains/remote_config/configs.bzl +++ b/third_party/xla/tools/toolchains/remote_config/configs.bzl @@ -580,11 +580,10 @@ def initialize_rbe_configs(): sigbuild_tf_configs( name_container_map = { - "sigbuild-r2.14": "docker://gcr.io/tensorflow-sigs/build@sha256:c03809a6b4008b430bf241efce78cdcd92c7bc41d11d0ba57216e97d813ac282", - "sigbuild-r2.14-python3.8": "docker://gcr.io/tensorflow-sigs/build@sha256:c46d275e5bc760b7af465dc063629b234cfa34aabf0c7fe30581effc0b99648a", - "sigbuild-r2.14-python3.9": "docker://gcr.io/tensorflow-sigs/build@sha256:c03809a6b4008b430bf241efce78cdcd92c7bc41d11d0ba57216e97d813ac282", - "sigbuild-r2.14-python3.10": "docker://gcr.io/tensorflow-sigs/build@sha256:06b3a97ef247dbb00a9c6d8315e4e035d891ae2f18088de254f15d6ecedadfb9", - "sigbuild-r2.14-python3.11": "docker://gcr.io/tensorflow-sigs/build@sha256:00dc9e13130727dcdeb54ca77423e317a79aae84d5783c05b38b7bbdf753f0f6", + "sigbuild-r2.14": 
"docker://gcr.io/tensorflow-sigs/build@sha256:edf0324686ffbf488ed0a6b7d3a29ecc6f7b7ad9c04b2e1a527a6a47c0dc1b4b", + "sigbuild-r2.14-python3.9": "docker://gcr.io/tensorflow-sigs/build@sha256:edf0324686ffbf488ed0a6b7d3a29ecc6f7b7ad9c04b2e1a527a6a47c0dc1b4b", + "sigbuild-r2.14-python3.10": "docker://gcr.io/tensorflow-sigs/build@sha256:5b06acad335d5c24aa2f63d39d9de230affd04aa982dda0242eeb893b9dae363", + "sigbuild-r2.14-python3.11": "docker://gcr.io/tensorflow-sigs/build@sha256:80d991d5cf0ac710b568c71c7790270691749afa19de49d7c1a121ba3f92cd58", }, # Unclear why LIBC is set to 2.19 here, and yet manylinux2010 is 2.12 # and manylinux2014 is 2.17. @@ -608,13 +607,13 @@ def initialize_rbe_configs(): "TENSORRT_INSTALL_PATH": "/usr/lib/x86_64-linux-gnu", "TF_CUDA_CLANG": "0", "TF_CUDA_COMPUTE_CAPABILITIES": "3.5,6.0", - "TF_CUDA_VERSION": "11.8", - "TF_CUDNN_VERSION": "8.6", + "TF_CUDA_VERSION": "12.2", + "TF_CUDNN_VERSION": "8.9", "TF_ENABLE_XLA": "1", "TF_NEED_CUDA": "1", "TF_NEED_TENSORRT": "1", "TF_SYSROOT": "/dt9", - "TF_TENSORRT_VERSION": "8.4", + "TF_TENSORRT_VERSION": "8.6", }, ) From 6655ca4631791a46df4ea7215aa0321200684216 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 25 Sep 2023 09:19:29 -0700 Subject: [PATCH 217/567] Definition for DFATAL is added to TensorFlow logging. 
PiperOrigin-RevId: 568239531 --- .../xla/third_party/tsl/tsl/platform/default/logging.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/third_party/xla/third_party/tsl/tsl/platform/default/logging.h b/third_party/xla/third_party/tsl/tsl/platform/default/logging.h index 3dba4b3b8653b7..c8bad4bedefe11 100644 --- a/third_party/xla/third_party/tsl/tsl/platform/default/logging.h +++ b/third_party/xla/third_party/tsl/tsl/platform/default/logging.h @@ -156,6 +156,12 @@ class LogMessageNull : public std::basic_ostringstream { #define _TF_LOG_QFATAL _TF_LOG_FATAL +#ifdef NDEBUG +#define _TF_LOG_DFATAL _TF_LOG_ERROR +#else +#define _TF_LOG_DFATAL _TF_LOG_FATAL +#endif + #define LOG(severity) _TF_LOG_##severity #ifdef IS_MOBILE_PLATFORM From 9aaeb1bb04b12be62cad962f4b3adfcd344b535d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 25 Sep 2023 09:30:14 -0700 Subject: [PATCH 218/567] Keep dims when reducing gradients of broadcasted arguments to reduce size of dynamically-shaped graphs. Motivation: Consider, for example, `z = x * y` where `x` has shape `[3, 1]` and `y` - `[3, 5]`. During backprop the upstream gradient `gz` of `z` with shape `[3, 5]` is propagated as follows: ``` ix, iy = broadcast_gradient_args(x, y) gx = reshape(reduce_sum(gz * y, ix), shape(x)) gy = reshape(reduce_sum(x * gz, iy), shape(y)) ``` Since input shapes are fully-defined, the optimization passes can easily infer `ix = [1]` and `iy = []` and rewrite the above computation as follows (note that `reduce_sum(x, [])` is an identity): ``` gx = reshape(reduce_sum(gz * y, [1]), [3, 1]) gy = x * gz ``` If we set `keepdims=True` in the above reduction, the `reshape` becomes an identity and can be optimized away as well: ``` gx = reduce_sum(gz * y, [1]) gy = x * gz ``` The impact is not limited to fully-defined shapes.
When `keepdims=True` the shape refiner is able to infer the rank of `reduce_sum` result which opens up some other opportunities for optimization (see, for example, `_SumGrad` in the same file). PiperOrigin-RevId: 568242315 --- tensorflow/python/ops/math_grad.py | 374 +++++++++++------------------ 1 file changed, 139 insertions(+), 235 deletions(-) diff --git a/tensorflow/python/ops/math_grad.py b/tensorflow/python/ops/math_grad.py index 794d4ff2ac102f..d702d7fad03c19 100644 --- a/tensorflow/python/ops/math_grad.py +++ b/tensorflow/python/ops/math_grad.py @@ -30,11 +30,6 @@ from tensorflow.python.ops import special_math_ops -def _safe_shape_div(x, y): - """Divides `x / y` assuming `x, y >= 0`, treating `0 / 0 = 0`.""" - return x // math_ops.maximum(y, 1) - - @ops.RegisterGradient("ArgMax") def _ArgMaxGrad(op: ops.Operation, grad): del op, grad @@ -133,11 +128,37 @@ def SmartBroadcastGradientArgs(x, y, grad): y_shape_tuple, ry_value, y_needs_reduction) -_empty_tuple = () +def _ReduceGradientArg(grad, shape_axes_must_reduce): + """Reduces gradients of one of the arguments of a broadcasting binary op.""" + shape, axes, must_reduce = shape_axes_must_reduce + if grad is not None and must_reduce: + # Applying keepdims=True in presence of unknown axes opens up some + # opportunities for optimizations. For example, _SumGrad below won't have to + # emit extra ops to recover reduced indices for broadcasting. 
+ grad = math_ops.reduce_sum(grad, axes, keepdims=True) + grad = array_ops.reshape(grad, shape) + return grad + + +def _ReduceGradientArgs(x, y, grad, gx, gy): + """Reduces gradients of both arguments of a broadcasting binary op.""" + if gx is not None or gy is not None: + bx, by = SmartBroadcastGradientArgs(x, y, grad) + gx = _ReduceGradientArg(gx, bx) + gy = _ReduceGradientArg(gy, by) + return gx, gy + + +_EMPTY_TUPLE = () def _IsScalar(x): - return x._shape_tuple() is _empty_tuple # pylint: disable=protected-access + return x._shape_tuple() is _EMPTY_TUPLE # pylint: disable=protected-access + + +def _SafeShapeDiv(x, y): + """Divides `x / y` assuming `x, y >= 0`, treating `0 / 0 = 0`.""" + return x // math_ops.maximum(y, 1) @ops.RegisterGradient("Sum") @@ -193,7 +214,7 @@ def EvaluateAsTuple(t): output_shape_kept_dims = EvaluateAsTuple( math_ops.reduced_shape(input_0_shape, axes)) tile_scaling = EvaluateAsTuple( - _safe_shape_div(input_0_shape, output_shape_kept_dims)) + _SafeShapeDiv(input_0_shape, output_shape_kept_dims)) graph._reduced_shape_cache[(input_0_shape, axes)] = ( # pylint:disable=protected-access output_shape_kept_dims, tile_scaling) @@ -1309,24 +1330,16 @@ def _AtanGrad(op: ops.Operation, grad): @ops.RegisterGradient("Atan2") def _Atan2Grad(op: ops.Operation, grad): - """Returns grad * x / (x^2 + y^2), grad * -y / (x^2 + y^2).""" + """Returns grad * x / (y^2 + x^2), grad * -y / (y^2 + x^2).""" y = op.inputs[0] x = op.inputs[1] with ops.control_dependencies([grad]): - (sx, rx, must_reduce_x), (sy, ry, must_reduce_y) = ( - SmartBroadcastGradientArgs(x, y, grad) - ) - - grad_inv = grad / (math_ops.square(x) + math_ops.square(y)) - - gx = -y * grad_inv - if must_reduce_x: - gx = array_ops.reshape(math_ops.reduce_sum(gx, rx), sx) - + grad_inv = grad / (math_ops.square(y) + math_ops.square(x)) gy = x * grad_inv - if must_reduce_y: - gy = array_ops.reshape(math_ops.reduce_sum(gy, ry), sy) - return gy, gx + gx = -y * grad_inv + # pylint: 
disable=arguments-out-of-order + return _ReduceGradientArgs(y, x, grad, gy, gx) + # pylint: enable=arguments-out-of-order @ops.RegisterGradient("AddN") @@ -1351,109 +1364,80 @@ def _ShapesFullySpecifiedAndEqual(x, y, grad): def _AddGrad(op: ops.Operation, grad): """Gradient for Add.""" y = op.inputs[1] - skip_input_indices = None try: - skip_input_indices = op.skip_input_indices - if skip_input_indices is not None and 1 in skip_input_indices and _IsScalar( - y): + skip_input_indices = op.skip_input_indices or () + if 1 in skip_input_indices and _IsScalar(y): return grad, None except AttributeError: # No gradient skipping, so do the full gradient computation - pass + skip_input_indices = () + x = op.inputs[0] - if (isinstance(grad, tensor.Tensor) and - _ShapesFullySpecifiedAndEqual(x, y, grad)): + if isinstance(grad, tensor.Tensor) and _ShapesFullySpecifiedAndEqual( + x, y, grad + ): return grad, grad - (sx, rx, must_reduce_x), (sy, ry, must_reduce_y) = ( - SmartBroadcastGradientArgs(x, y, grad)) - if skip_input_indices is not None and 0 in skip_input_indices: - gx = None - elif not must_reduce_x: - gx = grad - else: - gx = array_ops.reshape(math_ops.reduce_sum(grad, rx), sx) - if skip_input_indices is not None and 1 in skip_input_indices: - gy = None - elif not must_reduce_y: - gy = grad - else: - gy = array_ops.reshape(math_ops.reduce_sum(grad, ry), sy) - return (gx, gy) + + gx = None if 0 in skip_input_indices else grad + gy = None if 1 in skip_input_indices else grad + return _ReduceGradientArgs(x, y, grad, gx, gy) @ops.RegisterGradient("Sub") def _SubGrad(op: ops.Operation, grad): """Gradient for Sub.""" y = op.inputs[1] - skip_input_indices = None try: - skip_input_indices = op.skip_input_indices - if skip_input_indices is not None and 1 in skip_input_indices and _IsScalar( - y): + skip_input_indices = op.skip_input_indices or () + if 1 in skip_input_indices and _IsScalar(y): return grad, None except AttributeError: # No gradient skipping, so do the full 
gradient computation - pass + skip_input_indices = () + x = op.inputs[0] - if (isinstance(grad, tensor.Tensor) and - _ShapesFullySpecifiedAndEqual(x, y, grad)): + if isinstance(grad, tensor.Tensor) and _ShapesFullySpecifiedAndEqual( + x, y, grad + ): return grad, -grad - (sx, rx, must_reduce_x), (sy, ry, must_reduce_y) = ( - SmartBroadcastGradientArgs(x, y, grad)) - if skip_input_indices is not None and 0 in skip_input_indices: - gx = None - elif not must_reduce_x: - gx = grad - else: - gx = array_ops.reshape(math_ops.reduce_sum(grad, rx), sx) - if skip_input_indices is not None and 1 in skip_input_indices: - gy = None - elif not must_reduce_y: - gy = -grad - else: - gy = array_ops.reshape(math_ops.reduce_sum(-grad, ry), sy) - return (gx, gy) + + gx = None if 0 in skip_input_indices else grad + gy = None if 1 in skip_input_indices else -grad + return _ReduceGradientArgs(x, y, grad, gx, gy) @ops.RegisterGradient("Mul") def _MulGrad(op: ops.Operation, grad): """The gradient of scalar multiplication.""" y = op.inputs[1] - skip_input_indices = None try: - skip_input_indices = op.skip_input_indices - if skip_input_indices is not None and 1 in skip_input_indices and _IsScalar( - y): + skip_input_indices = op.skip_input_indices or () + if 1 in skip_input_indices and _IsScalar(y): return gen_math_ops.mul(grad, math_ops.conj(y)), None except AttributeError: # No gradient skipping, so do the full gradient computation - pass + skip_input_indices = () + x = op.inputs[0] - if (isinstance(grad, tensor.Tensor) and - _ShapesFullySpecifiedAndEqual(x, y, grad) and - grad.dtype in (dtypes.int32, dtypes.float32)): + if ( + isinstance(grad, tensor.Tensor) + and _ShapesFullySpecifiedAndEqual(x, y, grad) + and grad.dtype in (dtypes.int32, dtypes.float32) + ): return gen_math_ops.mul(grad, y), gen_math_ops.mul(grad, x) assert x.dtype.base_dtype == y.dtype.base_dtype, (x.dtype, " vs. 
", y.dtype) - (sx, rx, must_reduce_x), (sy, ry, must_reduce_y) = ( - SmartBroadcastGradientArgs(x, y, grad)) - x = math_ops.conj(x) - y = math_ops.conj(y) - if skip_input_indices is not None and 0 in skip_input_indices: + if 0 in skip_input_indices: gx = None - elif not must_reduce_x: - gx = gen_math_ops.mul(grad, y) else: - gx = array_ops.reshape( - math_ops.reduce_sum(gen_math_ops.mul(grad, y), rx), sx) - if skip_input_indices is not None and 1 in skip_input_indices: + gx = gen_math_ops.mul(grad, math_ops.conj(y)) + + if 1 in skip_input_indices: gy = None - elif not must_reduce_y: - gy = gen_math_ops.mul(x, grad) else: - gy = array_ops.reshape( - math_ops.reduce_sum(gen_math_ops.mul(x, grad), ry), sy) - return (gx, gy) + gy = gen_math_ops.mul(math_ops.conj(x), grad) + + return _ReduceGradientArgs(x, y, grad, gx, gy) @ops.RegisterGradient("MulNoNan") @@ -1461,17 +1445,15 @@ def _MulNoNanGrad(op: ops.Operation, grad): """The gradient of scalar multiplication with NaN-suppression.""" x = op.inputs[0] y = op.inputs[1] - if (isinstance(grad, tensor.Tensor) and - _ShapesFullySpecifiedAndEqual(x, y, grad)): + if isinstance(grad, tensor.Tensor) and _ShapesFullySpecifiedAndEqual( + x, y, grad + ): return gen_math_ops.mul_no_nan(grad, y), gen_math_ops.mul_no_nan(x, grad) + assert x.dtype.base_dtype == y.dtype.base_dtype, (x.dtype, " vs. 
", y.dtype) - sx = array_ops.shape(x) - sy = array_ops.shape(y) - rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy) - return (array_ops.reshape( - math_ops.reduce_sum(gen_math_ops.mul_no_nan(grad, y), rx), sx), - array_ops.reshape( - math_ops.reduce_sum(gen_math_ops.mul_no_nan(x, grad), ry), sy)) + gx = gen_math_ops.mul_no_nan(grad, y) + gy = gen_math_ops.mul_no_nan(x, grad) + return _ReduceGradientArgs(x, y, grad, gx, gy) @ops.RegisterGradient("Div") @@ -1479,17 +1461,11 @@ def _DivGrad(op: ops.Operation, grad): """The gradient for the Div operator.""" x = op.inputs[0] y = op.inputs[1] - sx = array_ops.shape(x) - sy = array_ops.shape(y) - rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy) - x = math_ops.conj(x) - y = math_ops.conj(y) - # pylint: disable=invalid-unary-operand-type - return ( - array_ops.reshape(math_ops.reduce_sum(math_ops.divide(grad, y), rx), sx), - array_ops.reshape( - math_ops.reduce_sum(grad * math_ops.divide(math_ops.divide(-x, y), y), - ry), sy)) + cx = math_ops.conj(x) + cy = math_ops.conj(y) + gx = math_ops.divide(grad, cy) + gy = grad * math_ops.divide(math_ops.divide(-cx, cy), cy) + return _ReduceGradientArgs(x, y, grad, gx, gy) @ops.RegisterGradient("FloorDiv") @@ -1503,15 +1479,10 @@ def _FloorModGrad(op: ops.Operation, grad): """Returns grad * (1, -floor(x/y)).""" x = math_ops.conj(op.inputs[0]) y = math_ops.conj(op.inputs[1]) - - sx = array_ops.shape(x) - sy = array_ops.shape(y) - rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy) floor_xy = math_ops.floor_div(x, y) - gx = array_ops.reshape(math_ops.reduce_sum(grad, rx), sx) - gy = array_ops.reshape( - math_ops.reduce_sum(grad * math_ops.negative(floor_xy), ry), sy) - return gx, gy + gx = grad + gy = grad * math_ops.negative(floor_xy) + return _ReduceGradientArgs(x, y, grad, gx, gy) @ops.RegisterGradient("TruncateDiv") @@ -1524,36 +1495,21 @@ def _RealDivGrad(op: ops.Operation, grad): """RealDiv op gradient.""" x = op.inputs[0] y = op.inputs[1] - sx = array_ops.shape(x) 
- sy = array_ops.shape(y) - rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy) - x = math_ops.conj(x) - y = math_ops.conj(y) - return (array_ops.reshape( - math_ops.reduce_sum(math_ops.realdiv(grad, y), rx), sx), - array_ops.reshape( - math_ops.reduce_sum( - grad * math_ops.realdiv(math_ops.realdiv(-x, y), y), ry), sy)) # pylint: disable=invalid-unary-operand-type + cx = math_ops.conj(op.inputs[0]) + cy = math_ops.conj(op.inputs[1]) + gx = math_ops.realdiv(grad, cy) + gy = grad * math_ops.realdiv(math_ops.realdiv(-cx, cy), cy) + return _ReduceGradientArgs(x, y, grad, gx, gy) @ops.RegisterGradient("DivNoNan") def _DivNoNanGrad(op: ops.Operation, grad): """DivNoNan op gradient.""" - x = op.inputs[0] - y = op.inputs[1] - sx = array_ops.shape(x) - sy = array_ops.shape(y) - rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy) - x = math_ops.conj(x) - y = math_ops.conj(y) - return ( - array_ops.reshape( - math_ops.reduce_sum(math_ops.div_no_nan(grad, y), rx), sx), - array_ops.reshape( - math_ops.reduce_sum( - grad * math_ops.div_no_nan(math_ops.div_no_nan(-x, y), y), # pylint: disable=invalid-unary-operand-type - ry), - sy)) + x = math_ops.conj(op.inputs[0]) + y = math_ops.conj(op.inputs[1]) + gx = math_ops.div_no_nan(grad, y) + gy = grad * math_ops.div_no_nan(math_ops.div_no_nan(-x, y), y) + return _ReduceGradientArgs(x, y, grad, gx, gy) @ops.RegisterGradient("Pow") @@ -1561,52 +1517,36 @@ def _PowGrad(op: ops.Operation, grad): """Returns grad * (y*x^(y-1), z*log(x)).""" x = op.inputs[0] y = op.inputs[1] - skip_input_indices = None + cx = math_ops.conj(x) + cy = math_ops.conj(y) try: - skip_input_indices = op.skip_input_indices - # TODO(mrry): If `y` is a constant, we can combine `tf.sub()` and the - # constant `1` into a single constant op. 
- if skip_input_indices is not None and 1 in skip_input_indices and _IsScalar( - y): - x = math_ops.conj(x) - y = math_ops.conj(y) - return grad * y * math_ops.pow(x, y - 1), None - + skip_input_indices = op.skip_input_indices or () + if 1 in skip_input_indices and _IsScalar(y): + return grad * cy * math_ops.pow(cx, cy - 1), None except AttributeError: # No gradient skipping, so do the full gradient computation - pass - - (sx, rx, must_reduce_x), (sy, ry, must_reduce_y) = ( - SmartBroadcastGradientArgs(x, y, grad)) - x = math_ops.conj(x) - y = math_ops.conj(y) + skip_input_indices = () - if skip_input_indices is None or 0 not in skip_input_indices: - gx = grad * y * math_ops.pow(x, y - 1) - if must_reduce_x: - gx = array_ops.reshape(math_ops.reduce_sum(gx, rx), sx) - else: + if 0 in skip_input_indices: gx = None + else: + gx = grad * cy * math_ops.pow(cx, cy - 1) - if skip_input_indices is None or 1 not in skip_input_indices: - z = math_ops.conj(op.outputs[0]) - + if 1 in skip_input_indices: + gy = None + else: # Avoid false singularity at x = 0 if x.dtype.is_complex: # real(x) < 0 is fine for the complex case - mask = math_ops.not_equal(x, 0) + mask = math_ops.not_equal(cx, 0) else: # There's no sensible real value to return if x < 0, so return 0 - mask = x > 0 - safe_x = array_ops.where(mask, x, array_ops.ones_like(x)) + mask = cx > 0 + safe_x = array_ops.where(mask, cx, array_ops.ones_like(x)) log_x = array_ops.where(mask, math_ops.log(safe_x), array_ops.zeros_like(x)) - gy = grad * z * log_x - if must_reduce_y: - gy = array_ops.reshape(math_ops.reduce_sum(gy, ry), sy) - else: - gy = None + gy = grad * math_ops.conj(op.outputs[0]) * log_x - return gx, gy + return _ReduceGradientArgs(x, y, grad, gx, gy) def _MaximumMinimumGradInputOnly(op: ops.Operation, grad, selector_op): @@ -1622,36 +1562,27 @@ def _MaximumMinimumGradInputOnly(op: ops.Operation, grad, selector_op): def _MaximumMinimumGrad(op: ops.Operation, grad, selector_op): """Factor out the code for the 
gradient of Maximum or Minimum.""" y = op.inputs[1] - skip_input_indices = None try: - skip_input_indices = op.skip_input_indices - if skip_input_indices is not None and 1 in skip_input_indices and _IsScalar( - y): + skip_input_indices = op.skip_input_indices or () + if 1 in skip_input_indices and _IsScalar(y): # When we want to get gradients for the first input only, and the second # input tensor is a scalar, we can do a much simpler calculation return _MaximumMinimumGradInputOnly(op, grad, selector_op) except AttributeError: # No gradient skipping, so do the full gradient computation - pass + skip_input_indices = () x = op.inputs[0] - sx = array_ops.shape(x) - sy = array_ops.shape(y) zeros = array_ops.zeros_like(grad) xmask = selector_op(x, y) - rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy) - if skip_input_indices is not None and 0 in skip_input_indices: + if 0 in skip_input_indices: gx = None else: - xgrad = array_ops.where_v2(xmask, grad, zeros) - gx = array_ops.reshape(math_ops.reduce_sum(xgrad, rx), sx) - - if skip_input_indices is not None and 1 in skip_input_indices: + gx = array_ops.where_v2(xmask, grad, zeros) + if 1 in skip_input_indices: gy = None else: - ygrad = array_ops.where_v2(xmask, zeros, grad) - gy = array_ops.reshape(math_ops.reduce_sum(ygrad, ry), sy) - - return (gx, gy) + gy = array_ops.where_v2(xmask, zeros, grad) + return _ReduceGradientArgs(x, y, grad, gx, gy) @ops.RegisterGradient("Maximum") @@ -1671,39 +1602,25 @@ def _SquaredDifferenceGrad(op: ops.Operation, grad): """Returns the gradient for (x-y)^2.""" x = op.inputs[0] y = op.inputs[1] - skip_input_indices = None try: - skip_input_indices = op.skip_input_indices + skip_input_indices = op.skip_input_indices or () except AttributeError: # No gradient skipping, so do the full gradient computation - pass + skip_input_indices = () with ops.control_dependencies([grad]): # The parens ensure that if grad is IndexedSlices, it'll get multiplied by # Tensor (not a number like 2.0) which 
causes it to convert to Tensor. x_grad = math_ops.scalar_mul(2.0, grad) * (x - y) - if (isinstance(grad, tensor.Tensor) and - _ShapesFullySpecifiedAndEqual(x, y, grad)): + if isinstance(grad, tensor.Tensor) and _ShapesFullySpecifiedAndEqual( + x, y, grad + ): return x_grad, -x_grad - (sx, rx, must_reduce_x), (sy, ry, must_reduce_y) = ( - SmartBroadcastGradientArgs(x, y, grad)) - - if skip_input_indices is not None and 0 in skip_input_indices: - gx = None - elif must_reduce_x: - gx = array_ops.reshape(math_ops.reduce_sum(x_grad, rx), sx) - else: - gx = x_grad - - if skip_input_indices is not None and 1 in skip_input_indices: - gy = None - elif must_reduce_y: - gy = -array_ops.reshape(math_ops.reduce_sum(x_grad, ry), sy) - else: - gy = -x_grad - return (gx, gy) + gx = None if 0 in skip_input_indices else x_grad + gy = None if 1 in skip_input_indices else -x_grad + return _ReduceGradientArgs(x, y, grad, gx, gy) # Logical operations have no gradients. @@ -1731,29 +1648,18 @@ def _SelectGrad(op: ops.Operation, grad): ) -# pylint: disable=missing-function-docstring @ops.RegisterGradient("SelectV2") def _SelectGradV2(op: ops.Operation, grad): c = op.inputs[0] x = op.inputs[1] y = op.inputs[2] + z = op.outputs[0] zeros = array_ops.zeros([], dtype=grad.dtype.base_dtype) gx = array_ops.where_v2(c, grad, zeros) - x_shape = array_ops.shape(x) - output_shape = array_ops.shape(op.outputs[0]) - # Reduce away broadcasted leading dims. - reduce_x, _ = gen_array_ops.broadcast_gradient_args(x_shape, output_shape) - gx = math_ops.reduce_sum(gx, keepdims=True, axis=reduce_x) - gx = array_ops.reshape(gx, x_shape) - gy = array_ops.where_v2(c, zeros, grad) - y_shape = array_ops.shape(y) - # Reduce away broadcasted leading dims. 
- reduce_y, _ = gen_array_ops.broadcast_gradient_args(y_shape, output_shape) - gy = math_ops.reduce_sum(gy, keepdims=True, axis=reduce_y) - gy = array_ops.reshape(gy, y_shape) - - return (None, gx, gy) + gx, _ = _ReduceGradientArgs(x, z, grad, gx, None) + gy, _ = _ReduceGradientArgs(y, z, grad, gy, None) + return None, gx, gy def _MatMulGradAgainstFirstOnly(op: ops.Operation, grad): @@ -1975,11 +1881,9 @@ def _ComplexGrad(op: ops.Operation, grad): """Returns the real and imaginary components of 'grad', respectively.""" x = op.inputs[0] y = op.inputs[1] - sx = array_ops.shape(x) - sy = array_ops.shape(y) - rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy) - return (array_ops.reshape(math_ops.reduce_sum(math_ops.real(grad), rx), sx), - array_ops.reshape(math_ops.reduce_sum(math_ops.imag(grad), ry), sy)) + gx = math_ops.real(grad) + gy = math_ops.imag(grad) + return _ReduceGradientArgs(x, y, grad, gx, gy) @ops.RegisterGradient("Real") @@ -1998,7 +1902,7 @@ def _ImagGrad(_, grad): @ops.RegisterGradient("Angle") def _AngleGrad(op: ops.Operation, grad): - """Returns -grad / (Im(x) + iRe(x))""" + """Returns `-grad / (Im(x) + i Re(x))`.""" x = op.inputs[0] with ops.control_dependencies([grad]): re = math_ops.real(x) From ec9e1ee786feb60de67eecc4b4e2ae9c6dca493c Mon Sep 17 00:00:00 2001 From: Yash Katariya Date: Mon, 25 Sep 2023 09:37:59 -0700 Subject: [PATCH 219/567] Add cuda 12.2 support to JAX PiperOrigin-RevId: 568244230 --- ...n8.9-ubuntu20.04-manylinux2014-multipython | 47 +++++++++++++++++++ .../toolchains/remote_config/configs.bzl | 22 +++++++++ .../toolchains/remote_config/containers.bzl | 8 ++++ .../toolchains/remote_config/configs.bzl | 22 +++++++++ .../toolchains/remote_config/containers.bzl | 8 ++++ .../toolchains/remote_config/configs.bzl | 22 +++++++++ .../toolchains/remote_config/containers.bzl | 8 ++++ 7 files changed, 137 insertions(+) create mode 100644 tensorflow/tools/ci_build/Dockerfile.rbe.cuda12.2-cudnn8.9-ubuntu20.04-manylinux2014-multipython 
diff --git a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda12.2-cudnn8.9-ubuntu20.04-manylinux2014-multipython b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda12.2-cudnn8.9-ubuntu20.04-manylinux2014-multipython new file mode 100644 index 00000000000000..12e9356664f896 --- /dev/null +++ b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda12.2-cudnn8.9-ubuntu20.04-manylinux2014-multipython @@ -0,0 +1,47 @@ +# Dockerfile to build a manylinux 2010 compliant cross-compiler. +# +# Builds a devtoolset gcc/libstdc++ that targets manylinux 2010 compatible +# glibc (2.12) and system libstdc++ (4.4). +# +# To push a new version, run: +# $ docker build -f Dockerfile.rbe.cuda12.2-cudnn8.6-ubuntu20.04-manylinux2014-multipython \ +# --tag "gcr.io/tensorflow-testing/nosla-cuda12.2-cudnn8.6-ubuntu20.04-manylinux2014-multipython" . +# $ docker push gcr.io/tensorflow-testing/nosla-cuda12.2-cudnn8.6-ubuntu20.04-manylinux2014-multipython + +FROM gcr.io/tensorflow-sigs/build@sha256:edf0324686ffbf488ed0a6b7d3a29ecc6f7b7ad9c04b2e1a527a6a47c0dc1b4b + +ENV DEBIAN_FRONTEND=noninteractive + +COPY install/install_bootstrap_deb_packages.sh /install/ +RUN /install/install_bootstrap_deb_packages.sh + +COPY install/install_deb_packages.sh /install/ +RUN /install/install_deb_packages.sh + +RUN apt-get update && apt-get install -y \ + libbz2-dev \ + libffi-dev \ + libgdbm-dev \ + libncurses5-dev \ + libnss3-dev \ + libreadline-dev \ + libsqlite3-dev \ + patchelf \ + && \ + rm -rf /var/lib/apt/lists/* + +COPY install/install_bazel.sh /install/ +RUN /install/install_bazel.sh + +COPY install/build_and_install_python.sh /install/ +RUN /install/build_and_install_python.sh "3.9.4" +RUN /install/build_and_install_python.sh "3.10.0" +RUN /install/build_and_install_python.sh "3.11.0" +RUN /install/build_and_install_python.sh "3.12.0rc3" + +COPY install/install_pip_packages_by_version.sh /install/ +# https://github.com/numpy/numpy/issues/22623 for `SETUPTOOLS_USE_DISTUTILS`. 
+RUN SETUPTOOLS_USE_DISTUTILS=stdlib /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.9" "jax" +RUN SETUPTOOLS_USE_DISTUTILS=stdlib /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.10" "jax" +RUN SETUPTOOLS_USE_DISTUTILS=stdlib /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.11" "jax" +RUN SETUPTOOLS_USE_DISTUTILS=stdlib /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.12" "jax" diff --git a/tensorflow/tools/toolchains/remote_config/configs.bzl b/tensorflow/tools/toolchains/remote_config/configs.bzl index 497b8e94c7334d..e774cdba1bffd7 100644 --- a/tensorflow/tools/toolchains/remote_config/configs.bzl +++ b/tensorflow/tools/toolchains/remote_config/configs.bzl @@ -252,6 +252,28 @@ def initialize_rbe_configs(): python_install_path = "/usr/local", ) + tensorflow_rbe_config( + name = "ubuntu20.04-clang_manylinux2014-cuda12.2-cudnn8.9", + compiler = "/usr/lib/llvm-17/bin/clang", + cuda_version = "12.2", + cudnn_version = "8.9", + os = "ubuntu20.04-manylinux2014-multipython", + python_versions = ["3.9", "3.10", "3.11", "3.12"], + sysroot = "/dt9", + python_install_path = "/usr/local", + ) + + tensorflow_rbe_config( + name = "ubuntu20.04-gcc9_manylinux2014-cuda12.2-cudnn8.9", + compiler = "/dt9/usr/bin/gcc", + compiler_prefix = "/usr/bin", + cuda_version = "12.2", + cudnn_version = "8.9", + os = "ubuntu20.04-manylinux2014-multipython", + python_versions = ["3.9", "3.10", "3.11", "3.12"], + python_install_path = "/usr/local", + ) + tensorflow_rbe_win_config( name = "windows_py37", python_bin_path = "C:/Python37/python.exe", diff --git a/tensorflow/tools/toolchains/remote_config/containers.bzl b/tensorflow/tools/toolchains/remote_config/containers.bzl index 830b05b0c444b6..2819512e4b902c 100644 --- a/tensorflow/tools/toolchains/remote_config/containers.bzl +++ b/tensorflow/tools/toolchains/remote_config/containers.bzl @@ -9,6 +9,7 @@ container_digests = { 
"cuda11.4-cudnn8.2-ubuntu20.04-manylinux2014-multipython": "sha256:d17894a1349a12baea1732cb133f65f08754ed97d0a6647efe23c916a9ab8f1c", "cuda11.8-cudnn8.6-ubuntu20.04-manylinux2014-multipython": "sha256:c973a5dd1b335b83f5cc65ab2d1f12e12c0cc5d310a2d9bf676fcdb52cf08285", "cuda12.0.1-cudnn8.8-ubuntu20.04-manylinux2014-multipython": "sha256:2551b1587bdd0b63a4dd329eba6416cd07acb25496dde411c376609ce4f076f0", + "cuda12.2-cudnn8.9-ubuntu20.04-manylinux2014-multipython": "sha256:55cf36aa54debd7ec7b3aac5a84af1fe3691a186aceae8d1d8eafe886d6a6950", # ROCM, probably not all of them still in use "rocm-ubuntu18.04-manylinux2010-multipython": "sha256:6e953a09b145df338bcb03e9e36f99b291140c29b72d0a048fb6c5905ccad5eb", "rocm-ubuntu20.04-manylinux2014-multipython": "sha256:906faec7765fe5dd067f2b092b5d5f220c1fedde725fb42c83d031b4d6f32204", @@ -114,6 +115,13 @@ containers = { "digest": container_digests["cuda12.0.1-cudnn8.8-ubuntu20.04-manylinux2014-multipython"], }, + # Built with //tensorflow/tools/ci_build/Dockerfile.rbe.cuda12.2-cudnn8.9-ubuntu20.04-manylinux2014-multipython. + "cuda12.2-cudnn8.9-ubuntu20.04-manylinux2014-multipython": { + "registry": "gcr.io", + "repository": "tensorflow-testing/nosla-cuda12.2-cudnn8.9-ubuntu20.04-manylinux2014-multipython", + "digest": container_digests["cuda12.2-cudnn8.9-ubuntu20.04-manylinux2014-multipython"], + }, + # Built with //tensorflow/tools/ci_build/Dockerfile.rbe.rocm-ubuntu18.04-manylinux2010-multipython. 
"rocm-ubuntu18.04-manylinux2010-multipython": { "registry": "gcr.io", diff --git a/third_party/xla/third_party/tsl/tools/toolchains/remote_config/configs.bzl b/third_party/xla/third_party/tsl/tools/toolchains/remote_config/configs.bzl index cd829d1a88c42c..695384b9bb70b3 100644 --- a/third_party/xla/third_party/tsl/tools/toolchains/remote_config/configs.bzl +++ b/third_party/xla/third_party/tsl/tools/toolchains/remote_config/configs.bzl @@ -252,6 +252,28 @@ def initialize_rbe_configs(): python_install_path = "/usr/local", ) + tensorflow_rbe_config( + name = "ubuntu20.04-clang_manylinux2014-cuda12.2-cudnn8.9", + compiler = "/usr/lib/llvm-17/bin/clang", + cuda_version = "12.2", + cudnn_version = "8.9", + os = "ubuntu20.04-manylinux2014-multipython", + python_versions = ["3.9", "3.10", "3.11", "3.12"], + sysroot = "/dt9", + python_install_path = "/usr/local", + ) + + tensorflow_rbe_config( + name = "ubuntu20.04-gcc9_manylinux2014-cuda12.2-cudnn8.9", + compiler = "/dt9/usr/bin/gcc", + compiler_prefix = "/usr/bin", + cuda_version = "12.2", + cudnn_version = "8.9", + os = "ubuntu20.04-manylinux2014-multipython", + python_versions = ["3.9", "3.10", "3.11", "3.12"], + python_install_path = "/usr/local", + ) + tensorflow_rbe_win_config( name = "windows_py37", python_bin_path = "C:/Python37/python.exe", diff --git a/third_party/xla/third_party/tsl/tools/toolchains/remote_config/containers.bzl b/third_party/xla/third_party/tsl/tools/toolchains/remote_config/containers.bzl index 830b05b0c444b6..2819512e4b902c 100644 --- a/third_party/xla/third_party/tsl/tools/toolchains/remote_config/containers.bzl +++ b/third_party/xla/third_party/tsl/tools/toolchains/remote_config/containers.bzl @@ -9,6 +9,7 @@ container_digests = { "cuda11.4-cudnn8.2-ubuntu20.04-manylinux2014-multipython": "sha256:d17894a1349a12baea1732cb133f65f08754ed97d0a6647efe23c916a9ab8f1c", "cuda11.8-cudnn8.6-ubuntu20.04-manylinux2014-multipython": 
"sha256:c973a5dd1b335b83f5cc65ab2d1f12e12c0cc5d310a2d9bf676fcdb52cf08285", "cuda12.0.1-cudnn8.8-ubuntu20.04-manylinux2014-multipython": "sha256:2551b1587bdd0b63a4dd329eba6416cd07acb25496dde411c376609ce4f076f0", + "cuda12.2-cudnn8.9-ubuntu20.04-manylinux2014-multipython": "sha256:55cf36aa54debd7ec7b3aac5a84af1fe3691a186aceae8d1d8eafe886d6a6950", # ROCM, probably not all of them still in use "rocm-ubuntu18.04-manylinux2010-multipython": "sha256:6e953a09b145df338bcb03e9e36f99b291140c29b72d0a048fb6c5905ccad5eb", "rocm-ubuntu20.04-manylinux2014-multipython": "sha256:906faec7765fe5dd067f2b092b5d5f220c1fedde725fb42c83d031b4d6f32204", @@ -114,6 +115,13 @@ containers = { "digest": container_digests["cuda12.0.1-cudnn8.8-ubuntu20.04-manylinux2014-multipython"], }, + # Built with //tensorflow/tools/ci_build/Dockerfile.rbe.cuda12.2-cudnn8.9-ubuntu20.04-manylinux2014-multipython. + "cuda12.2-cudnn8.9-ubuntu20.04-manylinux2014-multipython": { + "registry": "gcr.io", + "repository": "tensorflow-testing/nosla-cuda12.2-cudnn8.9-ubuntu20.04-manylinux2014-multipython", + "digest": container_digests["cuda12.2-cudnn8.9-ubuntu20.04-manylinux2014-multipython"], + }, + # Built with //tensorflow/tools/ci_build/Dockerfile.rbe.rocm-ubuntu18.04-manylinux2010-multipython. 
"rocm-ubuntu18.04-manylinux2010-multipython": { "registry": "gcr.io", diff --git a/third_party/xla/tools/toolchains/remote_config/configs.bzl b/third_party/xla/tools/toolchains/remote_config/configs.bzl index cd829d1a88c42c..695384b9bb70b3 100644 --- a/third_party/xla/tools/toolchains/remote_config/configs.bzl +++ b/third_party/xla/tools/toolchains/remote_config/configs.bzl @@ -252,6 +252,28 @@ def initialize_rbe_configs(): python_install_path = "/usr/local", ) + tensorflow_rbe_config( + name = "ubuntu20.04-clang_manylinux2014-cuda12.2-cudnn8.9", + compiler = "/usr/lib/llvm-17/bin/clang", + cuda_version = "12.2", + cudnn_version = "8.9", + os = "ubuntu20.04-manylinux2014-multipython", + python_versions = ["3.9", "3.10", "3.11", "3.12"], + sysroot = "/dt9", + python_install_path = "/usr/local", + ) + + tensorflow_rbe_config( + name = "ubuntu20.04-gcc9_manylinux2014-cuda12.2-cudnn8.9", + compiler = "/dt9/usr/bin/gcc", + compiler_prefix = "/usr/bin", + cuda_version = "12.2", + cudnn_version = "8.9", + os = "ubuntu20.04-manylinux2014-multipython", + python_versions = ["3.9", "3.10", "3.11", "3.12"], + python_install_path = "/usr/local", + ) + tensorflow_rbe_win_config( name = "windows_py37", python_bin_path = "C:/Python37/python.exe", diff --git a/third_party/xla/tools/toolchains/remote_config/containers.bzl b/third_party/xla/tools/toolchains/remote_config/containers.bzl index 830b05b0c444b6..2819512e4b902c 100644 --- a/third_party/xla/tools/toolchains/remote_config/containers.bzl +++ b/third_party/xla/tools/toolchains/remote_config/containers.bzl @@ -9,6 +9,7 @@ container_digests = { "cuda11.4-cudnn8.2-ubuntu20.04-manylinux2014-multipython": "sha256:d17894a1349a12baea1732cb133f65f08754ed97d0a6647efe23c916a9ab8f1c", "cuda11.8-cudnn8.6-ubuntu20.04-manylinux2014-multipython": "sha256:c973a5dd1b335b83f5cc65ab2d1f12e12c0cc5d310a2d9bf676fcdb52cf08285", "cuda12.0.1-cudnn8.8-ubuntu20.04-manylinux2014-multipython": 
"sha256:2551b1587bdd0b63a4dd329eba6416cd07acb25496dde411c376609ce4f076f0", + "cuda12.2-cudnn8.9-ubuntu20.04-manylinux2014-multipython": "sha256:55cf36aa54debd7ec7b3aac5a84af1fe3691a186aceae8d1d8eafe886d6a6950", # ROCM, probably not all of them still in use "rocm-ubuntu18.04-manylinux2010-multipython": "sha256:6e953a09b145df338bcb03e9e36f99b291140c29b72d0a048fb6c5905ccad5eb", "rocm-ubuntu20.04-manylinux2014-multipython": "sha256:906faec7765fe5dd067f2b092b5d5f220c1fedde725fb42c83d031b4d6f32204", @@ -114,6 +115,13 @@ containers = { "digest": container_digests["cuda12.0.1-cudnn8.8-ubuntu20.04-manylinux2014-multipython"], }, + # Built with //tensorflow/tools/ci_build/Dockerfile.rbe.cuda12.2-cudnn8.9-ubuntu20.04-manylinux2014-multipython. + "cuda12.2-cudnn8.9-ubuntu20.04-manylinux2014-multipython": { + "registry": "gcr.io", + "repository": "tensorflow-testing/nosla-cuda12.2-cudnn8.9-ubuntu20.04-manylinux2014-multipython", + "digest": container_digests["cuda12.2-cudnn8.9-ubuntu20.04-manylinux2014-multipython"], + }, + # Built with //tensorflow/tools/ci_build/Dockerfile.rbe.rocm-ubuntu18.04-manylinux2010-multipython. "rocm-ubuntu18.04-manylinux2010-multipython": { "registry": "gcr.io", From 149cf817b927dc03c56ad57c4def9fb18fb5957a Mon Sep 17 00:00:00 2001 From: Yishuang Pang Date: Mon, 25 Sep 2023 10:06:51 -0700 Subject: [PATCH 220/567] Legalize some MHLO broadcasted binary element-wise ops (add, div, max, min, mul, pow, shift left, sub, atan2) to TF ops directly. tf.BroadcastTo op folder folds constants by default, this would increase the size of models converted from StableHLO because StableHLO requires explicit broadcasting. This change helps reduce model size by removing unnecessary broadcasts at legalization stage. 
PiperOrigin-RevId: 568252070 --- .../lite/stablehlo/tests/legalize_hlo.mlir | 143 +++++++++++++++++- .../transforms/legalize_hlo_patterns.td | 12 ++ 2 files changed, 151 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/stablehlo/tests/legalize_hlo.mlir b/tensorflow/compiler/mlir/lite/stablehlo/tests/legalize_hlo.mlir index ddc6848686f5a6..e02ce38908d9ff 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/tests/legalize_hlo.mlir +++ b/tensorflow/compiler/mlir/lite/stablehlo/tests/legalize_hlo.mlir @@ -47,12 +47,27 @@ func.func @add(%arg0: tensor<2xi32>) -> tensor<2xi32> { } // CHECK-LABEL: func @broadcast_add( +// CHECK-SAME: %[[VAL_0:.*]]: tensor<1x1xf32>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<1x1000xf32>) -> (tensor<1x1000xf32>, tensor<1x1000xf32>) { +// CHECK-DAG. %cst = arith.constant dense<[1, 1000]> : tensor<2xi64> +// CHECK: %[[VAL_2:.*]] = "tf.AddV2"(%[[VAL_0]], %[[VAL_1]]) : (tensor<1x1xf32>, tensor<1x1000xf32>) -> tensor<1x1000xf32> +// CHECK: %[[VAL_3:.*]] = "tf.AddV2"(%[[VAL_1]], %[[VAL_0]]) : (tensor<1x1000xf32>, tensor<1x1xf32>) -> tensor<1x1000xf32> +// CHECK: return %[[VAL_2]], %[[VAL_3]] : tensor<1x1000xf32>, tensor<1x1000xf32> +// CHECK: } +func.func @broadcast_add(%arg0: tensor<1x1xf32>, %arg1: tensor<1x1000xf32>) -> (tensor<1x1000xf32>, tensor<1x1000xf32>) { + %0 = "mhlo.broadcast_in_dim"(%arg0) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor<1x1xf32>) -> tensor<1x1000xf32> + %1 = mhlo.add %0, %arg1 : tensor<1x1000xf32> + %2 = mhlo.add %arg1, %0 : tensor<1x1000xf32> + func.return %1, %2 : tensor<1x1000xf32>, tensor<1x1000xf32> +} + +// CHECK-LABEL: func @broadcast_add_chlo( // CHECK-SAME: %[[VAL_0:.*]]: tensor<1xi32>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<1x2xi32>) -> tensor<1x2xi32> { // CHECK: %[[VAL_2:.*]] = "tf.AddV2"(%[[VAL_0]], %[[VAL_1]]) : (tensor<1xi32>, tensor<1x2xi32>) -> tensor<1x2xi32> // CHECK: return %[[VAL_2]] : tensor<1x2xi32> // CHECK: } -func.func @broadcast_add(%arg0: tensor<1xi32>, 
%arg1: tensor<1x2xi32>) -> tensor<1x2xi32> { +func.func @broadcast_add_chlo(%arg0: tensor<1xi32>, %arg1: tensor<1x2xi32>) -> tensor<1x2xi32> { %0 = "chlo.broadcast_add"(%arg0, %arg1) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<1xi32>, tensor<1x2xi32>) -> tensor<1x2xi32> func.return %0 : tensor<1x2xi32> } @@ -86,12 +101,27 @@ func.func @div(%arg0: tensor<2xi32>) -> tensor<2xi32> { } // CHECK-LABEL: func @broadcast_div( +// CHECK-SAME: %[[VAL_0:.*]]: tensor<1x1xf32>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<1x1000xf32>) -> (tensor<1x1000xf32>, tensor<1x1000xf32>) { +// CHECK-DAG. %cst = arith.constant dense<[1, 1000]> : tensor<2xi64> +// CHECK: %[[VAL_2:.*]] = "tf.Div"(%[[VAL_0]], %[[VAL_1]]) : (tensor<1x1xf32>, tensor<1x1000xf32>) -> tensor<1x1000xf32> +// CHECK: %[[VAL_3:.*]] = "tf.Div"(%[[VAL_1]], %[[VAL_0]]) : (tensor<1x1000xf32>, tensor<1x1xf32>) -> tensor<1x1000xf32> +// CHECK: return %[[VAL_2]], %[[VAL_3]] : tensor<1x1000xf32>, tensor<1x1000xf32> +// CHECK: } +func.func @broadcast_div(%arg0: tensor<1x1xf32>, %arg1: tensor<1x1000xf32>) -> (tensor<1x1000xf32>, tensor<1x1000xf32>) { + %0 = "mhlo.broadcast_in_dim"(%arg0) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor<1x1xf32>) -> tensor<1x1000xf32> + %1 = mhlo.divide %0, %arg1 : tensor<1x1000xf32> + %2 = mhlo.divide %arg1, %0 : tensor<1x1000xf32> + func.return %1, %2 : tensor<1x1000xf32>, tensor<1x1000xf32> +} + +// CHECK-LABEL: func @broadcast_div_chlo( // CHECK-SAME: %[[VAL_0:.*]]: tensor<1xi32>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<1x2xi32>) -> tensor<1x2xi32> { // CHECK: %[[VAL_2:.*]] = "tf.Div"(%[[VAL_0]], %[[VAL_1]]) : (tensor<1xi32>, tensor<1x2xi32>) -> tensor<1x2xi32> // CHECK: return %[[VAL_2]] : tensor<1x2xi32> // CHECK: } -func.func @broadcast_div(%arg0: tensor<1xi32>, %arg1: tensor<1x2xi32>) -> tensor<1x2xi32> { +func.func @broadcast_div_chlo(%arg0: tensor<1xi32>, %arg1: tensor<1x2xi32>) -> tensor<1x2xi32> { %0 = "chlo.broadcast_divide"(%arg0, %arg1) {broadcast_dimensions 
= dense<1> : tensor<1xi64>} : (tensor<1xi32>, tensor<1x2xi32>) -> tensor<1x2xi32> func.return %0 : tensor<1x2xi32> } @@ -107,6 +137,21 @@ func.func @shift_left(%arg0: tensor<4xi32>, %arg1: tensor<4xi32>) -> tensor<4xi3 func.return %0 : tensor<4xi32> } +// CHECK-LABEL: func @broadcast_shift_left( +// CHECK-SAME: %[[VAL_0:.*]]: tensor<1xi32>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<4xi32>) -> (tensor<4xi32>, tensor<4xi32>) { +// CHECK-DAG. %cst = arith.constant dense<[4]> : tensor<1xi64> +// CHECK: %[[VAL_2:.*]] = "tf.LeftShift"(%[[VAL_0]], %[[VAL_1]]) : (tensor<1xi32>, tensor<4xi32>) -> tensor<4xi32> +// CHECK: %[[VAL_3:.*]] = "tf.LeftShift"(%[[VAL_1]], %[[VAL_0]]) : (tensor<4xi32>, tensor<1xi32>) -> tensor<4xi32> +// CHECK: return %[[VAL_2]], %[[VAL_3]] : tensor<4xi32>, tensor<4xi32> +// CHECK: } +func.func @broadcast_shift_left(%arg0: tensor<1xi32>, %arg1: tensor<4xi32>) -> (tensor<4xi32>, tensor<4xi32>) { + %0 = "mhlo.broadcast_in_dim"(%arg0) {broadcast_dimensions = dense<[0]> : tensor<1xi64>} : (tensor<1xi32>) -> tensor<4xi32> + %1 = mhlo.shift_left %0, %arg1 : tensor<4xi32> + %2 = mhlo.shift_left %arg1, %0 : tensor<4xi32> + func.return %1, %2 : tensor<4xi32>, tensor<4xi32> +} + // CHECK-LABEL: func @div_dynamic( // CHECK-SAME: %[[VAL_0:.*]]: tensor, // CHECK-SAME: %[[VAL_1:.*]]: tensor) -> tensor { @@ -129,6 +174,21 @@ func.func @maximum(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> func.return %0 : tensor<4xf32> } +// CHECK-LABEL: func @broadcast_maximum( +// CHECK-SAME: %[[VAL_0:.*]]: tensor<1x1xf32>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<1x1000xf32>) -> (tensor<1x1000xf32>, tensor<1x1000xf32>) { +// CHECK-DAG. 
%cst = arith.constant dense<[1, 1000]> : tensor<2xi64> +// CHECK: %[[VAL_2:.*]] = "tf.Maximum"(%[[VAL_0]], %[[VAL_1]]) : (tensor<1x1xf32>, tensor<1x1000xf32>) -> tensor<1x1000xf32> +// CHECK: %[[VAL_3:.*]] = "tf.Maximum"(%[[VAL_1]], %[[VAL_0]]) : (tensor<1x1000xf32>, tensor<1x1xf32>) -> tensor<1x1000xf32> +// CHECK: return %[[VAL_2]], %[[VAL_3]] : tensor<1x1000xf32>, tensor<1x1000xf32> +// CHECK: } +func.func @broadcast_maximum(%arg0: tensor<1x1xf32>, %arg1: tensor<1x1000xf32>) -> (tensor<1x1000xf32>, tensor<1x1000xf32>) { + %0 = "mhlo.broadcast_in_dim"(%arg0) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor<1x1xf32>) -> tensor<1x1000xf32> + %1 = mhlo.maximum %0, %arg1 : tensor<1x1000xf32> + %2 = mhlo.maximum %arg1, %0 : tensor<1x1000xf32> + func.return %1, %2 : tensor<1x1000xf32>, tensor<1x1000xf32> +} + // CHECK-LABEL: func @minimum( // CHECK-SAME: %[[VAL_0:.*]]: tensor<4xf32>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<4xf32>) -> tensor<4xf32> { @@ -140,6 +200,21 @@ func.func @minimum(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> func.return %0 : tensor<4xf32> } +// CHECK-LABEL: func @broadcast_minimum( +// CHECK-SAME: %[[VAL_0:.*]]: tensor<1x1xf32>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<1x1000xf32>) -> (tensor<1x1000xf32>, tensor<1x1000xf32>) { +// CHECK-DAG. 
%cst = arith.constant dense<[1, 1000]> : tensor<2xi64> +// CHECK: %[[VAL_2:.*]] = "tf.Minimum"(%[[VAL_0]], %[[VAL_1]]) : (tensor<1x1xf32>, tensor<1x1000xf32>) -> tensor<1x1000xf32> +// CHECK: %[[VAL_3:.*]] = "tf.Minimum"(%[[VAL_1]], %[[VAL_0]]) : (tensor<1x1000xf32>, tensor<1x1xf32>) -> tensor<1x1000xf32> +// CHECK: return %[[VAL_2]], %[[VAL_3]] : tensor<1x1000xf32>, tensor<1x1000xf32> +// CHECK: } +func.func @broadcast_minimum(%arg0: tensor<1x1xf32>, %arg1: tensor<1x1000xf32>) -> (tensor<1x1000xf32>, tensor<1x1000xf32>) { + %0 = "mhlo.broadcast_in_dim"(%arg0) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor<1x1xf32>) -> tensor<1x1000xf32> + %1 = mhlo.minimum %0, %arg1 : tensor<1x1000xf32> + %2 = mhlo.minimum %arg1, %0 : tensor<1x1000xf32> + func.return %1, %2 : tensor<1x1000xf32>, tensor<1x1000xf32> +} + // CHECK-LABEL: func @mul( // CHECK-SAME: %[[VAL_0:.*]]: tensor<2xi32>) -> tensor<2xi32> { // CHECK: %[[VAL_1:.*]] = "tf.Mul"(%[[VAL_0]], %[[VAL_0]]) : (tensor<2xi32>, tensor<2xi32>) -> tensor<2xi32> @@ -151,12 +226,27 @@ func.func @mul(%arg0: tensor<2xi32>) -> tensor<2xi32> { } // CHECK-LABEL: func @broadcast_mul( +// CHECK-SAME: %[[VAL_0:.*]]: tensor<1x1xf32>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<1x1000xf32>) -> (tensor<1x1000xf32>, tensor<1x1000xf32>) { +// CHECK-DAG. 
%cst = arith.constant dense<[1, 1000]> : tensor<2xi64> +// CHECK: %[[VAL_2:.*]] = "tf.Mul"(%[[VAL_0]], %[[VAL_1]]) : (tensor<1x1xf32>, tensor<1x1000xf32>) -> tensor<1x1000xf32> +// CHECK: %[[VAL_3:.*]] = "tf.Mul"(%[[VAL_1]], %[[VAL_0]]) : (tensor<1x1000xf32>, tensor<1x1xf32>) -> tensor<1x1000xf32> +// CHECK: return %[[VAL_2]], %[[VAL_3]] : tensor<1x1000xf32>, tensor<1x1000xf32> +// CHECK: } +func.func @broadcast_mul(%arg0: tensor<1x1xf32>, %arg1: tensor<1x1000xf32>) -> (tensor<1x1000xf32>, tensor<1x1000xf32>) { + %0 = "mhlo.broadcast_in_dim"(%arg0) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor<1x1xf32>) -> tensor<1x1000xf32> + %1 = mhlo.multiply %0, %arg1 : tensor<1x1000xf32> + %2 = mhlo.multiply %arg1, %0 : tensor<1x1000xf32> + func.return %1, %2 : tensor<1x1000xf32>, tensor<1x1000xf32> +} + +// CHECK-LABEL: func @broadcast_mul_chlo( // CHECK-SAME: %[[VAL_0:.*]]: tensor<1xi32>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<1x2xi32>) -> tensor<1x2xi32> { // CHECK: %[[VAL_2:.*]] = "tf.Mul"(%[[VAL_0]], %[[VAL_1]]) : (tensor<1xi32>, tensor<1x2xi32>) -> tensor<1x2xi32> // CHECK: return %[[VAL_2]] : tensor<1x2xi32> // CHECK: } -func.func @broadcast_mul(%arg0: tensor<1xi32>, %arg1: tensor<1x2xi32>) -> tensor<1x2xi32> { +func.func @broadcast_mul_chlo(%arg0: tensor<1xi32>, %arg1: tensor<1x2xi32>) -> tensor<1x2xi32> { %0 = "chlo.broadcast_multiply"(%arg0, %arg1) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<1xi32>, tensor<1x2xi32>) -> tensor<1x2xi32> func.return %0 : tensor<1x2xi32> } @@ -193,16 +283,46 @@ func.func @sub(%arg0: tensor<2xi32>) -> tensor<2xi32> { } // CHECK-LABEL: func @broadcast_sub( +// CHECK-SAME: %[[VAL_0:.*]]: tensor<1x1xf32>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<1x1000xf32>) -> (tensor<1x1000xf32>, tensor<1x1000xf32>) { +// CHECK-DAG. 
%cst = arith.constant dense<[1, 1000]> : tensor<2xi64> +// CHECK: %[[VAL_2:.*]] = "tf.Sub"(%[[VAL_0]], %[[VAL_1]]) : (tensor<1x1xf32>, tensor<1x1000xf32>) -> tensor<1x1000xf32> +// CHECK: %[[VAL_3:.*]] = "tf.Sub"(%[[VAL_1]], %[[VAL_0]]) : (tensor<1x1000xf32>, tensor<1x1xf32>) -> tensor<1x1000xf32> +// CHECK: return %[[VAL_2]], %[[VAL_3]] : tensor<1x1000xf32>, tensor<1x1000xf32> +// CHECK: } +func.func @broadcast_sub(%arg0: tensor<1x1xf32>, %arg1: tensor<1x1000xf32>) -> (tensor<1x1000xf32>, tensor<1x1000xf32>) { + %0 = "mhlo.broadcast_in_dim"(%arg0) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor<1x1xf32>) -> tensor<1x1000xf32> + %1 = mhlo.subtract %0, %arg1 : tensor<1x1000xf32> + %2 = mhlo.subtract %arg1, %0 : tensor<1x1000xf32> + func.return %1, %2 : tensor<1x1000xf32>, tensor<1x1000xf32> +} + +// CHECK-LABEL: func @broadcast_sub_chlo( // CHECK-SAME: %[[VAL_0:.*]]: tensor<1xi32>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<1x2xi32>) -> tensor<1x2xi32> { // CHECK: %[[VAL_2:.*]] = "tf.Sub"(%[[VAL_0]], %[[VAL_1]]) : (tensor<1xi32>, tensor<1x2xi32>) -> tensor<1x2xi32> // CHECK: return %[[VAL_2]] : tensor<1x2xi32> // CHECK: } -func.func @broadcast_sub(%arg0: tensor<1xi32>, %arg1: tensor<1x2xi32>) -> tensor<1x2xi32> { +func.func @broadcast_sub_chlo(%arg0: tensor<1xi32>, %arg1: tensor<1x2xi32>) -> tensor<1x2xi32> { %0 = "chlo.broadcast_subtract"(%arg0, %arg1) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<1xi32>, tensor<1x2xi32>) -> tensor<1x2xi32> func.return %0 : tensor<1x2xi32> } +// CHECK-LABEL: func @broadcast_atan2( +// CHECK-SAME: %[[VAL_0:.*]]: tensor<1xf32>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<4xf32>) -> (tensor<4xf32>, tensor<4xf32>) { +// CHECK-DAG. 
%cst = arith.constant dense<[4]> : tensor<1xi64> +// CHECK: %[[VAL_2:.*]] = "tf.Atan2"(%[[VAL_0]], %[[VAL_1]]) : (tensor<1xf32>, tensor<4xf32>) -> tensor<4xf32> +// CHECK: %[[VAL_3:.*]] = "tf.Atan2"(%[[VAL_1]], %[[VAL_0]]) : (tensor<4xf32>, tensor<1xf32>) -> tensor<4xf32> +// CHECK: return %[[VAL_2]], %[[VAL_3]] : tensor<4xf32>, tensor<4xf32> +// CHECK: } +func.func @broadcast_atan2(%arg0: tensor<1xf32>, %arg1: tensor<4xf32>) -> (tensor<4xf32>, tensor<4xf32>) { + %0 = "mhlo.broadcast_in_dim"(%arg0) {broadcast_dimensions = dense<[0]> : tensor<1xi64>} : (tensor<1xf32>) -> tensor<4xf32> + %1 = mhlo.atan2 %0, %arg1 : tensor<4xf32> + %2 = mhlo.atan2 %arg1, %0 : tensor<4xf32> + func.return %1, %2 : tensor<4xf32>, tensor<4xf32> +} + // CHECK-LABEL: func @shift_right( // CHECK-SAME: %[[VAL_0:.*]]: tensor<4xi32>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<4xi32>) -> tensor<4xi32> { @@ -367,6 +487,21 @@ func.func @pow(%arg0: tensor<2xf32>) -> tensor<2xf32> { func.return %0 : tensor<2xf32> } +// CHECK-LABEL: func @broadcast_pow( +// CHECK-SAME: %[[VAL_0:.*]]: tensor<1xi32>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<4xi32>) -> (tensor<4xi32>, tensor<4xi32>) { +// CHECK-DAG. 
%cst = arith.constant dense<[4]> : tensor<1xi64> +// CHECK: %[[VAL_2:.*]] = "tf.Pow"(%[[VAL_0]], %[[VAL_1]]) : (tensor<1xi32>, tensor<4xi32>) -> tensor<4xi32> +// CHECK: %[[VAL_3:.*]] = "tf.Pow"(%[[VAL_1]], %[[VAL_0]]) : (tensor<4xi32>, tensor<1xi32>) -> tensor<4xi32> +// CHECK: return %[[VAL_2]], %[[VAL_3]] : tensor<4xi32>, tensor<4xi32> +// CHECK: } +func.func @broadcast_pow(%arg0: tensor<1xi32>, %arg1: tensor<4xi32>) -> (tensor<4xi32>, tensor<4xi32>) { + %0 = "mhlo.broadcast_in_dim"(%arg0) {broadcast_dimensions = dense<[0]> : tensor<1xi64>} : (tensor<1xi32>) -> tensor<4xi32> + %1 = mhlo.power %0, %arg1 : tensor<4xi32> + %2 = mhlo.power %arg1, %0 : tensor<4xi32> + func.return %1, %2 : tensor<4xi32>, tensor<4xi32> +} + // CHECK-LABEL: func @pow_dynamic( // CHECK-SAME: %[[VAL_0:.*]]: tensor) -> tensor { // CHECK: %[[VAL_1:.*]] = "tf.Pow"(%[[VAL_0]], %[[VAL_0]]) : (tensor, tensor) -> tensor diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_patterns.td b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_patterns.td index 3ba9d1911ffab6..9cf9aa518849f5 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_patterns.td +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_patterns.td @@ -64,6 +64,18 @@ foreach fromToBinPair = [[MHLO_AddOp, CHLO_BroadcastAddOp, TF_AddV2Op], def : Pat<(fromToBinPair[1] $l, $r, $broadcast_dimensions), (fromToBinPair[2] $l, $r), [(IsLegalNumpyRankedBroadcast $l, $r, $broadcast_dimensions)]>; + def : Pat<(fromToBinPair[0] $l, + (MHLO_BroadcastInDimOp:$output + $bcast_operand, + $broadcast_dimensions)), + (fromToBinPair[2] $l, $bcast_operand), + [(IsTFStyleBroadcast $broadcast_dimensions, $output)]>; + def : Pat<(fromToBinPair[0] (MHLO_BroadcastInDimOp:$output + $bcast_operand, + $broadcast_dimensions), + $r), + (fromToBinPair[2] $bcast_operand, $r), + [(IsTFStyleBroadcast $broadcast_dimensions, $output)]>; } foreach pair = [[MHLO_AndOp, CHLO_BroadcastAndOp, 
TF_BitwiseAndOp], From 036b68f244770a268ef3025bbeaac89793cf22bf Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 25 Sep 2023 10:08:00 -0700 Subject: [PATCH 221/567] Add dependency from source node to VarHandleOp This can guarantee the graph's source node is connected to all source nodes in the graph. PiperOrigin-RevId: 568252463 --- tensorflow/core/tpu/kernels/tpu_functional_ops.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tensorflow/core/tpu/kernels/tpu_functional_ops.cc b/tensorflow/core/tpu/kernels/tpu_functional_ops.cc index a9f1aa375f0b9c..12174b42276a9c 100644 --- a/tensorflow/core/tpu/kernels/tpu_functional_ops.cc +++ b/tensorflow/core/tpu/kernels/tpu_functional_ops.cc @@ -1951,6 +1951,12 @@ Status TPUPartitionedCallOp::ReplaceAndPartitionXLAShardingVariable( AddNodeAttr("shape", proto, &ndef); TF_ASSIGN_OR_RETURN(Node * new_node, graph->AddNode(ndef)); + + // connect new node to source graph, so it can meet the graph specification + for (const Edge* edge : variable->in_edges()) { + graph->AddEdge(edge->src(), edge->src_output(), new_node, + edge->dst_input()); + } per_core_vars.push_back(new_node); } From edf7215123c67d76199d099779137b974b6e1293 Mon Sep 17 00:00:00 2001 From: "A.
Unique TensorFlower" Date: Mon, 25 Sep 2023 10:08:22 -0700 Subject: [PATCH 222/567] [mlir][sparse][xla-cpu] Add rewriting rules to legalize sparse_tensor.convert PiperOrigin-RevId: 568252598 --- .../legalize_sparse_ops/sparse_ops_to_custom_calls.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/third_party/xla/xla/mlir_hlo/mhlo/transforms/legalize_sparse_ops/sparse_ops_to_custom_calls.cc b/third_party/xla/xla/mlir_hlo/mhlo/transforms/legalize_sparse_ops/sparse_ops_to_custom_calls.cc index 69d061517a47a2..fa17f5145fa87f 100644 --- a/third_party/xla/xla/mlir_hlo/mhlo/transforms/legalize_sparse_ops/sparse_ops_to_custom_calls.cc +++ b/third_party/xla/xla/mlir_hlo/mhlo/transforms/legalize_sparse_ops/sparse_ops_to_custom_calls.cc @@ -57,7 +57,8 @@ void populateLegalizeSparseOpsToCustomCallPatterns( MLIRContext* context, TypeConverter& typeConverter, RewritePatternSet* patterns) { patterns->add, - SparseOpToCustomCallConverter>( + SparseOpToCustomCallConverter, + SparseOpToCustomCallConverter>( typeConverter, context); } From 8e7844a87b8bf640a3bf27040644060bce23119e Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Mon, 25 Sep 2023 10:15:18 -0700 Subject: [PATCH 223/567] Add nccl to sigbuild and JAX Dockerfiles. https://github.com/openxla/xla/pull/5850/files adds the ability to use a binary NCCL via a stub, just as with NVIDIA's other libraries. Make sure that NCCL is available in CI dockerfiles. 
PiperOrigin-RevId: 568254770 --- tensorflow/tools/tf_sig_build_dockerfiles/devel.packages.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/tools/tf_sig_build_dockerfiles/devel.packages.txt b/tensorflow/tools/tf_sig_build_dockerfiles/devel.packages.txt index 50c4eca080a4b5..311a4d905814a0 100644 --- a/tensorflow/tools/tf_sig_build_dockerfiles/devel.packages.txt +++ b/tensorflow/tools/tf_sig_build_dockerfiles/devel.packages.txt @@ -11,6 +11,7 @@ libcurand-12-2 libcusolver-dev-12-2 libcusparse-dev-12-2 libcublas-dev-12-2 +libnccl-dev=2.18.5-1+cuda12.2 # CuDNN: https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#ubuntu-network-installation libcudnn8-dev=8.9.4.25-1+cuda12.2 libcudnn8=8.9.4.25-1+cuda12.2 From 26d09c4465b8d9376d574f7728092793a02deac9 Mon Sep 17 00:00:00 2001 From: Jieying Luo Date: Mon, 25 Sep 2023 10:51:12 -0700 Subject: [PATCH 224/567] [PJRT C API] Implement framework side change for registering a custom call. - Add a py extension to call the custom call C API. - Change the implementation of register_custom_call_target to store handlers for the custom call targets and delay the registration until the handler for a xla platform is registered. - Change register_plugin to load PJRT plugin when register_plugin is called (instead of when a client is created), and let it return the PJRT_Api* loaded. - Delay calling discover_pjrt_plugins() and register_pjrt_plugin_factories_from_env() until the first time backends() is called.
PiperOrigin-RevId: 568265745 --- third_party/xla/xla/pjrt/pjrt_api.cc | 8 ++-- third_party/xla/xla/pjrt/pjrt_api.h | 7 +-- third_party/xla/xla/python/BUILD | 2 + third_party/xla/xla/python/xla.cc | 8 +++- third_party/xla/xla/python/xla_client.py | 55 ++++++++++++++++++++--- third_party/xla/xla/python/xla_client.pyi | 8 +++- 6 files changed, 74 insertions(+), 14 deletions(-) diff --git a/third_party/xla/xla/pjrt/pjrt_api.cc b/third_party/xla/xla/pjrt/pjrt_api.cc index 170edcb6f447e3..84dbad9687d048 100644 --- a/third_party/xla/xla/pjrt/pjrt_api.cc +++ b/third_party/xla/xla/pjrt/pjrt_api.cc @@ -79,8 +79,8 @@ xla::Status SetPjrtApi(absl::string_view device_type, const PJRT_Api* api) { } typedef const PJRT_Api* (*PjrtApiInitFn)(); -xla::Status LoadPjrtPlugin(absl::string_view device_type, - absl::string_view library_path) { +xla::StatusOr LoadPjrtPlugin(absl::string_view device_type, + absl::string_view library_path) { #ifdef PLATFORM_WINDOWS return tsl::errors::Unimplemented( "LoadPjrtPlugin is not implemented on windows yet."); @@ -97,7 +97,9 @@ xla::Status LoadPjrtPlugin(absl::string_view device_type, } LOG(INFO) << "GetPjrtApi was found for " << device_type << " at " << library_path; - return SetPjrtApi(device_type, init_fn()); + const PJRT_Api* api = init_fn(); + TF_RETURN_IF_ERROR(SetPjrtApi(device_type, api)); + return api; #endif } diff --git a/third_party/xla/xla/pjrt/pjrt_api.h b/third_party/xla/xla/pjrt/pjrt_api.h index 4b089b1214ac48..8a1d3cfe7468c7 100644 --- a/third_party/xla/xla/pjrt/pjrt_api.h +++ b/third_party/xla/xla/pjrt/pjrt_api.h @@ -31,9 +31,10 @@ xla::Status SetPjrtApi(absl::string_view device_type, const PJRT_Api* api); // Loads a PJRT plugin. The library provided by library_path must export a // symbol called `GetPjrtApi` with function signature `const PJRT_Api* // GetPjrtApi()`. This method dlopen the plugin library, dlsym `GetPjrtApi`, -// calls `GetPjrtApi` and `SetPjrtApi`. 
-xla::Status LoadPjrtPlugin(absl::string_view device_type, - absl::string_view library_path); +// calls `GetPjrtApi` and `SetPjrtApi`. Returns the loaded PJRT_Api* if +// successful. +xla::StatusOr LoadPjrtPlugin(absl::string_view device_type, + absl::string_view library_path); // Requires that SetPjrtApi has been successfully called on `device_type` before // calling this method. diff --git a/third_party/xla/xla/python/BUILD b/third_party/xla/xla/python/BUILD index d5f3d9197fbab5..44852eec3fb6f1 100644 --- a/third_party/xla/xla/python/BUILD +++ b/third_party/xla/xla/python/BUILD @@ -1071,6 +1071,7 @@ cc_library( ":weakref_lru_cache", ":xla_compiler", # placeholder for index annotation deps + "@com_google_absl//absl/base", "@com_google_absl//absl/log:initialize", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", @@ -1089,6 +1090,7 @@ cc_library( "//xla/pjrt:pjrt_client", "//xla/pjrt:pjrt_compiler", "//xla/pjrt:tfrt_cpu_pjrt_client", + "//xla/pjrt/c:pjrt_c_api_hdrs", "//xla/pjrt/distributed", "//xla/pjrt/distributed:client", "//xla/pjrt/distributed:protocol_proto_cc", diff --git a/third_party/xla/xla/python/xla.cc b/third_party/xla/xla/python/xla.cc index c7277c0d501335..46053a1009a0ae 100644 --- a/third_party/xla/xla/python/xla.cc +++ b/third_party/xla/xla/python/xla.cc @@ -26,11 +26,13 @@ limitations under the License. 
#include #include +#include "absl/base/casts.h" // clang-format off // Must be included first #include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" #include "absl/synchronization/mutex.h" +#include "xla/pjrt/c/pjrt_c_api.h" #include "xla/pjrt/distributed/protocol.pb.h" #include "xla/python/py_client.h" #include "tsl/python/lib/core/numpy.h" //NOLINT @@ -472,8 +474,10 @@ static void Init(py::module_& m) { return pjrt_api.ok(); }); m.def("load_pjrt_plugin", - [](std::string platform_name, std::string library_path) { - xla::ThrowIfError(pjrt::LoadPjrtPlugin(platform_name, library_path)); + [](std::string platform_name, std::string library_path) -> py::capsule { + const PJRT_Api* api = xla::ValueOrThrow( + pjrt::LoadPjrtPlugin(platform_name, library_path)); + return py::capsule(absl::bit_cast(api), "pjrt_c_api"); }); m.def("pjrt_plugin_initialized", [](std::string platform_name) -> bool { return xla::ValueOrThrow(pjrt::IsPjrtPluginInitialized(platform_name)); diff --git a/third_party/xla/xla/python/xla_client.py b/third_party/xla/xla/python/xla_client.py index d6172122d20e4f..e0d48c09120db1 100644 --- a/third_party/xla/xla/python/xla_client.py +++ b/third_party/xla/xla/python/xla_client.py @@ -21,6 +21,7 @@ import inspect import logging import os +import threading from typing import Any, List, Mapping, Optional, Sequence, Tuple, Union from . import xla_extension as _xla @@ -44,7 +45,7 @@ # Just an internal arbitrary increasing number to help with backward-compatible # changes. In JAX, reference this via jax._src.lib.xla_extension_version. -_version = 197 +_version = 198 # Version number for MLIR:Python components. 
mlir_api_version = 54 @@ -60,6 +61,9 @@ def make_cpu_client() -> ...: + register_custom_call_handler( + 'cpu', _xla.register_custom_call_target + ) return _xla.get_tfrt_cpu_client(asynchronous=True) @@ -91,6 +95,12 @@ def make_gpu_client( if memory_fraction: config.memory_fraction = float(memory_fraction) config.preallocate = preallocate not in ('0', 'false', 'False') + register_custom_call_handler( + 'CUDA', _xla.register_custom_call_target + ) + register_custom_call_handler( + 'ROCM', _xla.register_custom_call_target + ) if mock: return _xla.get_mock_gpu_client( @@ -137,8 +147,8 @@ def pjrt_plugin_loaded(plugin_name: str) -> bool: return _xla.pjrt_plugin_loaded(plugin_name) -def load_pjrt_plugin_dynamically(plugin_name: str, library_path: str) -> None: - _xla.load_pjrt_plugin(plugin_name, library_path) +def load_pjrt_plugin_dynamically(plugin_name: str, library_path: str) -> Any: + return _xla.load_pjrt_plugin(plugin_name, library_path) def pjrt_plugin_initialized(plugin_name: str) -> bool: @@ -509,6 +519,12 @@ def LoadedExecutable_execute_with_token(self, arguments, device=None): LoadedExecutable.execute_with_token = LoadedExecutable_execute_with_token +_custom_callback_handler: dict[str, Any] = {} +# Key is xla_platform_name, value is (function_name, function) +_custom_callback: dict[str, list[Tuple[str, Any]]] = {} +_custom_callback_lock = threading.Lock() + + def register_custom_call_target( name: str, fn: Any, platform: str = 'cpu' ) -> None: @@ -521,8 +537,37 @@ def register_custom_call_target( """ # To support AMD GPUs, we need to have xla_platform_names["gpu"] == "ROCM" # Since that is hardcoded to CUDA, we are using the following as workaround. 
- _xla.register_custom_call_target(name, fn, - xla_platform_names.get(platform, platform)) + xla_platform_name = xla_platform_names.get(platform, platform) + with _custom_callback_lock: + if xla_platform_name in _custom_callback_handler: + _custom_callback_handler[xla_platform_name](name, fn, xla_platform_name) + else: + _custom_callback.setdefault(xla_platform_name, []).append((name, fn)) + + +def register_custom_call_handler(platform: str, handler: Any) -> None: + """Registers a custom handler and use it to register existing custom calls. + + If a custom call handler for the platform already exist, calling this method + is a no-op and it will not register a new handler. + Args: + platform: the target platform. + handler: the function to register a custom call. + """ + xla_platform_name = xla_platform_names.get(platform, platform) + with _custom_callback_lock: + if xla_platform_name in _custom_callback_handler: + logger.debug( + 'Custom call handler for %s is already register. Will not register a' + ' new one', + xla_platform_name, + ) + return + _custom_callback_handler[xla_platform_name] = handler + if xla_platform_name in _custom_callback: + for name, fn in _custom_callback[xla_platform_name]: + handler(name, fn, xla_platform_name) + del _custom_callback[xla_platform_name] register_custom_call_partitioner = _xla.register_custom_call_partitioner diff --git a/third_party/xla/xla/python/xla_client.pyi b/third_party/xla/xla/python/xla_client.pyi index a6f48f7a0d469e..7a01e4817a5364 100644 --- a/third_party/xla/xla/python/xla_client.pyi +++ b/third_party/xla/xla/python/xla_client.pyi @@ -119,7 +119,7 @@ def make_c_api_client( def pjrt_plugin_loaded(plugin_name: str) -> bool: ... -def load_pjrt_plugin_dynamically(plugin_name: str, library_path: str) -> None: +def load_pjrt_plugin_dynamically(plugin_name: str, library_path: str) -> Any: ... @@ -249,4 +249,10 @@ def register_custom_call_target( name: str, fn: Callable, platform: str = ... ) -> None: ... 
+ + +def register_custom_call_handler(xla_platform_name: str, handler: Any) -> None: + ... + + def encode_inspect_sharding_callback(handler: Any) -> bytes: ... From 397c205a3fa7b3c7c481f2224741c8b8a5e3523d Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Mon, 25 Sep 2023 11:00:39 -0700 Subject: [PATCH 225/567] Enable imports such as `from tensorflow.keras import layers` PiperOrigin-RevId: 568268550 --- tensorflow/api_template.__init__.py | 19 ++++++++++++------- tensorflow/api_template_v1.__init__.py | 7 ++++++- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/tensorflow/api_template.__init__.py b/tensorflow/api_template.__init__.py index 8b789f7202db6d..6894ab538894d6 100644 --- a/tensorflow/api_template.__init__.py +++ b/tensorflow/api_template.__init__.py @@ -112,7 +112,7 @@ if _keras_module.__version__.startswith("3."): # This is the Keras 3.x case. _keras_to_use = _keras_module._tf_keras - _keras_package_name = "keras._tf_keras" + _keras_package_name = "keras._tf_keras.keras" _keras_version = "keras_3" else: # This is the Keras 2.x case. @@ -123,7 +123,12 @@ pass if _keras_to_use is not None: - setattr(_current_module, "keras", _keras_to_use) + _module_dir = _module_util.get_parent_dir_for_name(_keras_package_name) + if _module_dir: + _current_module.__path__ = [_module_dir] + _current_module.__path__ + setattr(_current_module, + "keras", + _LazyLoader("keras", globals(), _keras_package_name)) else: # TF will not have `tf.keras` in this case. This should not be silent. _logging.warning("Unable to load `tf.keras`. 
Check that the `keras` package " @@ -235,11 +240,11 @@ def _running_from_pip_package(): from tf_keras.api._v2.keras import optimizers from tf_keras.api._v2.keras import initializers elif _keras_version == "keras_3": - from keras import _tf_keras as keras - from keras._tf_keras import losses - from keras._tf_keras import metrics - from keras._tf_keras import optimizers - from keras._tf_keras import initializers + from keras._tf_keras import keras + from keras._tf_keras.keras import losses + from keras._tf_keras.keras import metrics + from keras._tf_keras.keras import optimizers + from keras._tf_keras.keras import initializers # pylint: enable=undefined-variable diff --git a/tensorflow/api_template_v1.__init__.py b/tensorflow/api_template_v1.__init__.py index fe250ed3df42f5..82ef4315f4aef1 100644 --- a/tensorflow/api_template_v1.__init__.py +++ b/tensorflow/api_template_v1.__init__.py @@ -112,7 +112,12 @@ pass if _keras_to_use is not None: - setattr(_current_module, "keras", _keras_to_use) + _module_dir = _module_util.get_parent_dir_for_name(_keras_package_name) + if _module_dir: + _current_module.__path__ = [_module_dir] + _current_module.__path__ + setattr(_current_module, + "keras", + _LazyLoader("keras", globals(), _keras_package_name)) else: # TF will not have `tf.keras` in this case. This should not be silent. _logging.warning("Unable to load `tf.keras`. 
Check that the `keras` package " From dc60587c40fcfa7e22111e76b2e4ec0a75d561a3 Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Mon, 25 Sep 2023 11:12:05 -0700 Subject: [PATCH 226/567] [XLA:GPU] Support HLO input for AOT compilation PiperOrigin-RevId: 568271995 --- third_party/xla/xla/service/BUILD | 10 ++++++ .../xla/service/xla_aot_compile_gpu_test.cc | 14 ++++++-- .../xla/xla/service/xla_aot_compile_test.hlo | 7 ++++ third_party/xla/xla/service/xla_compile.bzl | 3 +- .../xla/xla/service/xla_compile_main.cc | 32 +++++++++++++------ 5 files changed, 53 insertions(+), 13 deletions(-) create mode 100644 third_party/xla/xla/service/xla_aot_compile_test.hlo diff --git a/third_party/xla/xla/service/BUILD b/third_party/xla/xla/service/BUILD index 5cd69b9ca4f2fd..2a4e6d77e45cba 100644 --- a/third_party/xla/xla/service/BUILD +++ b/third_party/xla/xla/service/BUILD @@ -6968,11 +6968,13 @@ xla_cc_binary( "//xla/service:cpu_plugin", "//xla/service/cpu:cpu_compiler", "//xla/service/cpu:cpu_executable", + "//xla/tools:hlo_module_loader", "@llvm-project//mlir:ArithDialect", "@llvm-project//mlir:FuncDialect", "@llvm-project//mlir:IR", "@llvm-project//mlir:Parser", "@local_tsl//tsl/platform:env", + "@local_tsl//tsl/platform:path", "@local_tsl//tsl/platform:platform_port", "@local_tsl//tsl/platform:protobuf", "@local_tsl//tsl/util:command_line_flags", @@ -7010,6 +7012,13 @@ xla_aot_compile_gpu( module = "xla_aot_compile_test.mlir", ) +xla_aot_compile_gpu( + name = "xla_aot_compile_test_gpu_executable_hlo", + autotune_results = "xla_aot_compile_test_autotune_results.prototxt", + gpu_target_config = "xla_aot_compile_test_gpu_target_config.prototxt", + module = "xla_aot_compile_test.hlo", +) + xla_aot_compile_gpu( name = "xla_aot_compile_test_gpu_executable_constant", autotune_results = "xla_aot_compile_test_autotune_results.prototxt", @@ -7092,6 +7101,7 @@ xla_cc_test( srcs = if_cuda_is_configured(["xla_aot_compile_gpu_test.cc"]), data = if_cuda_is_configured([ 
":xla_aot_compile_test_gpu_executable", + ":xla_aot_compile_test_gpu_executable_hlo", ":xla_aot_compile_test_gpu_executable_constant", ":xla_aot_compile_test_gpu_executable_gemm", ":xla_aot_compile_test_gpu_executable_gemm_runtime_autotuning", diff --git a/third_party/xla/xla/service/xla_aot_compile_gpu_test.cc b/third_party/xla/xla/service/xla_aot_compile_gpu_test.cc index d4c8fd91787a7a..2e1283f6d13f0d 100644 --- a/third_party/xla/xla/service/xla_aot_compile_gpu_test.cc +++ b/third_party/xla/xla/service/xla_aot_compile_gpu_test.cc @@ -31,9 +31,12 @@ namespace xla { namespace xla_compile { namespace { -TEST(XlaCompileTest, LoadGpuExecutable) { - std::string path = tsl::io::JoinPath(tsl::testing::XlaSrcRoot(), "service", - "xla_aot_compile_test_gpu_executable"); +class XlaAotCompileTest : public ::testing::TestWithParam {}; + +TEST_P(XlaAotCompileTest, LoadGpuExecutable) { + std::string path = + tsl::io::JoinPath(tsl::testing::XlaSrcRoot(), "service", GetParam() + /*"xla_aot_compile_test_gpu_executable"*/); std::string serialized_aot_result; TF_ASSERT_OK( tsl::ReadFileToString(tsl::Env::Default(), path, &serialized_aot_result)); @@ -76,6 +79,11 @@ TEST(XlaCompileTest, LoadGpuExecutable) { EXPECT_EQ(expected, output); } +INSTANTIATE_TEST_SUITE_P( + TestingAotFormats, XlaAotCompileTest, + testing::Values("xla_aot_compile_test_gpu_executable", + "xla_aot_compile_test_gpu_executable_hlo")); + TEST(XlaCompileTest, LoadGpuExecutableWithConstant) { std::string path = tsl::io::JoinPath(tsl::testing::XlaSrcRoot(), "service", diff --git a/third_party/xla/xla/service/xla_aot_compile_test.hlo b/third_party/xla/xla/service/xla_aot_compile_test.hlo new file mode 100644 index 00000000000000..a1c0d016f0f0f5 --- /dev/null +++ b/third_party/xla/xla/service/xla_aot_compile_test.hlo @@ -0,0 +1,7 @@ +HloModule Module + +ENTRY computation { + p0 = f64[3] parameter(0) + p1 = f64[3] parameter(1) + ROOT out = add(p0, p1) +} diff --git a/third_party/xla/xla/service/xla_compile.bzl 
b/third_party/xla/xla/service/xla_compile.bzl index 6704bc3f4b00e6..5e3b96f5d52fa7 100644 --- a/third_party/xla/xla/service/xla_compile.bzl +++ b/third_party/xla/xla/service/xla_compile.bzl @@ -45,7 +45,8 @@ def xla_aot_compile_gpu( module, gpu_target_config, autotune_results): - """Runs xla_compile to compile an MHLO or StableHLO module into an AotCompilationResult for GPU + """Runs xla_compile to compile an MHLO, StableHLO or HLO module into an + AotCompilationResult for GPU Args: name: The name of the build rule. diff --git a/third_party/xla/xla/service/xla_compile_main.cc b/third_party/xla/xla/service/xla_compile_main.cc index fd8c4461d56517..2b53a4394c4657 100644 --- a/third_party/xla/xla/service/xla_compile_main.cc +++ b/third_party/xla/xla/service/xla_compile_main.cc @@ -33,8 +33,10 @@ limitations under the License. #include "xla/service/cpu/cpu_compiler.h" #include "xla/service/cpu/cpu_executable.h" #include "xla/statusor.h" +#include "xla/tools/hlo_module_loader.h" #include "tsl/platform/env.h" #include "tsl/platform/init_main.h" +#include "tsl/platform/path.h" #include "tsl/platform/protobuf.h" #include "tsl/util/command_line_flags.h" @@ -52,8 +54,9 @@ namespace xla { namespace xla_compile { const char kUsageHeader[] = - "xla_compile performs ahead-of-time compilation of an MHLO or StableHLO " - "module,\nresulting in an AotCompilationResult compiled for CPU.\n" + "xla_compile performs ahead-of-time compilation of an MHLO, StableHLO or " + "HLO " + "module,\nresulting in an AotCompilationResult compiled for CPU or GPU.\n" "A typical invocation looks like this:\n" "\n" " $ xla_compile --module_file=mymodule.mlir --output_file=output " @@ -100,11 +103,14 @@ StatusOr AotCompileGpuExecutable( } #endif -xla::Status XlaCompileMain(const std::string& module_path, - const std::string& output_path, - const std::string& platform, - const std::string& gpu_target_config_path, - const std::string& autotune_results_path) { +xla::StatusOr> LoadModule( + const 
std::string& module_path) { + auto format = std::string(tsl::io::Extension(module_path)); + if (format == "hlo" || format == "txt") { + return LoadModuleFromFile( + module_path, hlo_module_loader_details::Config(), + /*format=*/"hlo", [&](HloModuleConfig* c) {}, nullptr); + } std::string module_string; TF_RETURN_IF_ERROR( tsl::ReadFileToString(tsl::Env::Default(), module_path, &module_string)); @@ -132,8 +138,16 @@ xla::Status XlaCompileMain(const std::string& module_path, DebugOptions debug_options = DefaultDebugOptionsIgnoringFlags(); HloModuleConfig config(shape); config.set_debug_options(debug_options); + return HloModule::CreateFromProto(hlo_module_proto, config); +} + +xla::Status XlaCompileMain(const std::string& module_path, + const std::string& output_path, + const std::string& platform, + const std::string& gpu_target_config_path, + const std::string& autotune_results_path) { TF_ASSIGN_OR_RETURN(std::unique_ptr hlo_module, - HloModule::CreateFromProto(hlo_module_proto, config)); + LoadModule(module_path)); // Run AOT compilation. std::string result; @@ -194,7 +208,7 @@ int main(int argc, char* argv[]) { std::string autotune_results_path; std::vector flag_list = { tsl::Flag("module_file", &module_path, - "The path to the MHLO or StableHLO file"), + "The path to the HLO, MHLO or StableHLO file"), tsl::Flag("output_file", &output_path, "The path to the output file"), tsl::Flag("platform", &platform, "The platform on which the built executable runs"), From a914df7952a8ac3cce98266ded227a29a6315fa5 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Mon, 25 Sep 2023 11:23:33 -0700 Subject: [PATCH 227/567] [stream_executor] NFC: Clean up stream executor public libraries Clean up and document stream executor "non-executor" public libraries. 
https://github.com/openxla/xla/issues/5761 PiperOrigin-RevId: 568275550 --- tensorflow/core/common_runtime/gpu/BUILD | 4 +- .../common_runtime/pluggable_device/BUILD | 4 +- tensorflow/core/util/BUILD | 2 +- .../xla/third_party/tsl/tsl/framework/BUILD | 2 +- third_party/xla/xla/stream_executor/BUILD | 279 +++++++----------- .../xla/xla/stream_executor/data_type.h | 1 - .../xla/stream_executor/device_description.cc | 4 - .../xla/stream_executor/device_description.h | 2 +- .../stream_executor/device_memory_allocator.h | 2 +- .../xla/xla/stream_executor/device_options.h | 6 +- third_party/xla/xla/stream_executor/gpu/BUILD | 1 - .../xla/xla/stream_executor/platform.cc | 17 +- third_party/xla/xla/xla.bzl | 1 - 13 files changed, 133 insertions(+), 192 deletions(-) diff --git a/tensorflow/core/common_runtime/gpu/BUILD b/tensorflow/core/common_runtime/gpu/BUILD index 4652e0051cd8fd..e77d1e25ef2620 100644 --- a/tensorflow/core/common_runtime/gpu/BUILD +++ b/tensorflow/core/common_runtime/gpu/BUILD @@ -188,7 +188,7 @@ tf_cuda_library( "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/strings", "@local_tsl//tsl/framework:device_id_utils", - "@local_xla//xla/stream_executor:device_id_utils", + "@local_xla//xla/stream_executor", "@local_xla//xla/stream_executor/gpu:gpu_cudamallocasync_allocator", "@local_xla//xla/stream_executor/gpu:gpu_init_impl", ] + if_google( @@ -232,7 +232,7 @@ tf_cuda_library( "//tensorflow/core:protos_all_cc", "//tensorflow/core/platform:stream_executor", "@eigen_archive//:eigen3", - "@local_xla//xla/stream_executor:device_id_utils", + "@local_xla//xla/stream_executor", "@local_xla//xla/stream_executor/gpu:gpu_init", ] + if_static([":gpu_runtime_impl"]), ) diff --git a/tensorflow/core/common_runtime/pluggable_device/BUILD b/tensorflow/core/common_runtime/pluggable_device/BUILD index 92a6ca4af74fcc..514bad92fc5fec 100644 --- a/tensorflow/core/common_runtime/pluggable_device/BUILD +++ 
b/tensorflow/core/common_runtime/pluggable_device/BUILD @@ -57,10 +57,8 @@ cc_library( "@com_google_absl//absl/status", "@com_google_absl//absl/strings", "@local_tsl//tsl/framework:device_id_utils", - "@local_xla//xla/stream_executor:device_id_utils", + "@local_xla//xla/stream_executor", "@local_xla//xla/stream_executor:device_mem_allocator", - "@local_xla//xla/stream_executor:event", - "@local_xla//xla/stream_executor:kernel", ], alwayslink = 1, ) diff --git a/tensorflow/core/util/BUILD b/tensorflow/core/util/BUILD index aa86cb6a0ff568..e01ba6ff6a792e 100644 --- a/tensorflow/core/util/BUILD +++ b/tensorflow/core/util/BUILD @@ -779,7 +779,7 @@ tf_kernel_library( "//tensorflow/core:lib", ] + if_cuda([ "@local_xla//xla/stream_executor/cuda:cuda_blas_utils", - "@local_xla//xla/stream_executor:data_type", + "@local_xla//xla/stream_executor", "@local_tsl//tsl/cuda:cusparse", "@local_config_cuda//cuda:cub_headers", ]) + if_rocm([ diff --git a/third_party/xla/third_party/tsl/tsl/framework/BUILD b/third_party/xla/third_party/tsl/tsl/framework/BUILD index f3c83660c6aece..af0eb46e4ae6e4 100644 --- a/third_party/xla/third_party/tsl/tsl/framework/BUILD +++ b/third_party/xla/third_party/tsl/tsl/framework/BUILD @@ -230,7 +230,7 @@ cc_library( ], visibility = ["//visibility:public"], deps = [ - "//tsl/platform:types", + "//tsl/lib/gtl:int_type", ] + if_static([ ":device_id_impl", ]), diff --git a/third_party/xla/xla/stream_executor/BUILD b/third_party/xla/xla/stream_executor/BUILD index 6783578a4bc150..e9f3b11e0d0912 100644 --- a/third_party/xla/xla/stream_executor/BUILD +++ b/third_party/xla/xla/stream_executor/BUILD @@ -44,13 +44,14 @@ package_group( # a header-only dependency (this is a very rare exception when we are building dynamic librarires # for open source projects, e.g. Tensorflow, internally at Google we almost always link statically), # or usually on a `sream_executor` target that will also link implementation. 
- filegroup( name = "stream_executor_public_headers", srcs = [ "allocator_stats.h", "command_buffer.h", + "data_type.h", "device_description.h", + "device_id_utils.h", "device_memory.h", "device_memory_allocator.h", "device_options.h", @@ -87,6 +88,104 @@ filegroup( visibility = ["//visibility:public"], ) +#===--------------------------------------------------------------------------------------------===# +# StreamExecutor public libraries +#===--------------------------------------------------------------------------------------------===# + +# Some of the StreamExecutor libraries that do not depend on StreamExecutor itself (Stream, Kernel, +# Event, etc.) exported as standalone libraries (these libraries should not depend on +# `stream_executor` and `stream_executor_headers` targets). This is mostly a historical artifact of +# an era when StreamExecutor was a part of Tensorflow. + +# TODO(ezhulenev): Consider merging some (all?) of these libraries into StreamExecutor target, e.g. +# does it really make sense to have a separate `device_memory` library which is not usable without +# StreamExecutor. 
+ +tf_proto_library( + name = "device_description_proto", + srcs = ["device_description.proto"], + cc_api_version = 2, + make_default_target_header_only = True, + protodeps = ["//xla:autotune_results_proto"], + visibility = ["//visibility:public"], +) + +cc_library( + name = "device_description", + srcs = ["device_description.cc"], + hdrs = ["device_description.h"], + visibility = ["//visibility:public"], + deps = [ + ":device_description_proto_cc", + ":launch_dim", + "//xla/stream_executor/platform", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/strings", + "@local_tsl//tsl/lib/math:math_util", + "@local_tsl//tsl/platform:statusor", + ], +) + +cc_library( + name = "device_memory", + hdrs = ["device_memory.h"], + visibility = ["//visibility:public"], + deps = ["//xla/stream_executor/platform"], +) + +cc_library( + name = "device_options", + hdrs = ["device_options.h"], + visibility = ["//visibility:public"], + deps = [ + "//xla/stream_executor/platform", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/strings", + ], +) + +cc_library( + name = "host_or_device_scalar", + hdrs = ["host_or_device_scalar.h"], + visibility = ["//visibility:public"], + deps = [":device_memory"], +) + +cc_library( + name = "launch_dim", + hdrs = ["launch_dim.h"], + visibility = ["//visibility:public"], + deps = ["@com_google_absl//absl/strings"], +) + +cc_library( + name = "numeric_options", + hdrs = ["numeric_options.h"], + visibility = ["//visibility:public"], +) + +cc_library( + name = "platform", + srcs = ["platform.cc"], + hdrs = ["platform.h"], + visibility = ["//visibility:public"], + deps = [ + ":device_description", + ":device_options", + "//xla/stream_executor/platform", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/status", + "@local_tsl//tsl/platform:status", + "@local_tsl//tsl/platform:statusor", + ], +) + +cc_library( + name = "plugin", + hdrs = ["plugin.h"], + visibility = ["//visibility:public"], +) + 
#===--------------------------------------------------------------------------------------------===# # StreamExecutor platform-dependent interfaces #===--------------------------------------------------------------------------------------------===# @@ -165,6 +264,7 @@ cc_library( "device_memory.h", "device_memory_allocator.h", "device_options.h", + "device_id_utils.h", "dnn.h", "event.h", "executor_cache.h", @@ -172,6 +272,7 @@ cc_library( "kernel.h", "kernel_cache_config.h", "kernel_spec.h", + "data_type.h", "launch_dim.h", "numeric_options.h", "module_spec.h", @@ -204,8 +305,11 @@ cc_library( "@com_google_absl//absl/synchronization", "@com_google_absl//absl/types:optional", "@com_google_absl//absl/types:span", + "@local_tsl//tsl/framework:device_id", + "@local_tsl//tsl/framework:device_type", "@local_tsl//tsl/platform:env", "@local_tsl//tsl/platform:errors", + "@local_tsl//tsl/platform:float8", "@local_tsl//tsl/platform:logging", "@local_tsl//tsl/platform:status", "@local_tsl//tsl/platform:statusor", @@ -218,77 +322,9 @@ transitive_hdrs( deps = [":stream_executor_headers"], ) -cc_library( - name = "launch_dim", - hdrs = ["launch_dim.h"], - visibility = ["//visibility:public"], - deps = ["@com_google_absl//absl/strings"], -) - -cc_library( - name = "trace_listener", - hdrs = [ - "trace_listener.h", - ], - visibility = ["//visibility:public"], - deps = [ - ":device_memory", - ":kernel", - "//xla/stream_executor/platform", - "@com_google_absl//absl/strings", - "@local_tsl//tsl/platform:status", - ], -) - -tf_proto_library( - name = "device_description_proto", - srcs = ["device_description.proto"], - cc_api_version = 2, - make_default_target_header_only = True, - protodeps = [ - "//xla:autotune_results_proto", - ], - visibility = ["//visibility:public"], -) - -cc_library( - name = "device_description", - srcs = ["device_description.cc"], - hdrs = ["device_description.h"], - visibility = ["//visibility:public"], - deps = [ - ":device_description_proto_cc", - 
":launch_dim", - "//xla/stream_executor/platform", - "@com_google_absl//absl/algorithm:container", - "@com_google_absl//absl/strings", - "@local_tsl//tsl/lib/math:math_util", - "@local_tsl//tsl/platform:numbers", - ], -) - -cc_library( - name = "kernel_cache_config", - hdrs = ["kernel_cache_config.h"], - visibility = ["//visibility:public"], -) - -cc_library( - name = "module_spec", - hdrs = ["module_spec.h"], - visibility = ["//visibility:public"], - deps = [ - "//xla/stream_executor/platform", - "@com_google_absl//absl/types:span", - "@local_tsl//tsl/platform:logging", - ], -) - cc_library( name = "allocator_stats", - srcs = [ - "allocator_stats.cc", - ], + srcs = ["allocator_stats.cc"], hdrs = ["allocator_stats.h"], visibility = ["//visibility:public"], deps = [ @@ -298,49 +334,6 @@ cc_library( ], ) -cc_library( - name = "data_type", - hdrs = ["data_type.h"], - visibility = ["//visibility:public"], - deps = [ - ":dnn_proto_cc", - "@local_tsl//tsl/platform:float8", - "@local_tsl//tsl/protobuf:dnn_proto_cc", - ], -) - -cc_library( - name = "device_id_utils", - hdrs = ["device_id_utils.h"], - visibility = ["//visibility:public"], - deps = [ - ":platform", - ":stream_executor", - "@com_google_absl//absl/strings", - "@local_tsl//tsl/framework:device_id_impl", - "@local_tsl//tsl/lib/gtl:int_type", - "@local_tsl//tsl/platform:str_util", - ], -) - -cc_library( - name = "device_memory", - hdrs = ["device_memory.h"], - visibility = ["//visibility:public"], - deps = ["//xla/stream_executor/platform"], -) - -cc_library( - name = "device_options", - hdrs = ["device_options.h"], - visibility = ["//visibility:public"], - deps = [ - "//xla/stream_executor/platform", - "@com_google_absl//absl/strings", - "@local_tsl//tsl/platform:logging", - ], -) - tf_proto_library( name = "dnn_proto", srcs = ["dnn.proto"], @@ -359,36 +352,6 @@ cc_library( ], ) -cc_library( - name = "platform", - srcs = ["platform.cc"], - hdrs = ["platform.h"], - visibility = ["//visibility:public"], - deps = [ 
- ":plugin", - ":stream_executor_headers", - "//xla/stream_executor/platform", - "@com_google_absl//absl/log:check", - "@com_google_absl//absl/strings", - "@local_tsl//tsl/platform:errors", - "@local_tsl//tsl/platform:logging", - "@local_tsl//tsl/platform:status", - "@local_tsl//tsl/platform:statusor", - ], -) - -cc_library( - name = "plugin", - hdrs = ["plugin.h"], - visibility = ["//visibility:public"], -) - -cc_library( - name = "numeric_options", - hdrs = ["numeric_options.h"], - visibility = ["//visibility:public"], -) - cc_library( name = "device_mem_allocator", hdrs = [ @@ -412,7 +375,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":platform", - ":stream_executor_pimpl_header", + ":stream_executor_headers", "//xla/stream_executor/platform", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/container:node_hash_map", @@ -459,9 +422,7 @@ cc_library( ], visibility = ["//visibility:public"], deps = [ - ":device_description", ":device_description_proto_cc", - ":kernel_cache_config", ":platform", ":stream_executor_headers", ":stream_executor_internal", @@ -501,7 +462,7 @@ cc_library( ":fft", ":host_or_device_scalar", ":kernel", - ":launch_dim", + ":kernel_spec", ":platform", ":plugin_registry", ":stream_executor_headers", @@ -558,17 +519,14 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":allocator_stats", - ":data_type", ":device_description", ":device_description_proto_cc", ":device_memory", ":device_options", ":fft", - ":kernel_cache_config", ":kernel_spec", ":launch_dim", ":platform", - ":plugin", ":plugin_registry", ":stream_executor_headers", "//xla/stream_executor/platform", @@ -598,7 +556,6 @@ cc_library( hdrs = ["blas.h"], visibility = ["//visibility:public"], deps = [ - ":data_type", ":stream_executor_headers", "//xla/stream_executor/platform", "@com_google_absl//absl/strings", @@ -608,17 +565,6 @@ cc_library( ], ) -cc_library( - name = "host_or_device_scalar", - hdrs = ["host_or_device_scalar.h"], - 
visibility = ["//visibility:public"], - deps = [ - ":data_type", - ":device_memory", - "//xla/stream_executor/platform", - ], -) - cc_library( name = "command_buffer", srcs = ["command_buffer.cc"], @@ -656,7 +602,6 @@ cc_library( ":fft", ":multi_platform_manager", ":platform", - ":plugin", ":stream_executor_headers", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/strings", @@ -674,7 +619,6 @@ cc_library( hdrs = ["dnn.h"], visibility = ["//visibility:public"], deps = [ - ":data_type", ":device_description_proto_cc", ":device_memory", ":dnn_proto_cc", @@ -741,10 +685,12 @@ cc_library( name = "stream_executor", textual_hdrs = [ "blas.h", + "data_type.h", "device_description.h", "device_memory.h", "device_memory_allocator.h", "device_options.h", + "device_id_utils.h", "dnn.h", "event.h", "executor_cache.h", @@ -771,6 +717,8 @@ cc_library( deps = [ ":stream_executor_headers", "@com_google_absl//absl/log:check", + "@local_tsl//tsl/framework:device_id", + "@local_tsl//tsl/framework:device_type", "@local_tsl//tsl/platform:errors", "@local_tsl//tsl/platform:logging", "@local_tsl//tsl/platform:status", @@ -782,13 +730,12 @@ cc_library( name = "stream_executor_impl", visibility = ["//visibility:public"], deps = [ - ":device_description", ":device_memory", ":dnn", ":dnn_proto_cc", ":event", ":kernel", - ":launch_dim", + ":kernel_spec", ":multi_platform_manager", ":platform", ":stream_executor_headers", diff --git a/third_party/xla/xla/stream_executor/data_type.h b/third_party/xla/xla/stream_executor/data_type.h index 382f87d20fddfb..c0f900d4794e4d 100644 --- a/third_party/xla/xla/stream_executor/data_type.h +++ b/third_party/xla/xla/stream_executor/data_type.h @@ -19,7 +19,6 @@ limitations under the License. 
#include #include -#include "xla/stream_executor/dnn.pb.h" #include "tsl/platform/float8.h" #include "tsl/protobuf/dnn.pb.h" diff --git a/third_party/xla/xla/stream_executor/device_description.cc b/third_party/xla/xla/stream_executor/device_description.cc index b2b6ffdfbe0851..619a252432e9f4 100644 --- a/third_party/xla/xla/stream_executor/device_description.cc +++ b/third_party/xla/xla/stream_executor/device_description.cc @@ -15,15 +15,11 @@ limitations under the License. #include "xla/stream_executor/device_description.h" -#include #include -#include #include #include -#include "absl/strings/str_cat.h" #include "tsl/lib/math/math_util.h" -#include "tsl/platform/numbers.h" namespace stream_executor { diff --git a/third_party/xla/xla/stream_executor/device_description.h b/third_party/xla/xla/stream_executor/device_description.h index 936974dd65df27..cbbde07417ba70 100644 --- a/third_party/xla/xla/stream_executor/device_description.h +++ b/third_party/xla/xla/stream_executor/device_description.h @@ -20,7 +20,7 @@ limitations under the License. #ifndef XLA_STREAM_EXECUTOR_DEVICE_DESCRIPTION_H_ #define XLA_STREAM_EXECUTOR_DEVICE_DESCRIPTION_H_ -#include +#include #include #include #include diff --git a/third_party/xla/xla/stream_executor/device_memory_allocator.h b/third_party/xla/xla/stream_executor/device_memory_allocator.h index fe529648c8252f..7b3ee6eb162f6f 100644 --- a/third_party/xla/xla/stream_executor/device_memory_allocator.h +++ b/third_party/xla/xla/stream_executor/device_memory_allocator.h @@ -42,7 +42,7 @@ class DeviceMemoryAllocator; // // We say that an instance of ScopedDeviceMemory is "active" if it currently // owns a (possibly empty) slice of memory on the device. Moving, -// Release()'ing, Free()'ing, and other actions can deactive an active object. +// Release()'ing, Free()'ing, and other actions can deactivate an active object. 
template class ScopedDeviceMemory { public: diff --git a/third_party/xla/xla/stream_executor/device_options.h b/third_party/xla/xla/stream_executor/device_options.h index 5fc86b25e1c97a..776fa4220813c4 100644 --- a/third_party/xla/xla/stream_executor/device_options.h +++ b/third_party/xla/xla/stream_executor/device_options.h @@ -19,11 +19,13 @@ limitations under the License. #ifndef XLA_STREAM_EXECUTOR_DEVICE_OPTIONS_H_ #define XLA_STREAM_EXECUTOR_DEVICE_OPTIONS_H_ + #include +#include +#include +#include "absl/log/check.h" #include "absl/strings/str_join.h" -#include "xla/stream_executor/platform/port.h" -#include "tsl/platform/logging.h" namespace stream_executor { diff --git a/third_party/xla/xla/stream_executor/gpu/BUILD b/third_party/xla/xla/stream_executor/gpu/BUILD index a790045a3e42f0..aec49df203b217 100644 --- a/third_party/xla/xla/stream_executor/gpu/BUILD +++ b/third_party/xla/xla/stream_executor/gpu/BUILD @@ -407,7 +407,6 @@ tsl_gpu_library( deps = [ ":gpu_init_impl", "//xla/stream_executor", - "//xla/stream_executor:device_id_utils", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", diff --git a/third_party/xla/xla/stream_executor/platform.cc b/third_party/xla/xla/stream_executor/platform.cc index 9f2c5c0978f128..801e381ffa8172 100644 --- a/third_party/xla/xla/stream_executor/platform.cc +++ b/third_party/xla/xla/stream_executor/platform.cc @@ -15,11 +15,12 @@ limitations under the License. 
#include "xla/stream_executor/platform.h" -#include "absl/strings/str_cat.h" -#include "xla/stream_executor/platform/port.h" -#include "xla/stream_executor/stream_executor_pimpl.h" -#include "tsl/platform/errors.h" -#include "tsl/platform/logging.h" +#include +#include + +#include "absl/status/status.h" +#include "xla/stream_executor/device_options.h" +#include "tsl/platform/status.h" namespace stream_executor { @@ -47,10 +48,10 @@ bool Platform::Initialized() const { return true; } tsl::Status Platform::Initialize( const std::map &platform_options) { if (!platform_options.empty()) { - return tsl::Status(absl::StatusCode::kUnimplemented, - "this platform does not support custom initialization"); + return absl::UnimplementedError( + "this platform does not support custom initialization"); } - return ::tsl::OkStatus(); + return tsl::OkStatus(); } } // namespace stream_executor diff --git a/third_party/xla/xla/xla.bzl b/third_party/xla/xla/xla.bzl index 0cfaa67192e898..3fe599358e2baf 100644 --- a/third_party/xla/xla/xla.bzl +++ b/third_party/xla/xla/xla.bzl @@ -96,7 +96,6 @@ def xla_cc_test( clean_dep("//xla/service/gpu:hlo_op_profile_proto_cc_impl"), clean_dep("//xla/stream_executor:dnn_proto_cc_impl"), clean_dep("//xla/stream_executor:stream_executor_impl"), - clean_dep("//xla/stream_executor:device_id_utils"), clean_dep("//xla/stream_executor/gpu:gpu_cudamallocasync_allocator"), clean_dep("//xla/stream_executor/gpu:gpu_init_impl"), clean_dep("@local_tsl//tsl/profiler/utils:time_utils_impl"), From d00f9eeb3105a7abe75ed7965b5a6b22e9aef596 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Mon, 25 Sep 2023 11:55:03 -0700 Subject: [PATCH 228/567] [stream_executor] Add back device_id_utils library PiperOrigin-RevId: 568284525 --- third_party/xla/xla/stream_executor/BUILD | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/third_party/xla/xla/stream_executor/BUILD b/third_party/xla/xla/stream_executor/BUILD index e9f3b11e0d0912..b0f10c4d83d930 100644 --- 
a/third_party/xla/xla/stream_executor/BUILD +++ b/third_party/xla/xla/stream_executor/BUILD @@ -144,6 +144,17 @@ cc_library( ], ) +cc_library( + name = "device_id_utils", + hdrs = ["device_id_utils.h"], + visibility = ["//visibility:public"], + deps = [ + ":platform", + ":stream_executor", + "@local_tsl//tsl/framework:device_id_impl", + ], +) + cc_library( name = "host_or_device_scalar", hdrs = ["host_or_device_scalar.h"], From 3890704374c2a0ff95d4b3e2c684f19275f8071f Mon Sep 17 00:00:00 2001 From: Mason Chang Date: Mon, 25 Sep 2023 11:59:56 -0700 Subject: [PATCH 229/567] Move resource hoisting to before TPURewrite. PiperOrigin-RevId: 568285914 --- tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc b/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc index dfe07231999791..7e56a6496b5292 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc @@ -254,6 +254,9 @@ void CreateTPUBridgePipelineImpl( pm.addNestedPass( mlir::TF::CreateRewriteTPUEmbeddingOpsPass()); pm.addPass(CreateTPUAnnotateDynamicShapeInputsPass()); + pm.addNestedPass( + TF::CreateHoistReplicateInvariantResourceWritesPass()); + pm.addPass(CreateTPURewritePass(module_name)); pm.addPass(createSymbolDCEPass()); pm.addNestedPass(TFDevice::CreateEmbeddingProgramKeyPass()); @@ -261,8 +264,6 @@ void CreateTPUBridgePipelineImpl( TFDevice::CreateReplicateInvariantOpHoistingPass()); pm.addPass(CreateTPUMergeVariablesWithExecutePass()); pm.addNestedPass(CreateExtractTPUCopyWithDynamicShapeOpPass()); - pm.addNestedPass( - TF::CreateHoistReplicateInvariantResourceWritesPass()); pm.addNestedPass(CreateTPUColocateCompositeResourceOps()); if (tensorflow::GetMlirCommonFlags() ->tf_mlir_enable_tpu_variable_runtime_reformatting_pass) { From 9e29f611891202074bb669f2ffc5c865552644cd Mon Sep 17 00:00:00 2001 
From: "A. Unique TensorFlower" Date: Mon, 25 Sep 2023 12:14:01 -0700 Subject: [PATCH 230/567] Regenerate pyi files after mypy update PiperOrigin-RevId: 568290052 --- .../python/client/_pywrap_tf_session.pyi | 57 ++++++++++++------- .../python/util/_pywrap_checkpoint_reader.pyi | 3 +- 2 files changed, 40 insertions(+), 20 deletions(-) diff --git a/tensorflow/python/client/_pywrap_tf_session.pyi b/tensorflow/python/client/_pywrap_tf_session.pyi index 35e1832acb9ac4..2d0c304ff7d8c9 100644 --- a/tensorflow/python/client/_pywrap_tf_session.pyi +++ b/tensorflow/python/client/_pywrap_tf_session.pyi @@ -118,13 +118,20 @@ class OpsByName: class PyGraph: @classmethod def __init__(cls, *args, **kwargs) -> None: ... - def Dismantle(self, *args, **kwargs) -> Any: ... - def _add_op(self, *args, **kwargs) -> Any: ... - def _get_operation_by_name(self, *args, **kwargs) -> Any: ... - def _op_def_for_type(self, *args, **kwargs) -> Any: ... - def get_operations(self, *args, **kwargs) -> Any: ... - def new_operations(self, *args, **kwargs) -> Any: ... - def num_operations(self, *args, **kwargs) -> Any: ... + @classmethod + def Dismantle(cls, *args, **kwargs) -> Any: ... + @classmethod + def _add_op(cls, *args, **kwargs) -> Any: ... + @classmethod + def _get_operation_by_name(cls, *args, **kwargs) -> Any: ... + @classmethod + def _op_def_for_type(cls, *args, **kwargs) -> Any: ... + @classmethod + def get_operations(cls, *args, **kwargs) -> Any: ... + @classmethod + def new_operations(cls, *args, **kwargs) -> Any: ... + @classmethod + def num_operations(cls, *args, **kwargs) -> Any: ... @property def _nodes_by_id(self) -> OpsById: ... @property @@ -140,14 +147,22 @@ class PyOperation: graph @classmethod def __init__(cls, *args, **kwargs) -> None: ... - def _add_control_input(self, *args, **kwargs) -> Any: ... - def _add_control_inputs(self, *args, **kwargs) -> Any: ... - def _add_outputs(self, *args, **kwargs) -> Any: ... - def _init_outputs(self, *args, **kwargs) -> Any: ... 
- def _remove_all_control_inputs(self, *args, **kwargs) -> Any: ... - def _set_device_from_string(self, *args, **kwargs) -> Any: ... - def _tf_input(self, *args, **kwargs) -> Any: ... - def _tf_output(self, *args, **kwargs) -> Any: ... + @classmethod + def _add_control_input(cls, *args, **kwargs) -> Any: ... + @classmethod + def _add_control_inputs(cls, *args, **kwargs) -> Any: ... + @classmethod + def _add_outputs(cls, *args, **kwargs) -> Any: ... + @classmethod + def _init_outputs(cls, *args, **kwargs) -> Any: ... + @classmethod + def _remove_all_control_inputs(cls, *args, **kwargs) -> Any: ... + @classmethod + def _set_device_from_string(cls, *args, **kwargs) -> Any: ... + @classmethod + def _tf_input(cls, *args, **kwargs) -> Any: ... + @classmethod + def _tf_output(cls, *args, **kwargs) -> Any: ... @property def _c_op(self) -> TF_Operation: ... @property @@ -175,10 +190,14 @@ class PyTensor: _shape_val: object @classmethod def __init__(cls, *args, **kwargs) -> None: ... - def _as_tf_output(self, *args, **kwargs) -> Any: ... - def _rank(self, *args, **kwargs) -> Any: ... - def _set_shape(self, *args, **kwargs) -> Any: ... - def consumers(self, *args, **kwargs) -> Any: ... + @classmethod + def _as_tf_output(cls, *args, **kwargs) -> Any: ... + @classmethod + def _rank(cls, *args, **kwargs) -> Any: ... + @classmethod + def _set_shape(cls, *args, **kwargs) -> Any: ... + @classmethod + def consumers(cls, *args, **kwargs) -> Any: ... @property def _dtype(self) -> object: ... @property diff --git a/tensorflow/python/util/_pywrap_checkpoint_reader.pyi b/tensorflow/python/util/_pywrap_checkpoint_reader.pyi index f194a759355e08..1402d60148afeb 100644 --- a/tensorflow/python/util/_pywrap_checkpoint_reader.pyi +++ b/tensorflow/python/util/_pywrap_checkpoint_reader.pyi @@ -17,7 +17,8 @@ from typing import Any class CheckpointReader: def __init__(self, arg0: str) -> None: ... - def CheckpointReader_GetTensor(self, *args, **kwargs) -> Any: ... 
+ @classmethod + def CheckpointReader_GetTensor(cls, arg0: CheckpointReader, arg1: str) -> object: ... def _GetVariableToDataTypeMap(self, *args, **kwargs) -> Any: ... def _HasTensor(self, arg0: str) -> bool: ... def debug_string(self) -> bytes: ... From e892ce92a745b2dc79349d6aa08eaf33e59e3744 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 25 Sep 2023 12:19:37 -0700 Subject: [PATCH 231/567] Support dynamic shape for zero-point-offset calculation for DotGeneral PiperOrigin-RevId: 568291491 --- .../bridge/convert_mhlo_quant_to_int.cc | 229 +++++++--- .../bridge/convert-mhlo-quant-to-int.mlir | 422 ++++++++++++++---- 2 files changed, 494 insertions(+), 157 deletions(-) diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_mhlo_quant_to_int.cc b/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_mhlo_quant_to_int.cc index 0a081269b4db4e..d3136f00e33fbe 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_mhlo_quant_to_int.cc +++ b/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_mhlo_quant_to_int.cc @@ -647,18 +647,72 @@ Value CreateZeroPointPartialOffset(OpBuilder &builder, Location loc, return mul_op; } -llvm::SmallVector CalculateBroadcastDims( - Value zp_contribution, llvm::ArrayRef contracting_dims, - llvm::ArrayRef batching_dims, int64_t non_batching_starting_idx) { +Value GetDimValue(OpBuilder &builder, Location loc, Value tensor, + mlir::ShapedType tensor_shape, int64_t idx) { + if (tensor_shape.isDynamicDim(idx)) { + // Get dynamic dim using GetDimensionSizeOp and convert result from to + // <1xi64>. 
+ Value dynamic_dim = builder.create( + loc, tensor, builder.getI64IntegerAttr(idx)); + dynamic_dim = builder.create( + loc, RankedTensorType::get(ArrayRef{}, builder.getI64Type()), + dynamic_dim); + return builder.create( + loc, RankedTensorType::get({1}, builder.getI64Type()), dynamic_dim); + } else { + return builder.create( + loc, DenseIntElementsAttr::get( + RankedTensorType::get({1}, builder.getI64Type()), + {tensor_shape.getDimSize(idx)})); + } +} + +Value CalculateDynamicOutputDims(OpBuilder &builder, Location loc, Value lhs, + Value rhs, + mhlo::DotDimensionNumbersAttr dims) { + mlir::ShapedType lhs_shape = lhs.getType().cast(); + mlir::ShapedType rhs_shape = rhs.getType().cast(); + // Calculate each output dim and concatenate into a 1D tensor. + llvm::SmallVector output_dims; + for (int64_t i = 0; i < lhs_shape.getRank(); ++i) { + if (absl::c_count(dims.getLhsBatchingDimensions(), i) != 0) { + output_dims.push_back(GetDimValue(builder, loc, lhs, lhs_shape, i)); + } + } + for (int64_t i = 0; i < lhs_shape.getRank(); ++i) { + if (absl::c_count(dims.getLhsContractingDimensions(), i) == 0 && + absl::c_count(dims.getLhsBatchingDimensions(), i) == 0) { + output_dims.push_back(GetDimValue(builder, loc, lhs, lhs_shape, i)); + } + } + for (int64_t i = 0; i < rhs_shape.getRank(); ++i) { + if (absl::c_count(dims.getRhsContractingDimensions(), i) == 0 && + absl::c_count(dims.getRhsBatchingDimensions(), i) == 0) { + output_dims.push_back(GetDimValue(builder, loc, rhs, rhs_shape, i)); + } + } + return builder.create(loc, output_dims, + builder.getI64IntegerAttr(0)); +} + +Value BroadcastZpContribution(OpBuilder &builder, Location loc, + Value zp_contribution, + llvm::ArrayRef contracting_dims, + llvm::ArrayRef batching_dims, + int64_t non_batching_starting_idx, + RankedTensorType output_tensor_type, + Value &output_dims_value, Value lhs, Value rhs, + mhlo::DotDimensionNumbersAttr dims) { // This function calculates the dims for broadcasting from the - // 
zero-point-offset tensor to the final output tensor. + // zero-point-offset tensor to the final output tensor, and then do the + // broadcast. auto zp_contribution_rank = zp_contribution.getType().dyn_cast().getRank(); llvm::SmallVector broadcast_dims; broadcast_dims.resize(zp_contribution_rank, 0); // Result tensor will have batching dims first, then LHS result dims, then // RHS result dims. So non-batching result dims index doesn't start from 0. - // The arg non_batching_starting_idx is used distinguish LHS and RHS. + // The arg non_batching_starting_idx is used to distinguish LHS and RHS. int64_t result_batching_idx = 0; int64_t result_non_batching_idx = non_batching_starting_idx; for (int64_t idx = 0, original_idx = 0; idx < zp_contribution_rank; @@ -674,7 +728,28 @@ llvm::SmallVector CalculateBroadcastDims( broadcast_dims[idx] = result_batching_idx++; } } - return broadcast_dims; + // Use broadcast_in_dim or dynamic_broadcast_in_dim based on input shape + // dynamism. + if (zp_contribution.getType().dyn_cast().hasStaticShape()) { + zp_contribution = builder.create( + loc, output_tensor_type, zp_contribution, + DenseIntElementsAttr::get( + RankedTensorType::get({static_cast(broadcast_dims.size())}, + builder.getI64Type()), + broadcast_dims)); + } else { + if (!output_dims_value) { + output_dims_value = + CalculateDynamicOutputDims(builder, loc, lhs, rhs, dims); + } + zp_contribution = builder.create( + loc, output_tensor_type, zp_contribution, output_dims_value, + DenseIntElementsAttr::get( + RankedTensorType::get({static_cast(broadcast_dims.size())}, + builder.getI64Type()), + broadcast_dims)); + } + return zp_contribution; } Value CalculateZeroPointOffset(OpBuilder &builder, Location loc, Value lhs, @@ -709,57 +784,64 @@ Value CalculateZeroPointOffset(OpBuilder &builder, Location loc, Value lhs, auto output_tensor_type = RankedTensorType::get(output_dims, output_element_type); - Value result = builder.create( - loc,
DenseIntElementsAttr::get(output_tensor_type, {0})); - + Value result = nullptr; + Value output_dims_value = nullptr; // Calculate LHS contribution when RHS zp is non-zero. if (rhs_zp != 0) { Value lhs_zp_contribution = CreateZeroPointPartialOffset( builder, loc, lhs, rhs_zp, dims.getLhsContractingDimensions()); // Broadcast lhs ZP contribution to result tensor shape. - llvm::SmallVector broadcast_dims = CalculateBroadcastDims( - lhs_zp_contribution, dims.getLhsContractingDimensions(), - dims.getLhsBatchingDimensions(), - dims.getLhsBatchingDimensions().size()); - lhs_zp_contribution = builder.create( - loc, output_tensor_type, lhs_zp_contribution, - DenseIntElementsAttr::get( - RankedTensorType::get({static_cast(broadcast_dims.size())}, - builder.getI64Type()), - broadcast_dims)); - result = builder.create(loc, result, lhs_zp_contribution); + lhs_zp_contribution = BroadcastZpContribution( + builder, loc, lhs_zp_contribution, dims.getLhsContractingDimensions(), + dims.getLhsBatchingDimensions(), dims.getLhsBatchingDimensions().size(), + output_tensor_type, output_dims_value, lhs, rhs, dims); + result = lhs_zp_contribution; } // Calculate RHS contribution when LHS zp is non-zero. if (lhs_zp != 0) { Value rhs_zp_contribution = CreateZeroPointPartialOffset( builder, loc, rhs, lhs_zp, dims.getRhsContractingDimensions()); // Broadcast rhs ZP contribution to result tensor shape. 
- llvm::SmallVector broadcast_dims = CalculateBroadcastDims( - rhs_zp_contribution, dims.getRhsContractingDimensions(), + rhs_zp_contribution = BroadcastZpContribution( + builder, loc, rhs_zp_contribution, dims.getRhsContractingDimensions(), dims.getRhsBatchingDimensions(), - lhs_shape.getRank() - dims.getLhsContractingDimensions().size()); - - rhs_zp_contribution = builder.create( - loc, output_tensor_type, rhs_zp_contribution, - DenseIntElementsAttr::get( - RankedTensorType::get({static_cast(broadcast_dims.size())}, - builder.getI64Type()), - broadcast_dims)); - result = builder.create(loc, result, rhs_zp_contribution); + lhs_shape.getRank() - dims.getLhsContractingDimensions().size(), + output_tensor_type, output_dims_value, lhs, rhs, dims); + if (result) { + result = builder.create(loc, result, rhs_zp_contribution); + } else { + result = rhs_zp_contribution; + } } if (lhs_zp != 0 && rhs_zp != 0) { // Contributions from LHS_ZP * RHS_ZP. // This is multiplied by the product of all contracting dimensions. - int32_t contracting_dim_total = 1; + int32_t contracting_dim_total_int = 1; + bool has_dynamic_contracting_dim = false; + Value dynamic_contracting_dim_total = builder.create( + loc, builder.getI32IntegerAttr(static_cast(1))); + // Calculate the product for static/dynamic dims separately. 
for (const int64_t rhs_idx : dims.getRhsContractingDimensions()) { - contracting_dim_total *= rhs_shape.getDimSize(rhs_idx); + if (rhs_shape.isDynamicDim(rhs_idx)) { + has_dynamic_contracting_dim = true; + auto dim = builder.create( + loc, rhs, builder.getI64IntegerAttr(rhs_idx)); + dynamic_contracting_dim_total = builder.create( + loc, dynamic_contracting_dim_total, dim); + } else { + contracting_dim_total_int *= rhs_shape.getDimSize(rhs_idx); + } } - const int32_t zp_constant_offset = static_cast(lhs_zp) * + Value zp_offset_value = builder.create( + loc, builder.getI32IntegerAttr(static_cast(lhs_zp) * static_cast(rhs_zp) * - contracting_dim_total; - auto zp_offset_value = builder.create( - loc, builder.getI32IntegerAttr(zp_constant_offset)); + contracting_dim_total_int)); + // Multiply the static dims contribution by the dynamic one if needed. + if (has_dynamic_contracting_dim) { + zp_offset_value = builder.create( + loc, zp_offset_value, dynamic_contracting_dim_total); + } result = builder.create(loc, result, zp_offset_value, nullptr); } @@ -772,7 +854,7 @@ LogicalResult RewriteDotGeneralOp(DotOp op, DotOpAdaptor adaptor, const mhlo::DotDimensionNumbersAttr &dims, ConversionPatternRewriter &rewriter) { // Lower Dot/DotGeneral UQ ops to DotGeneral int. - // Assumes that operands and results are static-shape tensors of uq types. + // Assumes that operands and results are uq types. 
auto lhs_element_quant_type = getElementTypeOrSelf(op.getLhs().getType()) .template dyn_cast(); @@ -788,14 +870,14 @@ LogicalResult RewriteDotGeneralOp(DotOp op, DotOpAdaptor adaptor, op.getResult().getType().clone(rewriter.getI32Type()); // Dot result - // = dot((lhs - zp_l) * scale_l, (rhs - zp_r) * scale_r) / scale_res - // + zp_res - // = dot(lhs - zp_l, rhs - zp_r) * scale_l * scale_r / scale_res + zp_res - // = dot(lhs, rhs) * combined_scale + combined_zp + // = dot((lhs - zp_l) * scale_l, (rhs - zp_r) * scale_r) / scale_res + // + zp_res + // = dot(lhs - zp_l, rhs - zp_r) * scale_l * scale_r / scale_res + zp_res + // = dot(lhs, rhs) * combined_scale + combined_zp // where: - // zp_offset = zp_l*rhs + zp_r*lhs - zp_l*zp_r // combined_scale = scale_l * scale_r / scale_res // combined_zp = res_zp - zp_offset * combined_scale + // zp_offset = zp_l*rhs + zp_r*lhs - zp_l*zp_r SmallVector operands{lhs, rhs}; Value res_i32 = rewriter.create( op->getLoc(), res_int32_tensor_type, operands, attrs); @@ -805,10 +887,10 @@ LogicalResult RewriteDotGeneralOp(DotOp op, DotOpAdaptor adaptor, rhs_element_quant_type.getZeroPoint(), dims); // Multiply dot result and zp_offset by combined_scale only if it is not 1.0. 
- float combined_scale_fp = lhs_element_quant_type.getScale() * - rhs_element_quant_type.getScale() / - res_element_quant_type.getScale(); - if (combined_scale_fp != 1.0f) { + double combined_scale_fp = lhs_element_quant_type.getScale() * + rhs_element_quant_type.getScale() / + res_element_quant_type.getScale(); + if (combined_scale_fp != 1.0) { Value combined_scale = rewriter.create( op->getLoc(), rewriter.getF32FloatAttr(combined_scale_fp)); @@ -822,26 +904,32 @@ LogicalResult RewriteDotGeneralOp(DotOp op, DotOpAdaptor adaptor, res_i32 = rewriter.create(op->getLoc(), res_int32_tensor_type, res_f32); - auto zp_offset_float32_tensor_type = - zp_offset.getType().dyn_cast().clone(rewriter.getF32Type()); - zp_offset = rewriter.create( - op->getLoc(), zp_offset_float32_tensor_type, zp_offset); - zp_offset = rewriter.create( - op->getLoc(), zp_offset_float32_tensor_type, zp_offset, combined_scale, - nullptr); - zp_offset = rewriter.create( - op->getLoc(), - zp_offset_float32_tensor_type.clone(rewriter.getI32Type()), zp_offset); + // Skip zp_offset if it is 0. 
+ if (zp_offset) { + auto zp_offset_float32_tensor_type = + zp_offset.getType().dyn_cast().clone( + rewriter.getF32Type()); + zp_offset = rewriter.create( + op->getLoc(), zp_offset_float32_tensor_type, zp_offset); + zp_offset = rewriter.create( + op->getLoc(), zp_offset_float32_tensor_type, zp_offset, + combined_scale, nullptr); + zp_offset = rewriter.create( + op->getLoc(), + zp_offset_float32_tensor_type.clone(rewriter.getI32Type()), + zp_offset); + } } - Value res_zp = rewriter.create( + Value combined_zp = rewriter.create( op->getLoc(), rewriter.getI32IntegerAttr(res_element_quant_type.getZeroPoint())); - Value combined_zp = rewriter.create( - op->getLoc(), res_int32_tensor_type, res_zp, zp_offset, nullptr); - - rewriter.replaceOpWithNewOp(op, res_int32_tensor_type, res_i32, - combined_zp); + if (zp_offset) { + combined_zp = rewriter.create( + op->getLoc(), res_int32_tensor_type, combined_zp, zp_offset, nullptr); + } + rewriter.replaceOpWithNewOp( + op, res_int32_tensor_type, res_i32, combined_zp, nullptr); return success(); } @@ -852,7 +940,7 @@ class ConvertUniformQuantizedDotOp : public OpConversionPattern { LogicalResult matchAndRewrite( mhlo::DotOp op, mhlo::DotOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { - // Use matchAndRewriteDotLikeOp for DotHybrid and dynamic shapes. + // Use matchAndRewriteDotLikeOp for DotHybrid. 
if (!op.getLhs() .getType() .getElementType() @@ -860,10 +948,7 @@ class ConvertUniformQuantizedDotOp : public OpConversionPattern { !op.getRhs() .getType() .getElementType() - .isa() || - !op.getLhs().getType().cast().hasStaticShape() || - !op.getRhs().getType().cast().hasStaticShape() || - !op.getResult().getType().cast().hasStaticShape()) { + .isa()) { return matchAndRewriteDotLikeOp(op, adaptor, rewriter); } @@ -889,8 +974,7 @@ class ConvertUniformQuantizedDotGeneralOp LogicalResult matchAndRewrite( mhlo::DotGeneralOp op, mhlo::DotGeneralOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { - // Use matchAndRewriteDotLikeOp for DotHybridGeneral case and dynamic - // shapes. + // Use matchAndRewriteDotLikeOp for DotHybridGeneral. if (!op.getLhs() .getType() .getElementType() @@ -898,10 +982,7 @@ class ConvertUniformQuantizedDotGeneralOp !op.getRhs() .getType() .getElementType() - .isa() || - !op.getLhs().getType().cast().hasStaticShape() || - !op.getRhs().getType().cast().hasStaticShape() || - !op.getResult().getType().cast().hasStaticShape()) { + .isa()) { return matchAndRewriteDotLikeOp(op, adaptor, rewriter); } return RewriteDotGeneralOp(op, adaptor, op->getAttrs(), diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/tests/bridge/convert-mhlo-quant-to-int.mlir b/tensorflow/compiler/mlir/quantization/stablehlo/tests/bridge/convert-mhlo-quant-to-int.mlir index 9e00c80fb5aab5..81882209250883 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/tests/bridge/convert-mhlo-quant-to-int.mlir +++ b/tensorflow/compiler/mlir/quantization/stablehlo/tests/bridge/convert-mhlo-quant-to-int.mlir @@ -303,74 +303,152 @@ func.func @uniform_quantize_requantize_merged_zp_zero_and_dequantize(%arg0: tens // ----- +// CHECK-LABEL: func @uniform_quantize_dot_dequantize +func.func @uniform_quantize_dot_dequantize(%arg0: tensor<2x2xf32>, %arg1: tensor<2x2xf32>) -> tensor<2x2xf32> { + %0 = mhlo.uniform_quantize %arg0 : (tensor<2x2xf32>) -> 
tensor<2x2x!quant.uniform> + %1 = mhlo.uniform_quantize %arg1 : (tensor<2x2xf32>) -> tensor<2x2x!quant.uniform> + + // CHECK: "mhlo.dot_general" + // CHECK-SAME: lhs_contracting_dimensions = [1] + // CHECK-SAME: rhs_contracting_dimensions = [0] + // CHECK-SAME: (tensor<2x2xi8>, tensor<2x2xi8>) -> tensor<2x2xi32> + %2 = "mhlo.dot" (%0, %1) : (tensor<2x2x!quant.uniform>, tensor<2x2x!quant.uniform>) -> tensor<2x2x!quant.uniform> + %3 = mhlo.uniform_dequantize %2 : (tensor<2x2x!quant.uniform>) -> tensor<2x2xf32> + return %3 : tensor<2x2xf32> +} + +// ----- + +// CHECK-LABEL: func @uniform_quantize_dot_int4 +func.func @uniform_quantize_dot_int4(%arg0: tensor<2x2xf32>, %arg1: tensor<2x2xf32>) { + %0 = mhlo.uniform_quantize %arg0 : (tensor<2x2xf32>) -> tensor<2x2x!quant.uniform> + %1 = mhlo.uniform_quantize %arg1 : (tensor<2x2xf32>) -> tensor<2x2x!quant.uniform> + + // CHECK: "mhlo.dot_general" + // CHECK-SAME: lhs_contracting_dimensions = [1] + // CHECK-SAME: rhs_contracting_dimensions = [0] + // CHECK-SAME: (tensor<2x2xi4>, tensor<2x2xi4>) -> tensor<2x2xi32> + %2 = "mhlo.dot" (%0, %1): (tensor<2x2x!quant.uniform>, tensor<2x2x!quant.uniform>) -> tensor<2x2x!quant.uniform> + return +} + +// ----- + // CHECK-LABEL: func @uniform_quantize_dot_dequantize_dynamic func.func @uniform_quantize_dot_dequantize_dynamic(%arg0: tensor, %arg1: tensor) -> tensor { %0 = mhlo.uniform_quantize %arg0 : (tensor) -> tensor> - %1 = mhlo.uniform_quantize %arg1 : (tensor) -> tensor> + %1 = mhlo.uniform_quantize %arg1 : (tensor) -> tensor> - // CHECK: %[[VAL1:.*]] = mhlo.convert %[[VAL0:.*]] : (tensor) -> tensor - // CHECK: %[[VAL3:.*]] = chlo.broadcast_subtract %[[VAL1]], %[[VAL2:.*]] : (tensor, tensor) -> tensor - // CHECK: %[[VAL5:.*]] = mhlo.convert %[[VAL4:.*]] : (tensor) -> tensor - // CHECK: %[[VAL7:.*]] = chlo.broadcast_subtract %[[VAL5]], %[[VAL6:.*]] : (tensor, tensor) -> tensor - // CHECK: %[[VAL8:.*]] = "mhlo.dot"(%[[VAL3]], %[[VAL7]]) : (tensor, tensor) -> tensor - // CHECK: 
%[[VAL10:.*]] = chlo.broadcast_multiply %[[VAL8]], %[[VAL9:.*]] : (tensor, tensor) -> tensor - // CHECK: %[[VAL12:.*]] = chlo.broadcast_add %[[VAL10]], %[[VAL11:.*]] : (tensor, tensor) -> tensor - // CHECK: %[[VAL13:.*]] = mhlo.floor %[[VAL12]] : tensor - // CHECK: %[[VAL15:.*]] = chlo.broadcast_add %[[VAL13]], %[[VAL14:.*]] : (tensor, tensor) -> tensor - // CHECK: %[[VAL16:.*]] = mhlo.convert %[[VAL15]] : (tensor) -> tensor - // CHECK: %[[VAL19:.*]] = mhlo.clamp %[[VAL17:.*]], %[[VAL16]], %[[VAL18:.*]] : (tensor, tensor, tensor) -> tensor - // CHECK: %[[VAL20:.*]] = mhlo.convert %[[VAL19]] : (tensor) -> tensor - %2 = "mhlo.dot" (%0, %1) : (tensor>, tensor>) -> tensor> + // CHECK: mhlo.dot_general + // CHECK-SAME: lhs_contracting_dimensions = [1] + // CHECK-SAME: rhs_contracting_dimensions = [0] + // CHECK-SAME: (tensor, tensor) -> tensor + + // CHECK: mhlo.reduce + // CHECK-SAME: applies mhlo.add across dimensions = [1] + // CHECK-SAME: (tensor, tensor) -> tensor + // CHECK: "mhlo.get_dimension_size" + // CHECK-SAME: {dimension = 0 : i64} : (tensor) -> tensor + // CHECK: "mhlo.get_dimension_size" + // CHECK-SAME: {dimension = 1 : i64} : (tensor) -> tensor + // CHECK: %[[DYN_DIMS:.*]] = "mhlo.concatenate" + // CHECK-SAME: {dimension = 0 : i64} + // CHECK: mhlo.dynamic_broadcast_in_dim + // CHECK-SAME: %[[DYN_DIMS]]) + // CHECK-SAME: broadcast_dimensions = dense<0> + // CHECK-SAME: (tensor, tensor<2xi64>) -> tensor + + // CHECK: mhlo.reduce + // CHECK-SAME: applies mhlo.add across dimensions = [0] + // CHECK-SAME: (tensor, tensor) -> tensor + // CHECK: mhlo.dynamic_broadcast_in_dim + // CHECK-SAME: %[[DYN_DIMS]]) + // CHECK-SAME: broadcast_dimensions = dense<1> + // CHECK-SAME: (tensor, tensor<2xi64>) -> tensor + %2 = "mhlo.dot" (%0, %1) : (tensor>, tensor>) -> tensor> %3 = mhlo.uniform_dequantize %2 : (tensor>) -> tensor return %3 : tensor } // ----- -// CHECK-LABEL: func @uniform_quantize_dot_dynamic_int4 -func.func @uniform_quantize_dot_dynamic_int4(%arg0: 
tensor, %arg1: tensor) { - %0 = mhlo.uniform_quantize %arg0 : (tensor) -> tensor> - %1 = mhlo.uniform_quantize %arg1 : (tensor) -> tensor> +// CHECK-LABEL: func @uniform_quantize_dot_dequantize_dynamic_int4 +func.func @uniform_quantize_dot_dequantize_dynamic_int4(%arg0: tensor, %arg1: tensor) -> tensor { + %0 = mhlo.uniform_quantize %arg0 : (tensor) -> tensor> + %1 = mhlo.uniform_quantize %arg1 : (tensor) -> tensor> - // CHECK: %[[VAL2:.*]] = "mhlo.dot"(%[[VAL0:.*]], %[[VAL1:.*]]) : (tensor, tensor) -> tensor - // CHECK: %[[VAL4:.*]] = mhlo.convert %[[VAL3:.*]] : (tensor) -> tensor - // CHECK-DAG: %[[VAL5:.*]] = mhlo.constant dense<-8> : tensor - // CHECK-DAG: %[[VAL6:.*]] = mhlo.constant dense<7> : tensor - // CHECK: %[[VAL7:.*]] = mhlo.clamp %[[VAL5]], %[[VAL4]], %[[VAL6]] : (tensor, tensor, tensor) -> tensor - // CHECK: %[[VAL8:.*]] = mhlo.convert %[[VAL7]] : (tensor) -> tensor - %2 = "mhlo.dot" (%0, %1): (tensor>, tensor>) -> tensor> - return + // CHECK: mhlo.dot_general + // CHECK-SAME: lhs_contracting_dimensions = [1] + // CHECK-SAME: rhs_contracting_dimensions = [0] + // CHECK-SAME: (tensor, tensor) -> tensor + %2 = "mhlo.dot" (%0, %1) : (tensor>, tensor>) -> tensor> + %3 = mhlo.uniform_dequantize %2 : (tensor>) -> tensor + return %3 : tensor } // ----- -// CHECK-LABEL: func @uniform_quantize_dot_dequantize -func.func @uniform_quantize_dot_dequantize(%arg0: tensor<2x2xf32>, %arg1: tensor<2x2xf32>) -> tensor<2x2xf32> { - %0 = mhlo.uniform_quantize %arg0 : (tensor<2x2xf32>) -> tensor<2x2x!quant.uniform> - %1 = mhlo.uniform_quantize %arg1 : (tensor<2x2xf32>) -> tensor<2x2x!quant.uniform> +// CHECK-LABEL: func @uniform_quantize_dot_dequantize_dynamic_contracting_dim +func.func @uniform_quantize_dot_dequantize_dynamic_contracting_dim(%arg0: tensor<2x?xf32>, %arg1: tensor) -> tensor<2x2xf32> { + %0 = mhlo.uniform_quantize %arg0 : (tensor<2x?xf32>) -> tensor<2x?x!quant.uniform> + %1 = mhlo.uniform_quantize %arg1 : (tensor) -> tensor> // CHECK: "mhlo.dot_general" // 
CHECK-SAME: lhs_contracting_dimensions = [1] // CHECK-SAME: rhs_contracting_dimensions = [0] - // CHECK-SAME: (tensor<2x2xi8>, tensor<2x2xi8>) -> tensor<2x2xi32> - %2 = "mhlo.dot" (%0, %1) : (tensor<2x2x!quant.uniform>, tensor<2x2x!quant.uniform>) -> tensor<2x2x!quant.uniform> + // CHECK-SAME: (tensor<2x?xi8>, tensor) -> tensor<2x2xi32> + + // CHECK: mhlo.reduce + // CHECK-SAME: applies mhlo.add across dimensions = [1] + // CHECK-SAME: (tensor<2x?xi32>, tensor) -> tensor<2xi32> + + // CHECK: mhlo.reduce + // CHECK-SAME: applies mhlo.add across dimensions = [0] + // CHECK-SAME: (tensor, tensor) -> tensor<2xi32> + + // CHECK: %[[DYNAMIC_DIM_INIT:.*]] = mhlo.constant dense<1> : tensor + // CHECK: %[[DYNAMIC_DIM:.*]] = "mhlo.get_dimension_size" + // CHECK-SAME: {dimension = 0 : i64} : (tensor) -> tensor + // CHECK: %[[DYNAMIC_DIM_TOTAL:.*]] = mhlo.multiply + // CHECK-SAME: %[[DYNAMIC_DIM_INIT]], %[[DYNAMIC_DIM]] + // CHECK: %[[DIMS:.*]] = mhlo.constant dense<9> : tensor + // CHECK: %[[DIMS_1:.*]] = mhlo.multiply %[[DIMS]], %[[DYNAMIC_DIM_TOTAL]] + // CHECK: chlo.broadcast_subtract %[[ZP_OFFSET:.*]], %[[DIMS:.*]] + %2 = "mhlo.dot" (%0, %1) : (tensor<2x?x!quant.uniform>, tensor>) -> tensor<2x2x!quant.uniform> %3 = mhlo.uniform_dequantize %2 : (tensor<2x2x!quant.uniform>) -> tensor<2x2xf32> return %3 : tensor<2x2xf32> } // ----- -// CHECK-LABEL: func @uniform_quantize_dot_int4 -func.func @uniform_quantize_dot_int4(%arg0: tensor<2x2xf32>, %arg1: tensor<2x2xf32>) { - %0 = mhlo.uniform_quantize %arg0 : (tensor<2x2xf32>) -> tensor<2x2x!quant.uniform> - %1 = mhlo.uniform_quantize %arg1 : (tensor<2x2xf32>) -> tensor<2x2x!quant.uniform> +// CHECK-LABEL: func @uniform_quantize_dot_dequantize_dynamic_result_dim +func.func @uniform_quantize_dot_dequantize_dynamic_result_dim(%arg0: tensor, %arg1: tensor<2x?xf32>) -> tensor { + %0 = mhlo.uniform_quantize %arg0 : (tensor) -> tensor> + %1 = mhlo.uniform_quantize %arg1 : (tensor<2x?xf32>) -> tensor<2x?x!quant.uniform> // CHECK: 
"mhlo.dot_general" // CHECK-SAME: lhs_contracting_dimensions = [1] // CHECK-SAME: rhs_contracting_dimensions = [0] - // CHECK-SAME: (tensor<2x2xi4>, tensor<2x2xi4>) -> tensor<2x2xi32> - %2 = "mhlo.dot" (%0, %1): (tensor<2x2x!quant.uniform>, tensor<2x2x!quant.uniform>) -> tensor<2x2x!quant.uniform> - return + // CHECK-SAME: (tensor, tensor<2x?xi8>) -> tensor + + // CHECK: mhlo.reduce + // CHECK-SAME: applies mhlo.add across dimensions = [1] + // CHECK-SAME: (tensor, tensor) -> tensor + // CHECK: mhlo.dynamic_broadcast_in_dim + // CHECK-SAME: broadcast_dimensions = dense<0> + // CHECK-SAME: (tensor, tensor<2xi64>) -> tensor + + // CHECK: mhlo.reduce + // CHECK-SAME: applies mhlo.add across dimensions = [0] + // CHECK-SAME: (tensor<2x?xi32>, tensor) -> tensor + // CHECK: mhlo.dynamic_broadcast_in_dim + // CHECK-SAME: broadcast_dimensions = dense<1> + // CHECK-SAME: (tensor, tensor<2xi64>) -> tensor + + + %2 = "mhlo.dot" (%0, %1) : (tensor>, tensor<2x?x!quant.uniform>) -> tensor> + %3 = mhlo.uniform_dequantize %2 : (tensor>) -> tensor + return %3 : tensor } // ----- @@ -389,8 +467,6 @@ func.func @uniform_quantize_dot_general_dequantize( // CHECK-SAME: lhs_contracting_dimensions = [2] // CHECK-SAME: rhs_contracting_dimensions = [0] - // CHECK: %[[ZP_TOTAL_1:.*]] = mhlo.constant dense<0> : tensor<2x5x8xi32> - // Zero point offset contribution from LHS tensor * RHS ZP. // CHECK: %[[LHS_I32:.*]] = mhlo.convert %[[LHS:.*]] : (tensor<2x5x6xi8>) @@ -407,7 +483,6 @@ func.func @uniform_quantize_dot_general_dequantize( // CHECK: %[[LHS_ZP_BCAST:.*]] = "mhlo.broadcast_in_dim"(%[[LHS_ZP_CONTRIB]]) // CHECK-SAME: broadcast_dimensions = dense<[0, 1]> // CHECK-SAME: (tensor<2x5xi32>) -> tensor<2x5x8xi32> - // CHECK: %[[ZP_TOTAL_2:.*]] = mhlo.add %[[ZP_TOTAL_1]], %[[LHS_ZP_BCAST]] // Zero point offset contribution from RHS tensor * LHS ZP. 
@@ -425,12 +500,12 @@ func.func @uniform_quantize_dot_general_dequantize( // CHECK: %[[RHS_ZP_BCAST:.*]] = "mhlo.broadcast_in_dim"(%[[RHS_ZP_CONTRIB]]) // CHECK-SAME: broadcast_dimensions = dense<[2, 0]> // CHECK-SAME: (tensor<8x2xi32>) -> tensor<2x5x8xi32> - // CHECK: %[[ZP_TOTAL_3:.*]] = mhlo.add %[[ZP_TOTAL_2]], %[[RHS_ZP_BCAST]] + // CHECK: %[[ZP_TOTAL_1:.*]] = mhlo.add %[[LHS_ZP_BCAST]], %[[RHS_ZP_BCAST]] // Zero point offset contribution from LHS ZP * RHS ZP. // CHECK: %[[ZPS:.*]] = mhlo.constant dense<90> : tensor - // CHECK: %[[ZP_TOTAL_4:.*]] = chlo.broadcast_subtract %[[ZP_TOTAL_3]], %[[ZPS]] + // CHECK: %[[ZP_TOTAL_2:.*]] = chlo.broadcast_subtract %[[ZP_TOTAL_1]], %[[ZPS]] // CHECK-SAME: (tensor<2x5x8xi32>, tensor) -> tensor<2x5x8xi32> // Combine dot result with zero point offset and output final result. @@ -443,17 +518,17 @@ func.func @uniform_quantize_dot_general_dequantize( // CHECK: %[[RES_INT:.*]] = mhlo.convert %[[RES_FP_1]] // CHECK-SAME: (tensor<2x5x8xf32>) -> tensor<2x5x8xi32> - // CHECK: %[[ZP_TOTAL_5:.*]] = mhlo.convert %[[ZP_TOTAL_4]] + // CHECK: %[[ZP_TOTAL_3:.*]] = mhlo.convert %[[ZP_TOTAL_2]] // CHECK-SAME: (tensor<2x5x8xi32>) -> tensor<2x5x8xf32> - // CHECK: %[[ZP_TOTAL_6:.*]] = chlo.broadcast_multiply - // CHECK-SAME: %[[ZP_TOTAL_5:.*]], %[[COMBINED_SCALE]] - // CHECK: %[[ZP_TOTAL_7:.*]] = mhlo.convert %[[ZP_TOTAL_6]] + // CHECK: %[[ZP_TOTAL_4:.*]] = chlo.broadcast_multiply + // CHECK-SAME: %[[ZP_TOTAL_3:.*]], %[[COMBINED_SCALE]] + // CHECK: %[[ZP_TOTAL_5:.*]] = mhlo.convert %[[ZP_TOTAL_4]] // CHECK-SAME: (tensor<2x5x8xf32>) -> tensor<2x5x8xi32> // CHECK: %[[RES_ZP:.*]] = mhlo.constant dense<7> : tensor - // CHECK: %[[ZP_TOTAL_8:.*]] = chlo.broadcast_subtract %[[RES_ZP]], %[[ZP_TOTAL_7]] + // CHECK: %[[ZP_TOTAL_6:.*]] = chlo.broadcast_subtract %[[RES_ZP]], %[[ZP_TOTAL_5]] // CHECK-SAME: (tensor, tensor<2x5x8xi32>) -> tensor<2x5x8xi32> - // CHECK: mhlo.add %[[RES_INT]], %[[ZP_TOTAL_8]] + // CHECK: chlo.broadcast_add %[[RES_INT]], 
%[[ZP_TOTAL_6]] %2 = "mhlo.dot_general" (%0, %1) { dot_dimension_numbers = #mhlo.dot< @@ -473,6 +548,87 @@ func.func @uniform_quantize_dot_general_dequantize( // ----- +// CHECK-LABEL: func @uniform_quantize_dot_general_dequantize_combined_scale_1 +func.func @uniform_quantize_dot_general_dequantize_combined_scale_1( + %arg0: tensor<2x5x6xf32>, %arg1: tensor<6x8x2xf32>) -> tensor<2x5x8xf32> { + %0 = mhlo.uniform_quantize %arg0 : (tensor<2x5x6xf32>) + -> tensor<2x5x6x!quant.uniform> + %1 = mhlo.uniform_quantize %arg1 : (tensor<6x8x2xf32>) + -> tensor<6x8x2x!quant.uniform> + + // CHECK: %[[DOT_RES:.*]] = "mhlo.dot_general" + // CHECK-SAME: lhs_batching_dimensions = [0] + // CHECK-SAME: rhs_batching_dimensions = [2] + // CHECK-SAME: lhs_contracting_dimensions = [2] + // CHECK-SAME: rhs_contracting_dimensions = [0] + + // Zero point offset contribution from LHS tensor * RHS ZP. + + // CHECK: %[[LHS_I32:.*]] = mhlo.convert %[[LHS:.*]] : (tensor<2x5x6xi8>) + // CHECK-SAME: -> tensor<2x5x6xi32> + // CHECK: %[[LHS_REDUCE_INIT:.*]] = mhlo.constant dense<0> : tensor + // CHECK: %[[LHS_REDUCE:.*]] = mhlo.reduce(%[[LHS_I32]] init: %[[LHS_REDUCE_INIT]]) + // CHECK-SAME: applies mhlo.add across dimensions = [2] + // CHECK-SAME: (tensor<2x5x6xi32>, tensor) + // CHECK-SAME: -> tensor<2x5xi32> + // CHECK: %[[RHS_ZP:.*]] = mhlo.constant dense<5> : tensor + // CHECK: %[[LHS_ZP_CONTRIB:.*]] = chlo.broadcast_multiply + // CHECK-SAME: %[[LHS_REDUCE]], %[[RHS_ZP]] : + // CHECK-SAME: (tensor<2x5xi32>, tensor) -> tensor<2x5xi32> + // CHECK: %[[LHS_ZP_BCAST:.*]] = "mhlo.broadcast_in_dim"(%[[LHS_ZP_CONTRIB]]) + // CHECK-SAME: broadcast_dimensions = dense<[0, 1]> + // CHECK-SAME: (tensor<2x5xi32>) -> tensor<2x5x8xi32> + + // Zero point offset contribution from RHS tensor * LHS ZP. 
+ + // CHECK: %[[RHS_I32:.*]] = mhlo.convert %[[RHS:.*]] : (tensor<6x8x2xi8>) + // CHECK-SAME: -> tensor<6x8x2xi32> + // CHECK: %[[RHS_REDUCE_INIT:.*]] = mhlo.constant dense<0> : tensor + // CHECK: %[[RHS_REDUCE:.*]] = mhlo.reduce(%[[RHS_I32]] init: %[[RHS_REDUCE_INIT]]) + // CHECK-SAME: applies mhlo.add across dimensions = [0] + // CHECK-SAME: (tensor<6x8x2xi32>, tensor) + // CHECK-SAME: -> tensor<8x2xi32> + // CHECK: %[[RHS_ZP:.*]] = mhlo.constant dense<3> : tensor + // CHECK: %[[RHS_ZP_CONTRIB:.*]] = chlo.broadcast_multiply + // CHECK-SAME: %[[RHS_REDUCE]], %[[RHS_ZP]] : + // CHECK-SAME: (tensor<8x2xi32>, tensor) -> tensor<8x2xi32> + // CHECK: %[[RHS_ZP_BCAST:.*]] = "mhlo.broadcast_in_dim"(%[[RHS_ZP_CONTRIB]]) + // CHECK-SAME: broadcast_dimensions = dense<[2, 0]> + // CHECK-SAME: (tensor<8x2xi32>) -> tensor<2x5x8xi32> + // CHECK: %[[ZP_TOTAL_1:.*]] = mhlo.add %[[LHS_ZP_BCAST]], %[[RHS_ZP_BCAST]] + + // Zero point offset contribution from LHS ZP * RHS ZP. + + // CHECK: %[[ZPS:.*]] = mhlo.constant dense<90> : tensor + // CHECK: %[[ZP_TOTAL_2:.*]] = chlo.broadcast_subtract %[[ZP_TOTAL_1]], %[[ZPS]] + // CHECK-SAME: (tensor<2x5x8xi32>, tensor) -> tensor<2x5x8xi32> + + // Combine dot result with zero point offset and output final result. + // Do not multiply by combined scale since it is 1.0 and thus no-op. 
+ + // CHECK: %[[RES_ZP:.*]] = mhlo.constant dense<7> : tensor + // CHECK: %[[ZP_TOTAL_3:.*]] = chlo.broadcast_subtract %[[RES_ZP]], %[[ZP_TOTAL_2]] + // CHECK-SAME: (tensor, tensor<2x5x8xi32>) -> tensor<2x5x8xi32> + // CHECK: chlo.broadcast_add %[[DOT_RES]], %[[ZP_TOTAL_3]] + + %2 = "mhlo.dot_general" (%0, %1) { + dot_dimension_numbers = #mhlo.dot< + lhs_batching_dimensions = [0], + rhs_batching_dimensions = [2], + lhs_contracting_dimensions = [2], + rhs_contracting_dimensions = [0] + >} : ( + tensor<2x5x6x!quant.uniform>, + tensor<6x8x2x!quant.uniform> + ) -> tensor<2x5x8x!quant.uniform> + %3 = mhlo.uniform_dequantize %2 : ( + tensor<2x5x8x!quant.uniform> + ) -> tensor<2x5x8xf32> + return %3 : tensor<2x5x8xf32> +} + +// ----- + // CHECK-LABEL: func @uniform_quantize_dot_general_dequantize_multiple_batching_dims func.func @uniform_quantize_dot_general_dequantize_multiple_batching_dims( %arg0: tensor<2x5x3x7x6xf32>, %arg1: tensor<6x2x7x8x3xf32>) -> tensor<2x3x5x8xf32> { @@ -487,8 +643,6 @@ func.func @uniform_quantize_dot_general_dequantize_multiple_batching_dims( // CHECK-SAME: lhs_contracting_dimensions = [4, 3] // CHECK-SAME: rhs_contracting_dimensions = [0, 2]>} - // CHECK: %[[ZP_TOTAL_1:.*]] = mhlo.constant dense<0> : tensor<2x3x5x8xi32> - // Zero point offset contribution from LHS tensor * RHS ZP. // CHECK: %[[LHS_I32:.*]] = mhlo.convert %[[LHS:.*]] : (tensor<2x5x3x7x6xi8>) @@ -505,7 +659,6 @@ func.func @uniform_quantize_dot_general_dequantize_multiple_batching_dims( // CHECK: %[[LHS_ZP_BCAST:.*]] = "mhlo.broadcast_in_dim"(%[[LHS_ZP_CONTRIB]]) // CHECK-SAME: broadcast_dimensions = dense<[0, 2, 1]> // CHECK-SAME: (tensor<2x5x3xi32>) -> tensor<2x3x5x8xi32> - // CHECK: %[[ZP_TOTAL_2:.*]] = mhlo.add %[[ZP_TOTAL_1]], %[[LHS_ZP_BCAST]] // Zero point offset contribution from RHS tensor * LHS ZP. 
@@ -523,12 +676,12 @@ func.func @uniform_quantize_dot_general_dequantize_multiple_batching_dims( // CHECK: %[[RHS_ZP_BCAST:.*]] = "mhlo.broadcast_in_dim"(%[[RHS_ZP_CONTRIB]]) // CHECK-SAME: broadcast_dimensions = dense<[0, 3, 1]> // CHECK-SAME: (tensor<2x8x3xi32>) -> tensor<2x3x5x8xi32> - // CHECK: %[[ZP_TOTAL_3:.*]] = mhlo.add %[[ZP_TOTAL_2]], %[[RHS_ZP_BCAST]] + // CHECK: %[[ZP_TOTAL_1:.*]] = mhlo.add %[[LHS_ZP_BCAST]], %[[RHS_ZP_BCAST]] // Zero point offset contribution from LHS ZP * RHS ZP. // CHECK: %[[ZPS:.*]] = mhlo.constant dense<630> : tensor - // CHECK: %[[ZP_TOTAL_4:.*]] = chlo.broadcast_subtract %[[ZP_TOTAL_3]], %[[ZPS]] + // CHECK: %[[ZP_TOTAL_2:.*]] = chlo.broadcast_subtract %[[ZP_TOTAL_1]], %[[ZPS]] // CHECK-SAME: (tensor<2x3x5x8xi32>, tensor) -> tensor<2x3x5x8xi32> // Combine dot result with zero point offset and output final result. @@ -541,18 +694,18 @@ func.func @uniform_quantize_dot_general_dequantize_multiple_batching_dims( // CHECK: %[[RES_INT:.*]] = mhlo.convert %[[RES_FP_1]] // CHECK-SAME: (tensor<2x3x5x8xf32>) -> tensor<2x3x5x8xi32> - // CHECK: %[[ZP_TOTAL_5:.*]] = mhlo.convert %[[ZP_TOTAL_4]] + // CHECK: %[[ZP_TOTAL_3:.*]] = mhlo.convert %[[ZP_TOTAL_2]] // CHECK-SAME: (tensor<2x3x5x8xi32>) -> tensor<2x3x5x8xf32> - // CHECK: %[[ZP_TOTAL_6:.*]] = chlo.broadcast_multiply - // CHECK-SAME: %[[ZP_TOTAL_5:.*]], %[[COMBINED_SCALE]] - // CHECK: %[[ZP_TOTAL_7:.*]] = mhlo.convert %[[ZP_TOTAL_6]] + // CHECK: %[[ZP_TOTAL_4:.*]] = chlo.broadcast_multiply + // CHECK-SAME: %[[ZP_TOTAL_3:.*]], %[[COMBINED_SCALE]] + // CHECK: %[[ZP_TOTAL_5:.*]] = mhlo.convert %[[ZP_TOTAL_4]] // CHECK-SAME: (tensor<2x3x5x8xf32>) -> tensor<2x3x5x8xi32> // CHECK: %[[RES_ZP:.*]] = mhlo.constant dense<7> : tensor - // CHECK: %[[ZP_TOTAL_8:.*]] = chlo.broadcast_subtract %[[RES_ZP]], %[[ZP_TOTAL_7]] + // CHECK: %[[ZP_TOTAL_6:.*]] = chlo.broadcast_subtract %[[RES_ZP]], %[[ZP_TOTAL_5]] // CHECK-SAME: (tensor, tensor<2x3x5x8xi32>) -> tensor<2x3x5x8xi32> - // CHECK: mhlo.add 
%[[RES_INT]], %[[ZP_TOTAL_8]] - + // CHECK: chlo.broadcast_add %[[RES_INT]], %[[ZP_TOTAL_6]] + %2 = "mhlo.dot_general" (%0, %1) { dot_dimension_numbers = #mhlo.dot< lhs_batching_dimensions = [0, 2], @@ -585,8 +738,6 @@ func.func @uniform_quantize_dot_general_dequantize_rhs_zero_zp( // CHECK-SAME: lhs_contracting_dimensions = [2] // CHECK-SAME: rhs_contracting_dimensions = [0] - // CHECK: %[[ZP_TOTAL_1:.*]] = mhlo.constant dense<0> : tensor<2x5x8xi32> - // Zero point offset contribution from LHS tensor * RHS ZP is 0 and skipped. // Zero point offset contribution from RHS tensor * LHS ZP. @@ -605,7 +756,6 @@ func.func @uniform_quantize_dot_general_dequantize_rhs_zero_zp( // CHECK: %[[RHS_ZP_BCAST:.*]] = "mhlo.broadcast_in_dim"(%[[RHS_ZP_CONTRIB]]) // CHECK-SAME: broadcast_dimensions = dense<[2, 0]> // CHECK-SAME: (tensor<8x2xi32>) -> tensor<2x5x8xi32> - // CHECK: %[[ZP_TOTAL_2:.*]] = mhlo.add %[[ZP_TOTAL_1]], %[[RHS_ZP_BCAST]] // Zero point offset contribution from LHS ZP * RHS ZP is 0 and skipped. 
@@ -619,17 +769,17 @@ func.func @uniform_quantize_dot_general_dequantize_rhs_zero_zp( // CHECK: %[[RES_INT:.*]] = mhlo.convert %[[RES_FP_1]] // CHECK-SAME: (tensor<2x5x8xf32>) -> tensor<2x5x8xi32> - // CHECK: %[[ZP_TOTAL_3:.*]] = mhlo.convert %[[ZP_TOTAL_2]] + // CHECK: %[[ZP_TOTAL_1:.*]] = mhlo.convert %[[RHS_ZP_BCAST]] // CHECK-SAME: (tensor<2x5x8xi32>) -> tensor<2x5x8xf32> - // CHECK: %[[ZP_TOTAL_4:.*]] = chlo.broadcast_multiply - // CHECK-SAME: %[[ZP_TOTAL_3:.*]], %[[COMBINED_SCALE]] - // CHECK: %[[ZP_TOTAL_5:.*]] = mhlo.convert %[[ZP_TOTAL_4]] + // CHECK: %[[ZP_TOTAL_2:.*]] = chlo.broadcast_multiply + // CHECK-SAME: %[[ZP_TOTAL_1:.*]], %[[COMBINED_SCALE]] + // CHECK: %[[ZP_TOTAL_3:.*]] = mhlo.convert %[[ZP_TOTAL_2]] // CHECK-SAME: (tensor<2x5x8xf32>) -> tensor<2x5x8xi32> // CHECK: %[[RES_ZP:.*]] = mhlo.constant dense<7> : tensor - // CHECK: %[[ZP_TOTAL_6:.*]] = chlo.broadcast_subtract %[[RES_ZP]], %[[ZP_TOTAL_5]] + // CHECK: %[[ZP_TOTAL_4:.*]] = chlo.broadcast_subtract %[[RES_ZP]], %[[ZP_TOTAL_3]] // CHECK-SAME: (tensor, tensor<2x5x8xi32>) -> tensor<2x5x8xi32> - // CHECK: mhlo.add %[[RES_INT]], %[[ZP_TOTAL_6]] + // CHECK: chlo.broadcast_add %[[RES_INT]], %[[ZP_TOTAL_4]] %2 = "mhlo.dot_general" (%0, %1) { dot_dimension_numbers = #mhlo.dot< @@ -665,8 +815,6 @@ func.func @uniform_quantize_dot_general_dequantize_zero_zp( // Both LHS/RHS have zero zp. No zp contribution. 
- // CHECK-DAG: %[[ZP_CONTRIB:.*]] = mhlo.constant dense<0> : tensor<2x5x8xi32> - // CHECK-DAG: %[[COMBINED_SCALE:.*]] = mhlo.constant dense<1.500000e+00> : tensor // CHECK: %[[RES_FP:.*]] = mhlo.convert %[[DOT_RES]] : // CHECK-SAME: (tensor<2x5x8xi32>) -> tensor<2x5x8xf32> @@ -675,17 +823,8 @@ func.func @uniform_quantize_dot_general_dequantize_zero_zp( // CHECK: %[[RES_INT:.*]] = mhlo.convert %[[RES_FP_1]] // CHECK-SAME: (tensor<2x5x8xf32>) -> tensor<2x5x8xi32> - // CHECK: %[[ZP_TOTAL_1:.*]] = mhlo.convert %[[ZP_CONTRIB]] - // CHECK-SAME: (tensor<2x5x8xi32>) -> tensor<2x5x8xf32> - // CHECK: %[[ZP_TOTAL_2:.*]] = chlo.broadcast_multiply - // CHECK-SAME: %[[ZP_TOTAL_1:.*]], %[[COMBINED_SCALE]] - // CHECK: %[[ZP_TOTAL_3:.*]] = mhlo.convert %[[ZP_TOTAL_2]] - // CHECK-SAME: (tensor<2x5x8xf32>) -> tensor<2x5x8xi32> - // CHECK: %[[RES_ZP:.*]] = mhlo.constant dense<7> : tensor - // CHECK: %[[ZP_TOTAL_4:.*]] = chlo.broadcast_subtract %[[RES_ZP]], %[[ZP_TOTAL_3]] - // CHECK-SAME: (tensor, tensor<2x5x8xi32>) -> tensor<2x5x8xi32> - // CHECK: mhlo.add %[[RES_INT]], %[[ZP_TOTAL_4]] + // CHECK: chlo.broadcast_add %[[RES_INT]], %[[RES_ZP]] %2 = "mhlo.dot_general" (%0, %1) { dot_dimension_numbers = #mhlo.dot< @@ -705,6 +844,123 @@ func.func @uniform_quantize_dot_general_dequantize_zero_zp( // ----- +// CHECK-LABEL: func @uniform_quantize_dot_general_dequantize_multiple_dynamic_dims +func.func @uniform_quantize_dot_general_dequantize_multiple_dynamic_dims( + %arg0: tensor, %arg1: tensor<6x?x?x8x3xf32>) -> tensor { + %0 = mhlo.uniform_quantize %arg0 : (tensor) + -> tensor> + %1 = mhlo.uniform_quantize %arg1 : (tensor<6x?x?x8x3xf32>) + -> tensor<6x?x?x8x3x!quant.uniform> + + // CHECK: %[[DOT_RES:.*]] = "mhlo.dot_general" + // CHECK-SAME: lhs_batching_dimensions = [0, 2] + // CHECK-SAME: rhs_batching_dimensions = [1, 4] + // CHECK-SAME: lhs_contracting_dimensions = [4, 3] + // CHECK-SAME: rhs_contracting_dimensions = [0, 2]>} + + // Zero point offset contribution from LHS tensor * RHS 
ZP. + + // CHECK: %[[LHS_I32:.*]] = mhlo.convert %[[LHS:.*]] : (tensor) + // CHECK-SAME: -> tensor + // CHECK: %[[LHS_REDUCE_INIT:.*]] = mhlo.constant dense<0> : tensor + // CHECK: %[[LHS_REDUCE:.*]] = mhlo.reduce(%[[LHS_I32]] init: %[[LHS_REDUCE_INIT]]) + // CHECK-SAME: applies mhlo.add across dimensions = [4, 3] + // CHECK-SAME: (tensor, tensor) + // CHECK-SAME: -> tensor + // CHECK: %[[RHS_ZP:.*]] = mhlo.constant dense<5> : tensor + // CHECK: %[[LHS_ZP_CONTRIB:.*]] = chlo.broadcast_multiply + // CHECK-SAME: %[[LHS_REDUCE]], %[[RHS_ZP]] : + // CHECK-SAME: (tensor, tensor) -> tensor + + // Calculate output dynamic dims. + // CHECK: %[[DIM_1_1:.*]] = "mhlo.get_dimension_size"(%[[LHS]]) + // CHECK-SAME: {dimension = 0 : i64} + // CHECK: %[[DIM_1_2:.*]] = mhlo.convert %[[DIM_1_1]] : (tensor) -> tensor + // CHECK: %[[DIM_1:.*]] = mhlo.reshape %[[DIM_1_2]] : (tensor) -> tensor<1xi64> + // CHECK: %[[DIM_2:.*]] = mhlo.constant dense<3> : tensor<1xi64> + // CHECK: %[[DIM_3_1:.*]] = "mhlo.get_dimension_size"(%[[LHS]]) + // CHECK-SAME: {dimension = 1 : i64} + // CHECK: %[[DIM_3_2:.*]] = mhlo.convert %[[DIM_3_1]] : (tensor) -> tensor + // CHECK: %[[DIM_3:.*]] = mhlo.reshape %[[DIM_3_2]] : (tensor) -> tensor<1xi64> + // CHECK: %[[DIM_4:.*]] = mhlo.constant dense<8> : tensor<1xi64> + // CHECK: %[[OUTPUT_DIMS:.*]] = "mhlo.concatenate" + // CHECK-SAME: %[[DIM_1]], %[[DIM_2]], %[[DIM_3]], %[[DIM_4]] + + // CHECK: %[[LHS_ZP_BCAST:.*]] = "mhlo.dynamic_broadcast_in_dim" + // CHECK-SAME: (%[[LHS_ZP_CONTRIB]], %[[OUTPUT_DIMS]]) + // CHECK-SAME: broadcast_dimensions = dense<[0, 2, 1]> + // CHECK-SAME: (tensor, tensor<4xi64>) -> tensor + + // Zero point offset contribution from RHS tensor * LHS ZP. 
+ + // CHECK: %[[RHS_I32:.*]] = mhlo.convert %[[RHS:.*]] : (tensor<6x?x?x8x3xi8>) + // CHECK-SAME: -> tensor<6x?x?x8x3xi32> + // CHECK: %[[RHS_REDUCE_INIT:.*]] = mhlo.constant dense<0> : tensor + // CHECK: %[[RHS_REDUCE:.*]] = mhlo.reduce(%[[RHS_I32]] init: %[[RHS_REDUCE_INIT]]) + // CHECK-SAME: applies mhlo.add across dimensions = [0, 2] + // CHECK-SAME: (tensor<6x?x?x8x3xi32>, tensor) + // CHECK-SAME: -> tensor + // CHECK: %[[RHS_ZP:.*]] = mhlo.constant dense<3> : tensor + // CHECK: %[[RHS_ZP_CONTRIB:.*]] = chlo.broadcast_multiply + // CHECK-SAME: %[[RHS_REDUCE]], %[[RHS_ZP]] : + // CHECK-SAME: (tensor, tensor) -> tensor + + // CHECK: %[[RHS_ZP_BCAST:.*]] = "mhlo.dynamic_broadcast_in_dim" + // CHECK-SAME: (%[[RHS_ZP_CONTRIB]], %[[OUTPUT_DIMS]]) + // CHECK-SAME: broadcast_dimensions = dense<[0, 3, 1]> + // CHECK-SAME: (tensor, tensor<4xi64>) -> tensor + // CHECK: %[[ZP_TOTAL_1:.*]] = mhlo.add %[[LHS_ZP_BCAST]], %[[RHS_ZP_BCAST]] + + // Zero point offset contribution from LHS ZP * RHS ZP. + + // CHECK: %[[ZPS_INIT:.*]] = mhlo.constant dense<1> : tensor + // CHECK: %[[DYN_DIM:.*]] = "mhlo.get_dimension_size"(%[[RHS]]) + // CHECK: %[[ZPS_1:.*]] = mhlo.multiply %[[ZPS_INIT]], %[[DYN_DIM]] + // CHECK: %[[STATIC_DIM:.*]] = mhlo.constant dense<90> : tensor + // CHECK: %[[ZPS:.*]] = mhlo.multiply %[[STATIC_DIM]], %[[ZPS_1]] + // CHECK: %[[ZP_TOTAL_2:.*]] = chlo.broadcast_subtract %[[ZP_TOTAL_1]], %[[ZPS]] + // CHECK-SAME: (tensor, tensor) -> tensor + + // Combine dot result with zero point offset and output final result. 
+ + // CHECK: %[[COMBINED_SCALE:.*]] = mhlo.constant dense<5.000000e-01> : tensor + // CHECK: %[[RES_FP:.*]] = mhlo.convert %[[DOT_RES]] + // CHECK-SAME: (tensor) -> tensor + // CHECK: %[[RES_FP_1:.*]] = chlo.broadcast_multiply + // CHECK-SAME: %[[RES_FP:.*]], %[[COMBINED_SCALE]] + // CHECK: %[[RES_INT:.*]] = mhlo.convert %[[RES_FP_1]] + // CHECK-SAME: (tensor) -> tensor + + // CHECK: %[[ZP_TOTAL_3:.*]] = mhlo.convert %[[ZP_TOTAL_2]] + // CHECK-SAME: (tensor) -> tensor + // CHECK: %[[ZP_TOTAL_4:.*]] = chlo.broadcast_multiply + // CHECK-SAME: %[[ZP_TOTAL_3:.*]], %[[COMBINED_SCALE]] + // CHECK: %[[ZP_TOTAL_5:.*]] = mhlo.convert %[[ZP_TOTAL_4]] + // CHECK-SAME: (tensor) -> tensor + + // CHECK: %[[RES_ZP:.*]] = mhlo.constant dense<7> : tensor + // CHECK: %[[ZP_TOTAL_6:.*]] = chlo.broadcast_subtract %[[RES_ZP]], %[[ZP_TOTAL_5]] + // CHECK-SAME: (tensor, tensor) -> tensor + // CHECK: chlo.broadcast_add %[[RES_INT]], %[[ZP_TOTAL_6]] + + %2 = "mhlo.dot_general" (%0, %1) { + dot_dimension_numbers = #mhlo.dot< + lhs_batching_dimensions = [0, 2], + rhs_batching_dimensions = [1, 4], + lhs_contracting_dimensions = [4, 3], + rhs_contracting_dimensions = [0, 2] + >} : ( + tensor>, + tensor<6x?x?x8x3x!quant.uniform> + ) -> tensor> + %3 = mhlo.uniform_dequantize %2 : ( + tensor> + ) -> tensor + return %3 : tensor +} + +// ----- + // CHECK-LABEL: func @uniform_quantized_convolution func.func @uniform_quantized_convolution(%arg0: tensor, %arg1: tensor) { %0 = mhlo.uniform_quantize %arg0 : (tensor) -> tensor> From 676af55749f95c486c6fce5b8b550c5412e28a5b Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 25 Sep 2023 12:49:49 -0700 Subject: [PATCH 232/567] Use MLIR op lowering of FusedBatchNormV3 and FusedBatchNormGradV3 for tf2xla for PiperOrigin-RevId: 568299087 --- tensorflow/compiler/mlir/tf2xla/transforms/legalize_tf.cc | 3 ++- tensorflow/compiler/tf2xla/kernels/BUILD | 1 + tensorflow/compiler/tf2xla/kernels/batch_norm_op.cc | 5 +++-- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/mlir/tf2xla/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/tf2xla/transforms/legalize_tf.cc index 95e6bd8be37c29..d4c0703e5b0093 100644 --- a/tensorflow/compiler/mlir/tf2xla/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/tf2xla/transforms/legalize_tf.cc @@ -2304,7 +2304,8 @@ class ConvertFusedBatchNormBase : public OpRewritePattern { // Apply Bessel's correction on the variance. int total_input_size = bn_train_input_type_tensor.getNumElements(); int total_scale_size = scale_type_tensor.getNumElements(); - int sample_size = total_input_size / total_scale_size; + int sample_size = + total_scale_size > 0 ? 
total_input_size / total_scale_size : 0; int sample_size_minus_one = std::max(1, sample_size - 1); double factor = static_cast(sample_size) / static_cast(sample_size_minus_one); diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD index a3b082939c1dc8..e5ff2ae36c0c42 100644 --- a/tensorflow/compiler/tf2xla/kernels/BUILD +++ b/tensorflow/compiler/tf2xla/kernels/BUILD @@ -3732,6 +3732,7 @@ tf_kernel_library( ":while_op", ":xla_call_module_op", "//tensorflow/compiler/tf2xla:common", + "//tensorflow/compiler/tf2xla:mlir_xla_op_kernel", "//tensorflow/compiler/tf2xla:xla_compilation_device", "//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla:xla_context", diff --git a/tensorflow/compiler/tf2xla/kernels/batch_norm_op.cc b/tensorflow/compiler/tf2xla/kernels/batch_norm_op.cc index f361d39fee92c8..b1878892a2cf79 100644 --- a/tensorflow/compiler/tf2xla/kernels/batch_norm_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/batch_norm_op.cc @@ -20,6 +20,7 @@ limitations under the License. 
#include #include "tensorflow/compiler/tf2xla/kernels/relu_op.h" +#include "tensorflow/compiler/tf2xla/mlir_xla_op_kernel.h" #include "tensorflow/compiler/tf2xla/type_util.h" #include "tensorflow/compiler/tf2xla/xla_helpers.h" #include "tensorflow/compiler/tf2xla/xla_op_kernel.h" @@ -237,7 +238,7 @@ class FusedBatchNormOpEx : public FusedBatchNormOp { REGISTER_XLA_OP(Name("FusedBatchNorm"), FusedBatchNormOp); REGISTER_XLA_OP(Name("FusedBatchNormV2"), FusedBatchNormOp); -REGISTER_XLA_OP(Name("FusedBatchNormV3"), FusedBatchNormOpV3); +REGISTER_XLA_OP(Name("FusedBatchNormV3"), MlirXlaOpKernel); REGISTER_XLA_OP(Name("_FusedBatchNormEx"), FusedBatchNormOpEx); class FusedBatchNormGradOp : public XlaOpKernel { @@ -354,7 +355,7 @@ class FusedBatchNormGradOp : public XlaOpKernel { REGISTER_XLA_OP(Name("FusedBatchNormGrad"), FusedBatchNormGradOp); REGISTER_XLA_OP(Name("FusedBatchNormGradV2"), FusedBatchNormGradOp); -REGISTER_XLA_OP(Name("FusedBatchNormGradV3"), FusedBatchNormGradOp); +REGISTER_XLA_OP(Name("FusedBatchNormGradV3"), MlirXlaOpKernel); } // namespace } // namespace tensorflow From 2991aeb60931800b8103d6a89bc5d3b669f9ee73 Mon Sep 17 00:00:00 2001 From: RJ Ascani Date: Mon, 25 Sep 2023 13:08:44 -0700 Subject: [PATCH 233/567] Use similarly signed types for comparison In the downstream TFLM project, comparing int and size_t types results in sign-compare compiler warnings. 
PiperOrigin-RevId: 568304134 --- tensorflow/lite/kernels/internal/reference/sub.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/lite/kernels/internal/reference/sub.h b/tensorflow/lite/kernels/internal/reference/sub.h index 862bee149a5a69..1a74aebeafc2f1 100644 --- a/tensorflow/lite/kernels/internal/reference/sub.h +++ b/tensorflow/lite/kernels/internal/reference/sub.h @@ -36,7 +36,7 @@ struct SubImpl { static void BroadcastInput1(const ArithmeticParams& params, const T* input1_data, const T* input2_data, T* output_data, size_t size, F binary_func) { - for (int c = 0; c < size; ++c) { + for (size_t c = 0; c < size; ++c) { output_data[c] = binary_func(input1_data[0], input2_data[c], params); } } @@ -45,7 +45,7 @@ struct SubImpl { static void BroadcastInput2(const ArithmeticParams& params, const T* input1_data, const T* input2_data, T* output_data, size_t size, F binary_func) { - for (int c = 0; c < size; ++c) { + for (size_t c = 0; c < size; ++c) { output_data[c] = binary_func(input1_data[c], input2_data[0], params); } } @@ -54,7 +54,7 @@ struct SubImpl { static void ElementWise(const ArithmeticParams& params, const T* input1_data, const T* input2_data, T* output_data, size_t size, F binary_func) { - for (int c = 0; c < size; ++c) { + for (size_t c = 0; c < size; ++c) { output_data[c] = binary_func(input1_data[c], input2_data[c], params); } } @@ -146,7 +146,7 @@ inline void BroadcastSubRecursiveDimensions( size_t* compressed_input1_stride, size_t* compressed_input2_stride, size_t* compressed_output_shape, F binary_func) { if (dimension > 0) { - for (int c = 0; c < compressed_output_shape[dimension]; ++c) { + for (size_t c = 0; c < compressed_output_shape[dimension]; ++c) { size_t input1_offset_c = *input1_offset_p; size_t input2_offset_c = *input2_offset_p; BroadcastSubRecursiveDimensions( From 731a6beacd6ca2dfe3a24bbe47c18eb130307a3f Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Mon, 25 Sep 2023 13:24:59 -0700 Subject: 
[PATCH 234/567] [TSL:CUDA] Improve the "I didn't find this symbol" stub for cudaGetErrorString() and cudaGetErrorName(). These functions return strings; return a generic error here instead of returning an invalid integer value. Fixes crash in JAX CI. PiperOrigin-RevId: 568308538 --- .../xla/third_party/tsl/tsl/cuda/BUILD.bazel | 1 + .../third_party/tsl/tsl/cuda/cudart_stub.cc | 24 +++++++++++++++++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/third_party/xla/third_party/tsl/tsl/cuda/BUILD.bazel b/third_party/xla/third_party/tsl/tsl/cuda/BUILD.bazel index 71bed13a8bae96..10195390362436 100644 --- a/third_party/xla/third_party/tsl/tsl/cuda/BUILD.bazel +++ b/third_party/xla/third_party/tsl/tsl/cuda/BUILD.bazel @@ -123,6 +123,7 @@ cc_library( ":cuda", "//tsl/platform:dso_loader", "//tsl/platform:env", + "@com_google_absl//absl/container:flat_hash_set", "@local_config_cuda//cuda:cuda_headers", ], "//conditions:default": [], diff --git a/third_party/xla/third_party/tsl/tsl/cuda/cudart_stub.cc b/third_party/xla/third_party/tsl/tsl/cuda/cudart_stub.cc index cfae868dc667fc..a3797b5c751cd8 100644 --- a/third_party/xla/third_party/tsl/tsl/cuda/cudart_stub.cc +++ b/third_party/xla/third_party/tsl/tsl/cuda/cudart_stub.cc @@ -16,6 +16,9 @@ limitations under the License. // This file wraps cuda runtime calls with dso loader so that we don't need to // have explicit linking to libcuda. 
+#include + +#include "absl/container/flat_hash_set.h" #include "third_party/gpus/cuda/include/cuda_runtime_api.h" #include "tsl/platform/dso_loader.h" #include "tsl/platform/env.h" @@ -47,11 +50,23 @@ const char *kSymbols[] = { constexpr size_t kNumSymbols = sizeof(kSymbols) / sizeof(const char *); +absl::flat_hash_set const &ErrorStringSymbols() { + static auto *syms = new absl::flat_hash_set{ + "cudaGetErrorName", + "cudaGetErrorString", + }; + return *syms; +} + } // namespace extern "C" { -static cudaError_t CudartGetSymbolNotFoundError() { +static const char *ReturnStringError() { + return "Error loading CUDA libraries. GPU will not be used."; +} + +static cudaError_t GetSymbolNotFoundError() { return cudaErrorSharedObjectSymbolNotFound; } @@ -62,7 +77,12 @@ void _cudart_tramp_resolve(int i) { CHECK_LT(i, kNumSymbols); void *p = LoadSymbol(kSymbols[i]); if (!p) { - p = reinterpret_cast(&CudartGetSymbolNotFoundError); + const auto &error_string_symbols = ErrorStringSymbols(); + if (error_string_symbols.find(kSymbols[i]) != error_string_symbols.end()) { + p = reinterpret_cast(&ReturnStringError); + } else { + p = reinterpret_cast(&GetSymbolNotFoundError); + } } _cudart_tramp_table[i] = p; } From 257a679f7c80046ce6398413d2db767b3e15aa08 Mon Sep 17 00:00:00 2001 From: Emilio Cota Date: Mon, 25 Sep 2023 13:33:11 -0700 Subject: [PATCH 235/567] [xla:gpu] xfeed: add missing absl/synchronization dep PiperOrigin-RevId: 568310784 --- third_party/xla/xla/service/gpu/BUILD | 1 + third_party/xla/xla/service/gpu/xfeed_queue.h | 1 + 2 files changed, 2 insertions(+) diff --git a/third_party/xla/xla/service/gpu/BUILD b/third_party/xla/xla/service/gpu/BUILD index 9d988927395b17..3f0ca3b23d173c 100644 --- a/third_party/xla/xla/service/gpu/BUILD +++ b/third_party/xla/xla/service/gpu/BUILD @@ -3048,6 +3048,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/synchronization", 
"@local_tsl//tsl/platform:logging", ], ) diff --git a/third_party/xla/xla/service/gpu/xfeed_queue.h b/third_party/xla/xla/service/gpu/xfeed_queue.h index e787ee1aa0b3dc..09fb778d90eb07 100644 --- a/third_party/xla/xla/service/gpu/xfeed_queue.h +++ b/third_party/xla/xla/service/gpu/xfeed_queue.h @@ -22,6 +22,7 @@ limitations under the License. #include #include "absl/base/thread_annotations.h" +#include "absl/synchronization/mutex.h" #include "tsl/platform/logging.h" namespace xla { From f1b7a6a604dc847ddf6edb90f25c8a42f2434c58 Mon Sep 17 00:00:00 2001 From: Luke Boyer Date: Mon, 25 Sep 2023 13:37:11 -0700 Subject: [PATCH 236/567] Legalize EmptyTensorList as custom TensorListReserve with length 0. PiperOrigin-RevId: 568311796 --- .../mlir/lite/tests/legalize-tensorlist.mlir | 10 ++++++ .../lite/transforms/legalize_tensorlist.cc | 12 +++++-- .../lite/transforms/legalize_tensorlist.td | 8 +++++ .../kernels/variants/py/end_to_end_test.py | 36 +++++++++++++++++++ 4 files changed, 64 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/tests/legalize-tensorlist.mlir b/tensorflow/compiler/mlir/lite/tests/legalize-tensorlist.mlir index f97bab00c89503..9a8434489387fd 100644 --- a/tensorflow/compiler/mlir/lite/tests/legalize-tensorlist.mlir +++ b/tensorflow/compiler/mlir/lite/tests/legalize-tensorlist.mlir @@ -100,3 +100,13 @@ func.func @notAllOpsSupportedNotLegalized(%arg0: tensor } + +// ----- + +// CHECK-LABEL: listEmptyToListReserve +func.func @listEmptyToListReserve(%arg0: tensor, %arg1: tensor) -> tensor>> { + %0 = "tf.EmptyTensorList"(%arg0, %arg1) : (tensor, tensor) -> tensor>> + // CHECK: %cst = arith.constant dense<0> : tensor + // CHECK: %0 = "tfl.custom"(%arg0, %cst) {custom_code = "TensorListReserve", custom_option = #tfl} : (tensor, tensor) -> tensor>> + func.return %0 : tensor>> +} diff --git a/tensorflow/compiler/mlir/lite/transforms/legalize_tensorlist.cc b/tensorflow/compiler/mlir/lite/transforms/legalize_tensorlist.cc index 
820021284b833b..68ca0e53192a43 100644 --- a/tensorflow/compiler/mlir/lite/transforms/legalize_tensorlist.cc +++ b/tensorflow/compiler/mlir/lite/transforms/legalize_tensorlist.cc @@ -81,9 +81,14 @@ std::optional CustomOptions( mlir::MLIRContext* context, mlir::Operation* op) { if (auto reserve = llvm::dyn_cast_or_null(op)) { - mlir::Type element_type = reserve.getElementDtype(); tflite::TensorType tflite_type = - tflite::ConvertTypeToTensorType(element_type); + tflite::ConvertTypeToTensorType(reserve.getElementDtype()); + + return CreateListReserveOptions(context, tflite_type); + } + if (auto empty = llvm::dyn_cast_or_null(op)) { + tflite::TensorType tflite_type = + tflite::ConvertTypeToTensorType(empty.getElementDtype()); return CreateListReserveOptions(context, tflite_type); } @@ -120,6 +125,9 @@ bool IsOpSupported(mlir::Operation* op) { if (auto get_item = llvm::dyn_cast_or_null(op)) { element_type = get_item.getElementDtype(); } + if (auto empty = llvm::dyn_cast_or_null(op)) { + element_type = empty.getElementDtype(); + } if (!element_type.has_value()) return false; // TODO(b/288302706) add support for all types handled in the diff --git a/tensorflow/compiler/mlir/lite/transforms/legalize_tensorlist.td b/tensorflow/compiler/mlir/lite/transforms/legalize_tensorlist.td index be13cd7c4ef326..5f8757fea2c844 100644 --- a/tensorflow/compiler/mlir/lite/transforms/legalize_tensorlist.td +++ b/tensorflow/compiler/mlir/lite/transforms/legalize_tensorlist.td @@ -17,6 +17,10 @@ include "tensorflow/compiler/mlir/lite/ir/tfl_ops.td" include "tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td" include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td" include "mlir/IR/OpBase.td" +include "mlir/Dialect/Arith/IR/ArithOps.td" + +def ConstDenseElementsI32ZeroAttr + : NativeCodeCall<"$_builder.create($_loc, DenseElementsAttr::get(RankedTensorType::get({}, $_builder.getI32Type()), {0}))">; def Size2InputRange : NativeCodeCall< "SmallVector{$0, $1}">; @@ -52,3 +56,7 @@ def 
LegalizeTensorListFromTensor : Pat<(TF_TensorListFromTensorOp $input, $eleme def LegalizeTensorListGetItem : Pat<(TF_TensorListGetItemOp $input, $index, $element_shape), (TFL_CustomOp (Size3InputRange $input, $index, $element_shape), (CreateStringAttr<"\"TensorListGetItem\"">), (EmptyCustomOptions))>; + +def LegalizeTensorListEmpty : Pat<(TF_EmptyTensorListOp:$tf_op $element_shape, $unused_max_elements), + (TFL_CustomOp (Size2InputRange $element_shape, (ConstDenseElementsI32ZeroAttr)), + (CreateStringAttr<"\"TensorListReserve\"">), (CustomOptions $tf_op))>; diff --git a/tensorflow/lite/kernels/variants/py/end_to_end_test.py b/tensorflow/lite/kernels/variants/py/end_to_end_test.py index 115e799cd5b7a0..dff0d397b3c69d 100644 --- a/tensorflow/lite/kernels/variants/py/end_to_end_test.py +++ b/tensorflow/lite/kernels/variants/py/end_to_end_test.py @@ -237,6 +237,42 @@ def reserve_stack(stack_element_shape) -> tf.Tensor: self.assertEqual(tf_out.shape, output_tensor.shape) self.assertTrue((tf_out == output_tensor).numpy().all()) + def test_empty_tensorlist_set_stack(self): + @tf.function( + input_signature=[ + tf.TensorSpec(shape=tf.TensorShape(None), dtype=tf.int32) + ] + ) + def empty_tensorlist_set_stack(x) -> tf.Tensor: + l = list_ops.empty_tensor_list(tf.TensorShape(None), tf.int32) + l2 = list_ops.tensor_list_set_item(l, 0, x, True) + + return list_ops.tensor_list_stack(l2, tf.int32) + + interpreter = self._get_interpreter_from_c_func(empty_tensorlist_set_stack) + + input_index = interpreter.get_input_details()[0]["index"] + + interpreter.resize_tensor_input(input_index, [2, 2]) + + interpreter.allocate_tensors() + + input_tensor = np.ndarray(shape=[2, 2], dtype=np.int32) + input_tensor.fill(0) + interpreter.set_tensor(input_index, input_tensor) + + interpreter.invoke() + + output_tensor = interpreter.get_tensor( + interpreter.get_output_details()[0]["index"] + ) + + tf_out = empty_tensorlist_set_stack(input_tensor) + + self.assertEqual(tf_out.dtype, 
output_tensor.dtype) + self.assertEqual(tf_out.shape, output_tensor.shape) + self.assertTrue((tf_out == output_tensor).numpy().all()) + if __name__ == "__main__": googletest.main() From 3bc2b252352279036c5957b44156d9366fda0474 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Mon, 25 Sep 2023 13:40:07 -0700 Subject: [PATCH 237/567] [stream_executor] Add GPU graph handle to GpuCommandBuffer https://github.com/openxla/xla/issues/5857 PiperOrigin-RevId: 568312557 --- .../xla/stream_executor/cuda/cuda_driver.cc | 2 +- .../stream_executor/cuda/cuda_gpu_executor.cc | 8 ++- third_party/xla/xla/stream_executor/gpu/BUILD | 3 + .../stream_executor/gpu/gpu_command_buffer.cc | 57 +++++++++++++++++++ .../stream_executor/gpu/gpu_command_buffer.h | 32 ++++++++++- .../xla/stream_executor/platform/initialize.h | 4 +- 6 files changed, 100 insertions(+), 6 deletions(-) diff --git a/third_party/xla/xla/stream_executor/cuda/cuda_driver.cc b/third_party/xla/xla/stream_executor/cuda/cuda_driver.cc index bf180cb86486d1..4daea1ebc57fb2 100644 --- a/third_party/xla/xla/stream_executor/cuda/cuda_driver.cc +++ b/third_party/xla/xla/stream_executor/cuda/cuda_driver.cc @@ -662,7 +662,7 @@ GpuDriver::GraphNodeGetType(CUgraphNode node) { /* static */ tsl::Status GpuDriver::DestroyGraphExec(CUgraphExec exec) { VLOG(2) << "Destroying CUDA executable graph " << exec; RETURN_IF_CUDA_RES_ERROR(cuGraphExecDestroy(exec), - "Failed to destroy CUDA graph"); + "Failed to destroy CUDA executable graph"); return ::tsl::OkStatus(); } diff --git a/third_party/xla/xla/stream_executor/cuda/cuda_gpu_executor.cc b/third_party/xla/xla/stream_executor/cuda/cuda_gpu_executor.cc index 0d882033ee56e5..483f9a973083d1 100644 --- a/third_party/xla/xla/stream_executor/cuda/cuda_gpu_executor.cc +++ b/third_party/xla/xla/stream_executor/cuda/cuda_gpu_executor.cc @@ -40,9 +40,11 @@ limitations under the License. 
#include "xla/stream_executor/cuda/cuda_driver.h" #include "xla/stream_executor/cuda/cuda_platform_id.h" #include "xla/stream_executor/gpu/gpu_command_buffer.h" +#include "xla/stream_executor/gpu/gpu_driver.h" #include "xla/stream_executor/gpu/gpu_event.h" #include "xla/stream_executor/gpu/gpu_kernel.h" #include "xla/stream_executor/gpu/gpu_timer.h" +#include "xla/stream_executor/gpu/gpu_types.h" #include "xla/stream_executor/kernel_cache_config.h" #include "xla/stream_executor/platform.h" #include "xla/stream_executor/platform/initialize.h" @@ -844,8 +846,10 @@ GpuExecutor::GetStreamImplementation() { tsl::StatusOr> GpuExecutor::GetCommandBufferImplementation() { - return std::unique_ptr( - new GpuCommandBuffer()); + VLOG(2) << "Create CUDA command buffer (CUDA graph)"; + GpuGraphHandle graph = nullptr; + TF_RETURN_IF_ERROR(GpuDriver::CreateGraph(&graph)); + return std::make_unique(graph); } void* GpuExecutor::GpuContextHack() { return context_; } diff --git a/third_party/xla/xla/stream_executor/gpu/BUILD b/third_party/xla/xla/stream_executor/gpu/BUILD index aec49df203b217..87a41605b5046d 100644 --- a/third_party/xla/xla/stream_executor/gpu/BUILD +++ b/third_party/xla/xla/stream_executor/gpu/BUILD @@ -91,7 +91,10 @@ cc_library( ":gpu_driver_header", ":gpu_executor_header", ":gpu_stream", + ":gpu_types_header", "//xla/stream_executor:stream_executor_headers", + "@com_google_absl//absl/log", + "@com_google_absl//absl/log:check", "@local_tsl//tsl/platform:status", "@local_tsl//tsl/platform:statusor", ], diff --git a/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer.cc b/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer.cc index 3068e37710a618..9e18de6611234e 100644 --- a/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer.cc +++ b/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer.cc @@ -14,3 +14,60 @@ limitations under the License. 
==============================================================================*/ #include "xla/stream_executor/gpu/gpu_command_buffer.h" + +#include +#include + +#include "absl/log/check.h" +#include "absl/log/log.h" +#include "xla/stream_executor/gpu/gpu_driver.h" +#include "xla/stream_executor/gpu/gpu_types.h" + +namespace stream_executor::gpu { + +//===----------------------------------------------------------------------===// +// GpuCommandBuffer resource usage tracking +//===----------------------------------------------------------------------===// + +static std::atomic allocated_execs(0); +static std::atomic alive_execs(0); + +static int64_t NotifyExecCreated() { + alive_execs.fetch_add(1, std::memory_order_relaxed); + return allocated_execs.fetch_add(1, std::memory_order_relaxed); +} + +static int64_t NotifyExecDestroyed() { + DCHECK_GE(alive_execs.load(std::memory_order_relaxed), 1); + return alive_execs.fetch_sub(1, std::memory_order_relaxed) - 1; +} + +/*static*/ int64_t GpuCommandBuffer::AllocatedExecs() { + return allocated_execs.load(std::memory_order_relaxed); +} + +/*static*/ int64_t GpuCommandBuffer::AliveExecs() { + return alive_execs.load(std::memory_order_relaxed); +} + +//===----------------------------------------------------------------------===// +// GpuCommandBuffer implementation +//===----------------------------------------------------------------------===// + +GpuCommandBuffer::GpuCommandBuffer(GpuGraphHandle graph) + : graph_(graph), exec_(nullptr) {} + +GpuCommandBuffer::~GpuCommandBuffer() { + if (exec_ != nullptr) { + VLOG(5) << "Destroy GPU command buffer graph exec. 
" + << "Remaining alive instances: " << NotifyExecDestroyed(); + auto st = GpuDriver::DestroyGraphExec(exec_); + CHECK(st.ok()) << "Failed to destroy GPU graph exec: " << st.message(); + } + if (graph_ != nullptr) { + auto st = GpuDriver::DestroyGraph(graph_); + CHECK(st.ok()) << "Failed to destroy GPU graph: " << st.message(); + } +} + +} // namespace stream_executor::gpu diff --git a/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer.h b/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer.h index 4669dd1abc823b..7ce16607b7a60d 100644 --- a/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer.h +++ b/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer.h @@ -16,13 +16,43 @@ limitations under the License. #ifndef XLA_STREAM_EXECUTOR_GPU_GPU_COMMAND_BUFFER_H_ #define XLA_STREAM_EXECUTOR_GPU_GPU_COMMAND_BUFFER_H_ +#include +#include + +#include "xla/stream_executor/gpu/gpu_types.h" #include "xla/stream_executor/stream_executor_internal.h" namespace stream_executor::gpu { // GpuCommandBuffer provides platform-specific CommandBufferInterface // implementation (it's backed by CUDA or HIP graphs on NVIDIA and AMD devices). -class GpuCommandBuffer : public internal::CommandBufferInterface {}; +class GpuCommandBuffer : public internal::CommandBufferInterface { + public: + explicit GpuCommandBuffer(GpuGraphHandle graph); + ~GpuCommandBuffer() override; + + // We track the total number of allocated and alive executable graphs in the + // process to track the command buffers resource usage. Executable graph + // allocates resources on a GPU devices (rule of thumb is ~8kb per node), so + // we have to be careful not to keep too many of them alive for too long, or + // we have a higher risk of OOM errors. 
+ // + // TODO(ezhulenev): We need to have a policy for how to evict unused + // executable graph instances from a device, currently lifetime of an + // executable graph is tied to a parent command buffer, and we can have + // thousands of command buffers alive at the same time. + static int64_t AllocatedExecs(); + static int64_t AliveExecs(); + + private: + static_assert(std::is_pointer_v, + "GpuGraphHandle must be a pointer"); + static_assert(std::is_pointer_v, + "GpuGraphExecHandle must be a pointer"); + + GpuGraphHandle graph_ = nullptr; // owned handle + GpuGraphExecHandle exec_ = nullptr; // owned handle +}; } // namespace stream_executor::gpu diff --git a/third_party/xla/xla/stream_executor/platform/initialize.h b/third_party/xla/xla/stream_executor/platform/initialize.h index f0d43b82b23a97..d56bda8383ae2b 100644 --- a/third_party/xla/xla/stream_executor/platform/initialize.h +++ b/third_party/xla/xla/stream_executor/platform/initialize.h @@ -19,9 +19,9 @@ limitations under the License. 
#include "xla/stream_executor/platform/platform.h" #if defined(PLATFORM_GOOGLE) || defined(PLATFORM_CHROMIUMOS) -#include "xla/stream_executor/platform/google/initialize.h" +#include "xla/stream_executor/platform/google/initialize.h" // IWYU pragma: export #else -#include "xla/stream_executor/platform/default/initialize.h" +#include "xla/stream_executor/platform/default/initialize.h" // IWYU pragma: export #endif #endif // XLA_STREAM_EXECUTOR_PLATFORM_INITIALIZE_H_ From bbda501caf7e83e7d390c372647ec23864a9c65c Mon Sep 17 00:00:00 2001 From: Feng Wang Date: Mon, 25 Sep 2023 14:26:26 -0700 Subject: [PATCH 238/567] Add new context type for serving PiperOrigin-RevId: 568324654 --- .../xla/third_party/tsl/tsl/profiler/lib/context_types.cc | 2 ++ .../xla/third_party/tsl/tsl/profiler/lib/context_types.h | 1 + 2 files changed, 3 insertions(+) diff --git a/third_party/xla/third_party/tsl/tsl/profiler/lib/context_types.cc b/third_party/xla/third_party/tsl/tsl/profiler/lib/context_types.cc index f5394ef32016bb..9379885c4de76b 100644 --- a/third_party/xla/third_party/tsl/tsl/profiler/lib/context_types.cc +++ b/third_party/xla/third_party/tsl/tsl/profiler/lib/context_types.cc @@ -44,6 +44,8 @@ const char* GetContextTypeString(ContextType context_type) { return "tpu_stream"; case ContextType::kTpuLaunch: return "tpu_launch"; + case ContextType::kPathwaysExecutor: + return "pathways_exec"; } } diff --git a/third_party/xla/third_party/tsl/tsl/profiler/lib/context_types.h b/third_party/xla/third_party/tsl/tsl/profiler/lib/context_types.h index dc4a0ebe1f5a65..6f65454354a1dc 100644 --- a/third_party/xla/third_party/tsl/tsl/profiler/lib/context_types.h +++ b/third_party/xla/third_party/tsl/tsl/profiler/lib/context_types.h @@ -35,6 +35,7 @@ enum class ContextType : int { kBatcher, kTpuStream, kTpuLaunch, + kPathwaysExecutor, kLastContextType = ContextType::kTpuLaunch, }; From f21782ea1d8b5bb25ce2a5146950677aff650160 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Mon, 25 Sep 2023 
15:46:42 -0700 Subject: [PATCH 239/567] [tsl] Add the option to obtain NCCL via a stub, rather than linking it statically or dynamically. NCCL via a stub is enabled if the environment variable TF_NCCL_USE_STUB=1 is set. The intent is to use this option in JAX to reduce the size of the CUDA wheels. NCCL takes up about 80MB in each compiled JAX wheel and takes a significant amount of time to build. It is possible other users of TSL may wish to do this also. PiperOrigin-RevId: 568344701 --- third_party/gpus/cuda/cuda_config.h.tpl | 1 + third_party/nccl/archive.BUILD | 23 +++++ third_party/nccl/nccl_configure.bzl | 28 +++++- .../third_party/gpus/cuda/cuda_config.h.tpl | 1 + .../xla/third_party/nccl/archive.BUILD | 23 +++++ .../xla/third_party/nccl/nccl_configure.bzl | 28 +++++- .../third_party/gpus/cuda/cuda_config.h.tpl | 1 + .../tsl/third_party/nccl/archive.BUILD | 23 +++++ .../tsl/third_party/nccl/nccl_configure.bzl | 28 +++++- .../xla/third_party/tsl/tsl/cuda/BUILD.bazel | 26 ++++++ .../xla/third_party/tsl/tsl/cuda/nccl.symbols | 54 +++++++++++ .../xla/third_party/tsl/tsl/cuda/nccl_stub.cc | 93 +++++++++++++++++++ .../tsl/tsl/platform/default/dso_loader.cc | 5 + .../tsl/tsl/platform/default/dso_loader.h | 1 + third_party/xla/xla/python/BUILD | 1 + 15 files changed, 333 insertions(+), 3 deletions(-) create mode 100644 third_party/xla/third_party/tsl/tsl/cuda/nccl.symbols create mode 100644 third_party/xla/third_party/tsl/tsl/cuda/nccl_stub.cc diff --git a/third_party/gpus/cuda/cuda_config.h.tpl b/third_party/gpus/cuda/cuda_config.h.tpl index 03ecd0159f496a..cd2850db146685 100644 --- a/third_party/gpus/cuda/cuda_config.h.tpl +++ b/third_party/gpus/cuda/cuda_config.h.tpl @@ -25,6 +25,7 @@ limitations under the License. 
#define TF_CUFFT_VERSION "%{cufft_version}" #define TF_CUSPARSE_VERSION "%{cusparse_version}" #define TF_CUDNN_VERSION "%{cudnn_version}" +#define TF_NCCL_VERSION "%{nccl_version}" #define TF_CUDA_TOOLKIT_PATH "%{cuda_toolkit_path}" diff --git a/third_party/nccl/archive.BUILD b/third_party/nccl/archive.BUILD index a608faf8a262ef..0a639a560976ae 100644 --- a/third_party/nccl/archive.BUILD +++ b/third_party/nccl/archive.BUILD @@ -86,6 +86,29 @@ cc_library( ], ) +cc_library( + name = "nccl_via_stub", + hdrs = ["src/nccl.h"], + include_prefix = "third_party/nccl", + strip_include_prefix = "src", + visibility = ["//visibility:public"], + deps = [ + "@local_config_cuda//cuda:cuda_headers", + "@local_tsl//tsl/cuda:nccl_stub", + ], +) + +cc_library( + name = "nccl_headers", + hdrs = ["src/nccl.h"], + include_prefix = "third_party/nccl", + strip_include_prefix = "src", + visibility = ["//visibility:public"], + deps = [ + "@local_config_cuda//cuda:cuda_headers", + ], +) + cc_library( name = "nccl", srcs = glob( diff --git a/third_party/nccl/nccl_configure.bzl b/third_party/nccl/nccl_configure.bzl index 10cc2729ee92e8..acdeac3b9fd647 100644 --- a/third_party/nccl/nccl_configure.bzl +++ b/third_party/nccl/nccl_configure.bzl @@ -9,6 +9,8 @@ * `TF_CUDA_PATHS`: The base paths to look for CUDA and cuDNN. Default is `/usr/local/cuda,usr/`. * `TF_CUDA_CLANG`: "1" if using Clang, "0" if using NVCC. + * `TF_NCCL_USE_STUB`: "1" if a NCCL stub that loads NCCL dynamically should + be used, "0" if NCCL should be linked in statically. 
""" @@ -32,6 +34,7 @@ _TF_NCCL_VERSION = "TF_NCCL_VERSION" _TF_NEED_CUDA = "TF_NEED_CUDA" _TF_CUDA_PATHS = "TF_CUDA_PATHS" _TF_CUDA_CLANG = "TF_CUDA_CLANG" +_TF_NCCL_USE_STUB = "TF_NCCL_USE_STUB" _DEFINE_NCCL_MAJOR = "#define NCCL_MAJOR" _DEFINE_NCCL_MINOR = "#define NCCL_MINOR" @@ -63,6 +66,26 @@ alias( ) """ +_NCCL_ARCHIVE_STUB_BUILD_CONTENT = """ +filegroup( + name = "LICENSE", + data = ["@nccl_archive//:LICENSE.txt"], + visibility = ["//visibility:public"], +) + +alias( + name = "nccl", + actual = "@nccl_archive//:nccl_via_stub", + visibility = ["//visibility:public"], +) + +alias( + name = "nccl_headers", + actual = "@nccl_archive//:nccl_headers", + visibility = ["//visibility:public"], +) +""" + def _label(file): return Label("//third_party/nccl:{}".format(file)) @@ -82,7 +105,10 @@ def _create_local_nccl_repository(repository_ctx): if nccl_version == "": # Alias to open source build from @nccl_archive. - repository_ctx.file("BUILD", _NCCL_ARCHIVE_BUILD_CONTENT) + if get_host_environ(repository_ctx, _TF_NCCL_USE_STUB, "0") == "0": + repository_ctx.file("BUILD", _NCCL_ARCHIVE_BUILD_CONTENT) + else: + repository_ctx.file("BUILD", _NCCL_ARCHIVE_STUB_BUILD_CONTENT) repository_ctx.template( "build_defs.bzl", diff --git a/third_party/xla/third_party/gpus/cuda/cuda_config.h.tpl b/third_party/xla/third_party/gpus/cuda/cuda_config.h.tpl index 03ecd0159f496a..cd2850db146685 100644 --- a/third_party/xla/third_party/gpus/cuda/cuda_config.h.tpl +++ b/third_party/xla/third_party/gpus/cuda/cuda_config.h.tpl @@ -25,6 +25,7 @@ limitations under the License. 
#define TF_CUFFT_VERSION "%{cufft_version}" #define TF_CUSPARSE_VERSION "%{cusparse_version}" #define TF_CUDNN_VERSION "%{cudnn_version}" +#define TF_NCCL_VERSION "%{nccl_version}" #define TF_CUDA_TOOLKIT_PATH "%{cuda_toolkit_path}" diff --git a/third_party/xla/third_party/nccl/archive.BUILD b/third_party/xla/third_party/nccl/archive.BUILD index a608faf8a262ef..0a639a560976ae 100644 --- a/third_party/xla/third_party/nccl/archive.BUILD +++ b/third_party/xla/third_party/nccl/archive.BUILD @@ -86,6 +86,29 @@ cc_library( ], ) +cc_library( + name = "nccl_via_stub", + hdrs = ["src/nccl.h"], + include_prefix = "third_party/nccl", + strip_include_prefix = "src", + visibility = ["//visibility:public"], + deps = [ + "@local_config_cuda//cuda:cuda_headers", + "@local_tsl//tsl/cuda:nccl_stub", + ], +) + +cc_library( + name = "nccl_headers", + hdrs = ["src/nccl.h"], + include_prefix = "third_party/nccl", + strip_include_prefix = "src", + visibility = ["//visibility:public"], + deps = [ + "@local_config_cuda//cuda:cuda_headers", + ], +) + cc_library( name = "nccl", srcs = glob( diff --git a/third_party/xla/third_party/nccl/nccl_configure.bzl b/third_party/xla/third_party/nccl/nccl_configure.bzl index 9653ca49deff4a..7a9a9ac6112461 100644 --- a/third_party/xla/third_party/nccl/nccl_configure.bzl +++ b/third_party/xla/third_party/nccl/nccl_configure.bzl @@ -9,6 +9,8 @@ * `TF_CUDA_PATHS`: The base paths to look for CUDA and cuDNN. Default is `/usr/local/cuda,usr/`. * `TF_CUDA_CLANG`: "1" if using Clang, "0" if using NVCC. + * `TF_NCCL_USE_STUB`: "1" if a NCCL stub that loads NCCL dynamically should + be used, "0" if NCCL should be linked in statically. 
""" @@ -32,6 +34,7 @@ _TF_NCCL_VERSION = "TF_NCCL_VERSION" _TF_NEED_CUDA = "TF_NEED_CUDA" _TF_CUDA_PATHS = "TF_CUDA_PATHS" _TF_CUDA_CLANG = "TF_CUDA_CLANG" +_TF_NCCL_USE_STUB = "TF_NCCL_USE_STUB" _DEFINE_NCCL_MAJOR = "#define NCCL_MAJOR" _DEFINE_NCCL_MINOR = "#define NCCL_MINOR" @@ -63,6 +66,26 @@ alias( ) """ +_NCCL_ARCHIVE_STUB_BUILD_CONTENT = """ +filegroup( + name = "LICENSE", + data = ["@nccl_archive//:LICENSE.txt"], + visibility = ["//visibility:public"], +) + +alias( + name = "nccl", + actual = "@nccl_archive//:nccl_via_stub", + visibility = ["//visibility:public"], +) + +alias( + name = "nccl_headers", + actual = "@nccl_archive//:nccl_headers", + visibility = ["//visibility:public"], +) +""" + def _label(file): return Label("//third_party/nccl:{}".format(file)) @@ -82,7 +105,10 @@ def _create_local_nccl_repository(repository_ctx): if nccl_version == "": # Alias to open source build from @nccl_archive. - repository_ctx.file("BUILD", _NCCL_ARCHIVE_BUILD_CONTENT) + if get_host_environ(repository_ctx, _TF_NCCL_USE_STUB, "0") == "0": + repository_ctx.file("BUILD", _NCCL_ARCHIVE_BUILD_CONTENT) + else: + repository_ctx.file("BUILD", _NCCL_ARCHIVE_STUB_BUILD_CONTENT) repository_ctx.template( "build_defs.bzl", diff --git a/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_config.h.tpl b/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_config.h.tpl index 03ecd0159f496a..cd2850db146685 100644 --- a/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_config.h.tpl +++ b/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_config.h.tpl @@ -25,6 +25,7 @@ limitations under the License. 
#define TF_CUFFT_VERSION "%{cufft_version}" #define TF_CUSPARSE_VERSION "%{cusparse_version}" #define TF_CUDNN_VERSION "%{cudnn_version}" +#define TF_NCCL_VERSION "%{nccl_version}" #define TF_CUDA_TOOLKIT_PATH "%{cuda_toolkit_path}" diff --git a/third_party/xla/third_party/tsl/third_party/nccl/archive.BUILD b/third_party/xla/third_party/tsl/third_party/nccl/archive.BUILD index a608faf8a262ef..0a639a560976ae 100644 --- a/third_party/xla/third_party/tsl/third_party/nccl/archive.BUILD +++ b/third_party/xla/third_party/tsl/third_party/nccl/archive.BUILD @@ -86,6 +86,29 @@ cc_library( ], ) +cc_library( + name = "nccl_via_stub", + hdrs = ["src/nccl.h"], + include_prefix = "third_party/nccl", + strip_include_prefix = "src", + visibility = ["//visibility:public"], + deps = [ + "@local_config_cuda//cuda:cuda_headers", + "@local_tsl//tsl/cuda:nccl_stub", + ], +) + +cc_library( + name = "nccl_headers", + hdrs = ["src/nccl.h"], + include_prefix = "third_party/nccl", + strip_include_prefix = "src", + visibility = ["//visibility:public"], + deps = [ + "@local_config_cuda//cuda:cuda_headers", + ], +) + cc_library( name = "nccl", srcs = glob( diff --git a/third_party/xla/third_party/tsl/third_party/nccl/nccl_configure.bzl b/third_party/xla/third_party/tsl/third_party/nccl/nccl_configure.bzl index ca5c66ea0322fe..5ba212f002e85e 100644 --- a/third_party/xla/third_party/tsl/third_party/nccl/nccl_configure.bzl +++ b/third_party/xla/third_party/tsl/third_party/nccl/nccl_configure.bzl @@ -9,6 +9,8 @@ * `TF_CUDA_PATHS`: The base paths to look for CUDA and cuDNN. Default is `/usr/local/cuda,usr/`. * `TF_CUDA_CLANG`: "1" if using Clang, "0" if using NVCC. + * `TF_NCCL_USE_STUB`: "1" if a NCCL stub that loads NCCL dynamically should + be used, "0" if NCCL should be linked in statically. 
""" @@ -32,6 +34,7 @@ _TF_NCCL_VERSION = "TF_NCCL_VERSION" _TF_NEED_CUDA = "TF_NEED_CUDA" _TF_CUDA_PATHS = "TF_CUDA_PATHS" _TF_CUDA_CLANG = "TF_CUDA_CLANG" +_TF_NCCL_USE_STUB = "TF_NCCL_USE_STUB" _DEFINE_NCCL_MAJOR = "#define NCCL_MAJOR" _DEFINE_NCCL_MINOR = "#define NCCL_MINOR" @@ -63,6 +66,26 @@ alias( ) """ +_NCCL_ARCHIVE_STUB_BUILD_CONTENT = """ +filegroup( + name = "LICENSE", + data = ["@nccl_archive//:LICENSE.txt"], + visibility = ["//visibility:public"], +) + +alias( + name = "nccl", + actual = "@nccl_archive//:nccl_via_stub", + visibility = ["//visibility:public"], +) + +alias( + name = "nccl_headers", + actual = "@nccl_archive//:nccl_headers", + visibility = ["//visibility:public"], +) +""" + def _label(file): return Label("//third_party/nccl:{}".format(file)) @@ -82,7 +105,10 @@ def _create_local_nccl_repository(repository_ctx): if nccl_version == "": # Alias to open source build from @nccl_archive. - repository_ctx.file("BUILD", _NCCL_ARCHIVE_BUILD_CONTENT) + if get_host_environ(repository_ctx, _TF_NCCL_USE_STUB, "0") == "0": + repository_ctx.file("BUILD", _NCCL_ARCHIVE_BUILD_CONTENT) + else: + repository_ctx.file("BUILD", _NCCL_ARCHIVE_STUB_BUILD_CONTENT) repository_ctx.template( "build_defs.bzl", diff --git a/third_party/xla/third_party/tsl/tsl/cuda/BUILD.bazel b/third_party/xla/third_party/tsl/tsl/cuda/BUILD.bazel index 10195390362436..47291d50c4e2cd 100644 --- a/third_party/xla/third_party/tsl/tsl/cuda/BUILD.bazel +++ b/third_party/xla/third_party/tsl/tsl/cuda/BUILD.bazel @@ -285,3 +285,29 @@ cc_library( "//tsl/platform:env", ]), ) + +cuda_stub( + name = "nccl", + srcs = ["nccl.symbols"], +) + +cc_library( + name = "nccl_stub", + srcs = if_cuda_is_configured([ + "nccl_stub.cc", + "nccl.tramp.S", + ]), + linkopts = if_cuda_is_configured(cuda_rpath_flags("nvidia/nccl/lib")), + local_defines = [ + "IMPLIB_EXPORT_SHIMS=1", + ], + textual_hdrs = ["nccl.inc"], + visibility = ["//visibility:public"], + deps = if_cuda_is_configured([ + 
"@com_google_absl//absl/container:flat_hash_set", + "@local_config_cuda//cuda:cuda_headers", + "@local_config_nccl//:nccl_headers", + "//tsl/platform:dso_loader", + "//tsl/platform:env", + ]), +) diff --git a/third_party/xla/third_party/tsl/tsl/cuda/nccl.symbols b/third_party/xla/third_party/tsl/tsl/cuda/nccl.symbols new file mode 100644 index 00000000000000..0d6552dafe0238 --- /dev/null +++ b/third_party/xla/third_party/tsl/tsl/cuda/nccl.symbols @@ -0,0 +1,54 @@ +ncclAllGather +ncclAllReduce +ncclBcast +ncclBroadcast +ncclCommAbort +ncclCommCount +ncclCommCuDevice +ncclCommDestroy +ncclCommFinalize +ncclCommGetAsyncError +ncclCommInitAll +ncclCommInitRank +ncclCommInitRankConfig +ncclCommSplit +ncclCommUserRank +ncclGetErrorString +ncclGetLastError +ncclGetUniqueId +ncclGetVersion +ncclGroupEnd +ncclGroupStart +ncclRecv +ncclRedOpCreatePreMulSum +ncclRedOpDestroy +ncclReduce +ncclReduceScatter +ncclSend +pncclAllGather +pncclAllReduce +pncclBcast +pncclBroadcast +pncclCommAbort +pncclCommCount +pncclCommCuDevice +pncclCommDestroy +pncclCommFinalize +pncclCommGetAsyncError +pncclCommInitAll +pncclCommInitRank +pncclCommInitRankConfig +pncclCommSplit +pncclCommUserRank +pncclGetErrorString +pncclGetLastError +pncclGetUniqueId +pncclGetVersion +pncclGroupEnd +pncclGroupStart +pncclRecv +pncclRedOpCreatePreMulSum +pncclRedOpDestroy +pncclReduce +pncclReduceScatter +pncclSend diff --git a/third_party/xla/third_party/tsl/tsl/cuda/nccl_stub.cc b/third_party/xla/third_party/tsl/tsl/cuda/nccl_stub.cc new file mode 100644 index 00000000000000..0ebae2f3c2b2eb --- /dev/null +++ b/third_party/xla/third_party/tsl/tsl/cuda/nccl_stub.cc @@ -0,0 +1,93 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include + +#include "absl/container/flat_hash_set.h" +#include "third_party/gpus/cuda/include/cuda.h" +#include "third_party/nccl/nccl.h" +#include "tsl/platform/dso_loader.h" +#include "tsl/platform/env.h" + +// Implements the nccl API by forwarding to nccl loaded from a DSO. + +namespace { +// Returns DSO handle or null if loading the DSO fails. +void* GetDsoHandle() { +#ifdef PLATFORM_GOOGLE + return nullptr; +#else + static auto handle = []() -> void* { + auto handle_or = tsl::internal::DsoLoader::GetNcclDsoHandle(); + if (!handle_or.ok()) return nullptr; + return handle_or.value(); + }(); + return handle; +#endif +} + +void* LoadSymbol(const char* symbol_name) { + void* symbol = nullptr; + if (auto handle = GetDsoHandle()) { + tsl::Env::Default() + ->GetSymbolFromLibrary(handle, symbol_name, &symbol) + .IgnoreError(); + } + return symbol; +} + +const char* kSymbols[] = { +#include "tsl/cuda/nccl.inc" +}; + +constexpr size_t kNumSymbols = sizeof(kSymbols) / sizeof(const char*); + +absl::flat_hash_set const& ErrorStringSymbols() { + static auto* syms = new absl::flat_hash_set{ + "ncclGetErrorString", + "pncclGetErrorString", + "ncclGetLastError", + "pncclGetLastError", + }; + return *syms; +} + +} // namespace + +extern "C" { + +static ncclResult_t GetSymbolNotFoundError() { return ncclSystemError; } + +static const char* ReturnErrorString() { + return "Unable to load NCCL library. 
Multi-GPU collectives will not work."; +} + +extern void* _nccl_tramp_table[]; + +void _nccl_tramp_resolve(int i) { + CHECK_LE(0, i); + CHECK_LT(i, kNumSymbols); + void* p = LoadSymbol(kSymbols[i]); + if (!p) { + const auto& error_string_syms = ErrorStringSymbols(); + if (error_string_syms.find(kSymbols[i]) != error_string_syms.end()) { + p = reinterpret_cast(&ReturnErrorString); + } else { + p = reinterpret_cast(&GetSymbolNotFoundError); + } + } + _nccl_tramp_table[i] = p; +} + +} // extern "C" diff --git a/third_party/xla/third_party/tsl/tsl/platform/default/dso_loader.cc b/third_party/xla/third_party/tsl/tsl/platform/default/dso_loader.cc index d3552daa298b5f..c75806e902cac3 100644 --- a/third_party/xla/third_party/tsl/tsl/platform/default/dso_loader.cc +++ b/third_party/xla/third_party/tsl/tsl/platform/default/dso_loader.cc @@ -44,6 +44,7 @@ string GetCublasVersion() { return TF_CUBLAS_VERSION; } string GetCusolverVersion() { return TF_CUSOLVER_VERSION; } string GetCufftVersion() { return TF_CUFFT_VERSION; } string GetCusparseVersion() { return TF_CUSPARSE_VERSION; } +string GetNcclVersion() { return TF_NCCL_VERSION; } string GetTensorRTVersion() { return TF_TENSORRT_VERSION; } StatusOr GetDsoHandle(const string& name, const string& version) { @@ -119,6 +120,10 @@ StatusOr GetCudnnDsoHandle() { return GetDsoHandle("cudnn", GetCudnnVersion()); } +StatusOr GetNcclDsoHandle() { + return GetDsoHandle("nccl", GetNcclVersion()); +} + StatusOr GetNvInferDsoHandle() { #if defined(PLATFORM_WINDOWS) return GetDsoHandle("nvinfer", ""); diff --git a/third_party/xla/third_party/tsl/tsl/platform/default/dso_loader.h b/third_party/xla/third_party/tsl/tsl/platform/default/dso_loader.h index db20d493336953..ee5b2b28af3486 100644 --- a/third_party/xla/third_party/tsl/tsl/platform/default/dso_loader.h +++ b/third_party/xla/third_party/tsl/tsl/platform/default/dso_loader.h @@ -37,6 +37,7 @@ StatusOr GetCusolverDsoHandle(); StatusOr GetCusparseDsoHandle(); StatusOr 
GetCuptiDsoHandle(); StatusOr GetCudnnDsoHandle(); +StatusOr GetNcclDsoHandle(); StatusOr GetNvInferDsoHandle(); StatusOr GetNvInferPluginDsoHandle(); diff --git a/third_party/xla/xla/python/BUILD b/third_party/xla/xla/python/BUILD index 44852eec3fb6f1..0b4e59bd199c40 100644 --- a/third_party/xla/xla/python/BUILD +++ b/third_party/xla/xla/python/BUILD @@ -1013,6 +1013,7 @@ tsl_pybind_extension( "-Wl,-rpath,$$ORIGIN/../nvidia/cufft/lib", "-Wl,-rpath,$$ORIGIN/../nvidia/cudnn/lib", "-Wl,-rpath,$$ORIGIN/../nvidia/cusolver/lib", + "-Wl,-rpath,$$ORIGIN/../nvidia/nccl/lib", ], "//conditions:default": [], }), From 450dec35448a73b3fcb5d4f82108d5fdcb3f59b4 Mon Sep 17 00:00:00 2001 From: Ziyin Huang Date: Mon, 25 Sep 2023 16:06:27 -0700 Subject: [PATCH 240/567] Internal change, add some checks on the sparseTensor format checking. PiperOrigin-RevId: 568349775 --- .../core/tpu/kernels/sparse_core_preprocess_ops.cc | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/tpu/kernels/sparse_core_preprocess_ops.cc b/tensorflow/core/tpu/kernels/sparse_core_preprocess_ops.cc index 38df331827907a..da656554186be5 100644 --- a/tensorflow/core/tpu/kernels/sparse_core_preprocess_ops.cc +++ b/tensorflow/core/tpu/kernels/sparse_core_preprocess_ops.cc @@ -122,8 +122,15 @@ Status ComputeRowIdsBeforePadding(const Tensor& indices_or_row_splits, // The row ids are just the sample ids which is the first dim of the // indices. 
auto indices_matrix = indices_or_row_splits.matrix(); + int32 previous_row_id = -1; for (int32 i = 0; i < total_id_count; ++i) { - *(row_ids_before_padding + i) = indices_matrix(i, 0); + int32 current_row_id = indices_matrix(i, 0); + if (current_row_id < previous_row_id) { + return absl::InvalidArgumentError( + "Invalid indices_or_row_splits input, indices of SparseTensor need " + "to be sorted in ascending order."); + } + *(row_ids_before_padding + i) = current_row_id; } } else if (indices_or_row_splits.dims() == 1 && indices_or_row_splits.NumElements() > 0) { From 0bd205eeb7db15238aa795243cc07e401e97e71e Mon Sep 17 00:00:00 2001 From: Jared Junyoung Lim Date: Mon, 25 Sep 2023 16:14:51 -0700 Subject: [PATCH 241/567] Split long-running test cases in mul_test into subshards. This improves parallelism by evening out the sizes of the different shards and thus speeds up this sharded test. PiperOrigin-RevId: 568351990 --- tensorflow/lite/kernels/mul_test.cc | 230 +++++++++++++++++++++------- 1 file changed, 176 insertions(+), 54 deletions(-) diff --git a/tensorflow/lite/kernels/mul_test.cc b/tensorflow/lite/kernels/mul_test.cc index eff799518435db..34b484a4ca9c2c 100644 --- a/tensorflow/lite/kernels/mul_test.cc +++ b/tensorflow/lite/kernels/mul_test.cc @@ -808,8 +808,8 @@ constexpr int kDim6 = 7; constexpr int kMaxMulBroadcastDim = 6; -void TestBroadcast(std::vector input1_shape, - std::vector input2_shape) { +void TestFloatBroadcast(std::vector input1_shape, + std::vector input2_shape) { std::array input1_dims; std::array input2_dims; std::array output_dims; @@ -974,11 +974,23 @@ void TestIntegerBroadcast(std::vector input1_shape, EXPECT_THAT(m.GetOutput(), testing::ContainerEq(output_ref)); } -TEST(FloatMulOpModel, Float32MultiDimBroadcast) { +// To improve automatic test sharding (via shard_count in the BUILD file), +// we need to ensure that each individual test case runs in a reasonable time, +// otherwise we end up being limited by the performance of the longest 
shard. +// Since TestFloat32MultiDimBroadcast has 2^12 iterations, it takes a +// long time (over 30 seconds) to execute all iterations -- too long for a +// single shard. So we split it into a few "subshards" and have a separate +// TEST macro invocation for each subshard. + +void TestFloat32MultiDimBroadcast(int selected_subshard, int subshard_count) { + int iteration = 0; for (uint32_t bm1 = 0; bm1 < (static_cast(1) << kMaxMulBroadcastDim); bm1++) { for (uint32_t bm2 = 0; bm2 < (static_cast(1) << kMaxMulBroadcastDim); bm2++) { + if (iteration++ % subshard_count != selected_subshard) { + continue; // This iteration of the loop is not part of this subshard. + } const bool input1_broadcast_dim1 = bm1 & (static_cast(1) << 0); const bool input1_broadcast_dim2 = bm1 & (static_cast(1) << 1); const bool input1_broadcast_dim3 = bm1 & (static_cast(1) << 2); @@ -1016,24 +1028,76 @@ TEST(FloatMulOpModel, Float32MultiDimBroadcast) { input1_full_shape.end(), input1_shape.data()); std::copy(input2_full_shape.end() - input2_dims, input2_full_shape.end(), input2_shape.data()); - TestBroadcast(input1_shape, input2_shape); + TestFloatBroadcast(input1_shape, input2_shape); } } } } } +// Should match the number of TEST or TYPED_TEST invocations for each of +// Float32MultiDimBroadcastSubshard*, +// IntegerMultiDimBroadcastSubshard*, +// Int8QuantizedMultiDimBroadcastSubshard*, and +// Uint8QuantizedMultiDimBroadcastSubshard* below. 
+constexpr int kMultiDimBroadcastSubshardCount = 10; + +TEST(FloatMulOpModel, Float32MultiDimBroadcastSubshard0) { + TestFloat32MultiDimBroadcast(0, kMultiDimBroadcastSubshardCount); +} +TEST(FloatMulOpModel, Float32MultiDimBroadcastSubshard1) { + TestFloat32MultiDimBroadcast(1, kMultiDimBroadcastSubshardCount); +} +TEST(FloatMulOpModel, Float32MultiDimBroadcastSubshard2) { + TestFloat32MultiDimBroadcast(2, kMultiDimBroadcastSubshardCount); +} +TEST(FloatMulOpModel, Float32MultiDimBroadcastSubshard3) { + TestFloat32MultiDimBroadcast(3, kMultiDimBroadcastSubshardCount); +} +TEST(FloatMulOpModel, Float32MultiDimBroadcastSubshard4) { + TestFloat32MultiDimBroadcast(4, kMultiDimBroadcastSubshardCount); +} +TEST(FloatMulOpModel, Float32MultiDimBroadcastSubshard5) { + TestFloat32MultiDimBroadcast(5, kMultiDimBroadcastSubshardCount); +} +TEST(FloatMulOpModel, Float32MultiDimBroadcastSubshard6) { + TestFloat32MultiDimBroadcast(6, kMultiDimBroadcastSubshardCount); +} +TEST(FloatMulOpModel, Float32MultiDimBroadcastSubshard7) { + TestFloat32MultiDimBroadcast(7, kMultiDimBroadcastSubshardCount); +} +TEST(FloatMulOpModel, Float32MultiDimBroadcastSubshard8) { + TestFloat32MultiDimBroadcast(8, kMultiDimBroadcastSubshardCount); +} +TEST(FloatMulOpModel, Float32MultiDimBroadcastSubshard9) { + TestFloat32MultiDimBroadcast(9, kMultiDimBroadcastSubshardCount); +} + template class IntegerMulOpTest : public ::testing::Test {}; using Int16OrInt32Or64Types = ::testing::Types; TYPED_TEST_SUITE(IntegerMulOpTest, Int16OrInt32Or64Types); -TYPED_TEST(IntegerMulOpTest, IntegerMultiDimBroadcast) { +// To improve automatic test sharding (via shard_count in the BUILD file), +// we need to ensure that each individual test case runs in a reasonable time, +// otherwise we end up being limited by the performance of the longest shard. +// Since TestIntegerMultiDimBroadcast has 2^12 iterations, it takes a +// long time (over 30 seconds) to execute all iterations -- too long for a +// single shard. 
So we split it into a few "subshards" and have a separate +// TYPED_TEST macro invocation for each subshard. + +template +void TestIntegerMultiDimBroadcast(int selected_subshard, int subshard_count) { + ASSERT_LT(selected_subshard, subshard_count); + int iteration = 0; for (uint32_t bm1 = 0; bm1 < (static_cast(1) << kMaxMulBroadcastDim); bm1++) { for (uint32_t bm2 = 0; bm2 < (static_cast(1) << kMaxMulBroadcastDim); bm2++) { + if (iteration++ % subshard_count != selected_subshard) { + continue; // This iteration of the loop is not part of this subshard. + } const bool input1_broadcast_dim1 = bm1 & (static_cast(1) << 0); const bool input1_broadcast_dim2 = bm1 & (static_cast(1) << 1); const bool input1_broadcast_dim3 = bm1 & (static_cast(1) << 2); @@ -1078,6 +1142,37 @@ TYPED_TEST(IntegerMulOpTest, IntegerMultiDimBroadcast) { } } +TYPED_TEST(IntegerMulOpTest, IntegerMultiDimBroadcastSubshard0) { + TestIntegerMultiDimBroadcast(0, kMultiDimBroadcastSubshardCount); +} +TYPED_TEST(IntegerMulOpTest, IntegerMultiDimBroadcastSubshard1) { + TestIntegerMultiDimBroadcast(1, kMultiDimBroadcastSubshardCount); +} +TYPED_TEST(IntegerMulOpTest, IntegerMultiDimBroadcastSubshard2) { + TestIntegerMultiDimBroadcast(2, kMultiDimBroadcastSubshardCount); +} +TYPED_TEST(IntegerMulOpTest, IntegerMultiDimBroadcastSubshard3) { + TestIntegerMultiDimBroadcast(3, kMultiDimBroadcastSubshardCount); +} +TYPED_TEST(IntegerMulOpTest, IntegerMultiDimBroadcastSubshard4) { + TestIntegerMultiDimBroadcast(4, kMultiDimBroadcastSubshardCount); +} +TYPED_TEST(IntegerMulOpTest, IntegerMultiDimBroadcastSubshard5) { + TestIntegerMultiDimBroadcast(5, kMultiDimBroadcastSubshardCount); +} +TYPED_TEST(IntegerMulOpTest, IntegerMultiDimBroadcastSubshard6) { + TestIntegerMultiDimBroadcast(6, kMultiDimBroadcastSubshardCount); +} +TYPED_TEST(IntegerMulOpTest, IntegerMultiDimBroadcastSubshard7) { + TestIntegerMultiDimBroadcast(7, kMultiDimBroadcastSubshardCount); +} +TYPED_TEST(IntegerMulOpTest, 
IntegerMultiDimBroadcastSubshard8) { + TestIntegerMultiDimBroadcast(8, kMultiDimBroadcastSubshardCount); +} +TYPED_TEST(IntegerMulOpTest, IntegerMultiDimBroadcastSubshard9) { + TestIntegerMultiDimBroadcast(9, kMultiDimBroadcastSubshardCount); +} + template void TestQuantizedBroadcast(std::vector input1_shape, std::vector input2_shape) { @@ -1185,11 +1280,25 @@ void TestQuantizedBroadcast(std::vector input1_shape, } } -TEST(QuantizedMulOpModel, Int8QuantizedMultiDimBroadcast) { +// To improve automatic test sharding (via shard_count in the BUILD file), +// we need to ensure that each individual test case runs in a reasonable time, +// otherwise we end up being limited by the performance of the longest shard. +// Since TestQuantizedMultiDimBroadcast has 2^12 iterations, it takes a +// long time (over 30 seconds) to execute all iterations -- too long for a +// single shard. So we split it into a few "subshards" and have a separate +// TEST macro invocation for each subshard. + +template +void TestQuantizedMultiDimBroadcast(int selected_subshard, int subshard_count) { + ASSERT_LT(selected_subshard, subshard_count); + int iteration = 0; for (uint32_t bm1 = 0; bm1 < (static_cast(1) << kMaxMulBroadcastDim); bm1++) { for (uint32_t bm2 = 0; bm2 < (static_cast(1) << kMaxMulBroadcastDim); bm2++) { + if (iteration++ % subshard_count != selected_subshard) { + continue; // This iteration of the loop is not part of this subshard. 
+ } const bool input1_broadcast_dim1 = bm1 & (static_cast(1) << 0); const bool input1_broadcast_dim2 = bm1 & (static_cast(1) << 1); const bool input1_broadcast_dim3 = bm1 & (static_cast(1) << 2); @@ -1227,60 +1336,73 @@ TEST(QuantizedMulOpModel, Int8QuantizedMultiDimBroadcast) { input1_full_shape.end(), input1_shape.data()); std::copy(input2_full_shape.end() - input2_dims, input2_full_shape.end(), input2_shape.data()); - TestQuantizedBroadcast(input1_shape, input2_shape); + TestQuantizedBroadcast(input1_shape, input2_shape); } } } } } -TEST(QuantizedMulOpModel, Uint8QuantizedMultiDimBroadcast) { - for (uint32_t bm1 = 0; - bm1 < (static_cast(1) << kMaxMulBroadcastDim); bm1++) { - for (uint32_t bm2 = 0; - bm2 < (static_cast(1) << kMaxMulBroadcastDim); bm2++) { - const bool input1_broadcast_dim1 = bm1 & (static_cast(1) << 0); - const bool input1_broadcast_dim2 = bm1 & (static_cast(1) << 1); - const bool input1_broadcast_dim3 = bm1 & (static_cast(1) << 2); - const bool input1_broadcast_dim4 = bm1 & (static_cast(1) << 3); - const bool input1_broadcast_dim5 = bm1 & (static_cast(1) << 4); - const bool input1_broadcast_dim6 = bm1 & (static_cast(1) << 5); - const bool input2_broadcast_dim1 = bm2 & (static_cast(1) << 0); - const bool input2_broadcast_dim2 = bm2 & (static_cast(1) << 1); - const bool input2_broadcast_dim3 = bm2 & (static_cast(1) << 2); - const bool input2_broadcast_dim4 = bm2 & (static_cast(1) << 3); - const bool input2_broadcast_dim5 = bm2 & (static_cast(1) << 4); - const bool input2_broadcast_dim6 = bm2 & (static_cast(1) << 5); - const int input1_dim1 = input1_broadcast_dim1 ? 1 : kDim1; - const int input1_dim2 = input1_broadcast_dim2 ? 1 : kDim2; - const int input1_dim3 = input1_broadcast_dim3 ? 1 : kDim3; - const int input1_dim4 = input1_broadcast_dim4 ? 1 : kDim4; - const int input1_dim5 = input1_broadcast_dim5 ? 1 : kDim5; - const int input1_dim6 = input1_broadcast_dim6 ? 1 : kDim6; - const int input2_dim1 = input2_broadcast_dim1 ? 
1 : kDim1; - const int input2_dim2 = input2_broadcast_dim2 ? 1 : kDim2; - const int input2_dim3 = input2_broadcast_dim3 ? 1 : kDim3; - const int input2_dim4 = input2_broadcast_dim4 ? 1 : kDim4; - const int input2_dim5 = input2_broadcast_dim5 ? 1 : kDim5; - const int input2_dim6 = input2_broadcast_dim6 ? 1 : kDim6; - std::vector input1_full_shape{input1_dim1, input1_dim2, input1_dim3, - input1_dim4, input1_dim5, input1_dim6}; - std::vector input2_full_shape{input2_dim1, input2_dim2, input2_dim3, - input2_dim4, input2_dim5, input2_dim6}; - for (int input1_dims = 1; input1_dims <= kMaxMulBroadcastDim; - ++input1_dims) { - for (int input2_dims = 1; input2_dims <= kMaxMulBroadcastDim; - ++input2_dims) { - std::vector input1_shape(input1_dims), input2_shape(input2_dims); - std::copy(input1_full_shape.end() - input1_dims, - input1_full_shape.end(), input1_shape.data()); - std::copy(input2_full_shape.end() - input2_dims, - input2_full_shape.end(), input2_shape.data()); - TestQuantizedBroadcast(input1_shape, input2_shape); - } - } - } - } +TEST(QuantizedMulOpModel, Int8QuantizedMultiDimBroadcastSubshard0) { + TestQuantizedMultiDimBroadcast(0, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedMulOpModel, Int8QuantizedMultiDimBroadcastSubshard1) { + TestQuantizedMultiDimBroadcast(1, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedMulOpModel, Int8QuantizedMultiDimBroadcastSubshard2) { + TestQuantizedMultiDimBroadcast(2, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedMulOpModel, Int8QuantizedMultiDimBroadcastSubshard3) { + TestQuantizedMultiDimBroadcast(3, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedMulOpModel, Int8QuantizedMultiDimBroadcastSubshard4) { + TestQuantizedMultiDimBroadcast(4, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedMulOpModel, Int8QuantizedMultiDimBroadcastSubshard5) { + TestQuantizedMultiDimBroadcast(5, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedMulOpModel, Int8QuantizedMultiDimBroadcastSubshard6) { + 
TestQuantizedMultiDimBroadcast(6, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedMulOpModel, Int8QuantizedMultiDimBroadcastSubshard7) { + TestQuantizedMultiDimBroadcast(7, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedMulOpModel, Int8QuantizedMultiDimBroadcastSubshard8) { + TestQuantizedMultiDimBroadcast(8, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedMulOpModel, Int8QuantizedMultiDimBroadcastSubshard9) { + TestQuantizedMultiDimBroadcast(9, kMultiDimBroadcastSubshardCount); +} + +TEST(QuantizedMulOpModel, Uint8QuantizedMultiDimBroadcastSubshard0) { + TestQuantizedMultiDimBroadcast(0, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedMulOpModel, Uint8QuantizedMultiDimBroadcastSubshard1) { + TestQuantizedMultiDimBroadcast(1, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedMulOpModel, Uint8QuantizedMultiDimBroadcastSubshard2) { + TestQuantizedMultiDimBroadcast(2, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedMulOpModel, Uint8QuantizedMultiDimBroadcastSubshard3) { + TestQuantizedMultiDimBroadcast(3, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedMulOpModel, Uint8QuantizedMultiDimBroadcastSubshard4) { + TestQuantizedMultiDimBroadcast(4, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedMulOpModel, Uint8QuantizedMultiDimBroadcastSubshard5) { + TestQuantizedMultiDimBroadcast(5, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedMulOpModel, Uint8QuantizedMultiDimBroadcastSubshard6) { + TestQuantizedMultiDimBroadcast(6, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedMulOpModel, Uint8QuantizedMultiDimBroadcastSubshard7) { + TestQuantizedMultiDimBroadcast(7, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedMulOpModel, Uint8QuantizedMultiDimBroadcastSubshard8) { + TestQuantizedMultiDimBroadcast(8, kMultiDimBroadcastSubshardCount); +} +TEST(QuantizedMulOpModel, Uint8QuantizedMultiDimBroadcastSubshard9) { + TestQuantizedMultiDimBroadcast(9, kMultiDimBroadcastSubshardCount); } } // namespace From 
9668ea0809b56288400d9dadaae57678043046b3 Mon Sep 17 00:00:00 2001 From: Yishuang Pang Date: Mon, 25 Sep 2023 16:17:09 -0700 Subject: [PATCH 242/567] Legalize some MHLO broadcasted select op to TF SelectV2 op directly. tf.BroadcastTo op folder folds constants by default, this would increase the size of models converted from StableHLO because StableHLO requires explicit broadcasting. This change helps reduce model size by removing unnecessary broadcasts at legalization stage. PiperOrigin-RevId: 568352559 --- .../lite/stablehlo/tests/legalize_hlo.mlir | 26 +++++++++++++++++++ .../transforms/legalize_hlo_patterns.td | 17 ++++++++++++ 2 files changed, 43 insertions(+) diff --git a/tensorflow/compiler/mlir/lite/stablehlo/tests/legalize_hlo.mlir b/tensorflow/compiler/mlir/lite/stablehlo/tests/legalize_hlo.mlir index e02ce38908d9ff..2df637ef62f263 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/tests/legalize_hlo.mlir +++ b/tensorflow/compiler/mlir/lite/stablehlo/tests/legalize_hlo.mlir @@ -986,6 +986,32 @@ func.func @selectv2_pred_scalar(%arg0: tensor, %arg1: tensor<2xi32>, %arg2: func.return %0 : tensor<2xi32> } +// CHECK-LABEL: func @selectv2_broadcasted_operand( +// CHECK-SAME: %[[VAL_0:.*]]: tensor, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<1x1xi32>, +// CHECK-SAME: %[[VAL_2:.*]]: tensor<1x100xi32>) -> tensor<1x100xi32> { +// CHECK: %[[VAL_3:.*]] = "tf.SelectV2"(%[[VAL_0]], %[[VAL_1]], %[[VAL_2]]) : (tensor, tensor<1x1xi32>, tensor<1x100xi32>) -> tensor<1x100xi32> +// CHECK: return %[[VAL_3]] : tensor<1x100xi32> +// CHECK: } +func.func @selectv2_broadcasted_operand(%arg0: tensor, %arg1: tensor<1x1xi32>, %arg2: tensor<1x100xi32>) -> tensor<1x100xi32> { + %0 = "mhlo.broadcast_in_dim"(%arg1) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor<1x1xi32>) -> tensor<1x100xi32> + %1 = "mhlo.select"(%arg0, %0, %arg2) : (tensor, tensor<1x100xi32>, tensor<1x100xi32>) -> tensor<1x100xi32> + func.return %1 : tensor<1x100xi32> +} + +// CHECK-LABEL: func 
@selectv2_broadcasted_condition( +// CHECK-SAME: %[[VAL_0:.*]]: tensor<1x1xi1>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<1x100xi32>, +// CHECK-SAME: %[[VAL_2:.*]]: tensor<1x100xi32>) -> tensor<1x100xi32> { +// CHECK: %[[VAL_3:.*]] = "tf.SelectV2"(%[[VAL_0]], %[[VAL_1]], %[[VAL_2]]) : (tensor<1x1xi1>, tensor<1x100xi32>, tensor<1x100xi32>) -> tensor<1x100xi32> +// CHECK: return %[[VAL_3]] : tensor<1x100xi32> +// CHECK: } +func.func @selectv2_broadcasted_condition(%arg0: tensor<1x1xi1>, %arg1: tensor<1x100xi32>, %arg2: tensor<1x100xi32>) -> tensor<1x100xi32> { + %0 = "mhlo.broadcast_in_dim"(%arg0) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor<1x1xi1>) -> tensor<1x100xi1> + %1 = "mhlo.select"(%0, %arg1, %arg2) : (tensor<1x100xi1>, tensor<1x100xi32>, tensor<1x100xi32>) -> tensor<1x100xi32> + func.return %1 : tensor<1x100xi32> +} + // CHECK-LABEL: func @transpose_2d( // CHECK-SAME: %[[VAL_0:.*]]: tensor<2x3xf32>) -> tensor<3x2xf32> { // CHECK-DAG: %[[VAL_1:.*]] = "tf.Const"() {value = dense<[1, 0]> : tensor<2xi64>} : () -> tensor<2xi64> diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_patterns.td b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_patterns.td index 9cf9aa518849f5..307f3515c68aff 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_patterns.td +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_patterns.td @@ -225,6 +225,23 @@ def : Pat<(MHLO_RoundNearestEvenOp $input), (TF_RoundOp $input)>; def : Pat<(MHLO_ClampOp $min, $arg, $max), (TF_MaximumOp (TF_MinimumOp $arg, $max), $min)>; def : Pat<(MHLO_SelectOp $cond, $t, $e), (TF_SelectOp $cond, $t, $e)>; +def : Pat<(MHLO_SelectOp (MHLO_BroadcastInDimOp:$output + $bcast_cond, + $broadcast_dimensions), $t, $e), + (TF_SelectV2Op $bcast_cond, $t, $e), + [(IsTFStyleBroadcast $broadcast_dimensions, $output)]>; +def : Pat<(MHLO_SelectOp $cond, $t, + (MHLO_BroadcastInDimOp:$output + $bcast_operand, + 
$broadcast_dimensions)), + (TF_SelectV2Op $cond, $t, $bcast_operand), + [(IsTFStyleBroadcast $broadcast_dimensions, $output)]>; +def : Pat<(MHLO_SelectOp $cond, (MHLO_BroadcastInDimOp:$output + $bcast_operand, + $broadcast_dimensions), + $e), + (TF_SelectV2Op $cond, $bcast_operand, $e), + [(IsTFStyleBroadcast $broadcast_dimensions, $output)]>; //===----------------------------------------------------------------------===// // Variadic op patterns. From 55b7276ba525f322d7bd7773f61f0938a13292d1 Mon Sep 17 00:00:00 2001 From: Matt Callanan Date: Mon, 25 Sep 2023 16:21:43 -0700 Subject: [PATCH 243/567] #tf-data-service Scale up `"no_compression"` experiment to 50%. PiperOrigin-RevId: 568353683 --- tensorflow/core/data/dataset_utils.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/data/dataset_utils.cc b/tensorflow/core/data/dataset_utils.cc index 7f0eb78fea8b4c..b03527344e3d18 100644 --- a/tensorflow/core/data/dataset_utils.cc +++ b/tensorflow/core/data/dataset_utils.cc @@ -988,7 +988,7 @@ REGISTER_DATASET_EXPERIMENT("file_locality", RandomJobSamplePercentage<0>, IndependentHostTasks); REGISTER_DATASET_EXPERIMENT("file_locality_v2", RandomJobSamplePercentage<0>, AllTasks); -REGISTER_DATASET_EXPERIMENT("no_compression", RandomJobSamplePercentage<1>, +REGISTER_DATASET_EXPERIMENT("no_compression", RandomJobSamplePercentage<50>, AllTasks); REGISTER_DATASET_EXPERIMENT("inject_io_prefetch", RandomJobSamplePercentage<50>, AllTasks); From 882e787f3d989df283811fd8309177601eccd6e9 Mon Sep 17 00:00:00 2001 From: Jieying Luo Date: Mon, 25 Sep 2023 17:24:28 -0700 Subject: [PATCH 244/567] [PJRT C API] Split callback and py_client_gpu out from py_client. This is to prepare separating GPU into a plugin. The plugin will use py_client_gpu to register custom callback for `xla_python_gpu_callback`. 
PiperOrigin-RevId: 568368076 --- third_party/xla/xla/python/BUILD | 76 ++++++++++++++++++--- third_party/xla/xla/python/callback.cc | 29 ++++---- third_party/xla/xla/python/callback.h | 5 +- third_party/xla/xla/python/py_client.cc | 1 + third_party/xla/xla/python/py_client_gpu.cc | 1 + 5 files changed, 88 insertions(+), 24 deletions(-) diff --git a/third_party/xla/xla/python/BUILD b/third_party/xla/xla/python/BUILD index 0b4e59bd199c40..7d9b296bd27a9e 100644 --- a/third_party/xla/xla/python/BUILD +++ b/third_party/xla/xla/python/BUILD @@ -187,7 +187,6 @@ py_strict_test( cc_library( name = "exceptions", hdrs = ["exceptions.h"], - compatible_with = [], copts = [ "-fexceptions", "-fno-strict-aliasing", @@ -314,7 +313,6 @@ cc_library( cc_library( name = "py_client", srcs = [ - "callback.cc", "py_array.cc", "py_buffer.cc", "py_client.cc", @@ -324,11 +322,8 @@ cc_library( "py_host_callback.cc", "py_values.cc", "sharding.cc", - ] + if_cuda_or_rocm([ - "py_client_gpu.cc", - ]), + ], hdrs = [ - "callback.h", "py_array.h", "py_buffer.h", "py_client.h", @@ -339,9 +334,7 @@ cc_library( "py_values.h", "sharded_device_array.h", "sharding.h", - ] + if_cuda_or_rocm([ - "py_client_gpu.h", - ]), + ], compatible_with = [], copts = [ "-fexceptions", @@ -353,8 +346,10 @@ cc_library( features = ["-use_header_modules"], visibility = ["//visibility:public"], deps = [ + ":callback", ":exceptions", ":pprof_profile_builder", + ":py_client_gpu", ":py_host_callback_proto_cc", ":python_ref_manager", ":python_utils", @@ -368,7 +363,9 @@ cc_library( "@com_google_absl//absl/base", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/hash", + "@com_google_absl//absl/status", "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/synchronization", "@com_google_absl//absl/types:span", "@com_google_absl//absl/types:variant", @@ -411,6 +408,67 @@ cc_library( ]), ) +cc_library( + name = "callback", + srcs = [ + "callback.cc", + ], + 
hdrs = [ + "callback.h", + ], + copts = [ + "-fexceptions", + "-fno-strict-aliasing", + ], + features = ["-use_header_modules"], + visibility = ["//visibility:public"], + deps = [ + ":python_ref_manager", + "//xla:comparison_util", + "//xla:xla_data_proto_cc", + "//xla/pjrt:transpose", + "//xla/service:custom_call_status", + "@com_google_absl//absl/status", + "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/types:span", + "@local_tsl//tsl/platform:statusor", + "@pybind11", + ], +) + +cc_library( + name = "py_client_gpu", + srcs = if_cuda_or_rocm([ + "py_client_gpu.cc", + ]), + hdrs = if_cuda_or_rocm([ + "py_client_gpu.h", + ]), + copts = [ + "-fexceptions", + "-fno-strict-aliasing", + ], + defines = if_cuda(["GOOGLE_CUDA=1"]) + if_rocm([ + "TENSORFLOW_USE_ROCM=1", + ]), + features = ["-use_header_modules"], + visibility = ["//visibility:public"], + deps = [ + ":callback", + ":exceptions", + "//xla:comparison_util", + "//xla/service:custom_call_status", + "@com_google_absl//absl/base", + "@com_google_absl//absl/strings", + "@local_tsl//tsl/platform:errors", + "@pybind11", + ] + if_cuda([ + "@local_config_cuda//cuda:cuda_headers", + ]) + if_rocm([ + "@local_config_rocm//rocm:rocm_headers", + ]), +) + cc_library( name = "dlpack", srcs = ["dlpack.cc"], diff --git a/third_party/xla/xla/python/callback.cc b/third_party/xla/xla/python/callback.cc index 99ad6962549859..fbba7e68412997 100644 --- a/third_party/xla/xla/python/callback.cc +++ b/third_party/xla/xla/python/callback.cc @@ -23,11 +23,14 @@ limitations under the License. 
#include #include +#include "absl/status/status.h" +#include "absl/strings/str_format.h" #include "absl/types/span.h" +#include "pybind11/numpy.h" // from @pybind11 +#include "pybind11/pytypes.h" // from @pybind11 #include "xla/primitive_util.h" -#include "xla/python/exceptions.h" #include "xla/service/custom_call_status.h" -#include "tsl/profiler/lib/traceme.h" +#include "tsl/platform/statusor.h" namespace py = pybind11; @@ -103,16 +106,18 @@ StatusOr CpuCallback::CallInternal(py::tuple args) { } catch (py::error_already_set& e) { PyErr_Clear(); std::string error_message = e.what(); - return InternalError("CpuCallback error: %s", error_message); + return absl::InternalError( + absl::StrFormat("CpuCallback error: %s", error_message)); } if (!PyTuple_Check(result_object.ptr())) { - return InternalError("CPU callback expected a tuple result, got %s", - static_cast(py::repr(result_object))); + return absl::InternalError( + absl::StrFormat("CPU callback expected a tuple result, got %s", + static_cast(py::repr(result_object)))); } if (PyTuple_Size(result_object.ptr()) != results_.size()) { - return InternalError( - "CPU callback expected a tuple with %d results, got %d", - results_.size(), PyTuple_Size(result_object.ptr())); + return absl::InternalError( + absl::StrFormat("CPU callback expected a tuple with %d results, got %d", + results_.size(), PyTuple_Size(result_object.ptr()))); } py::tuple result_tuple = py::cast(result_object); for (size_t i = 0; i < results_.size(); ++i) { @@ -120,9 +125,9 @@ StatusOr CpuCallback::CallInternal(py::tuple args) { PyTuple_GetItem(result_tuple.ptr(), i)); if (results_[i].type == xla::TOKEN) { if (!output.is_none()) { - return InternalError( + return absl::InternalError(absl::StrFormat( "Token output from Python callback should be None, got %s", - static_cast(py::repr(output))); + static_cast(py::repr(output)))); } continue; } @@ -132,11 +137,11 @@ StatusOr CpuCallback::CallInternal(py::tuple args) { absl::Span dims( 
reinterpret_cast(array.shape()), array.ndim()); if (dims != results_[i].expected_dims) { - return InternalError( + return absl::InternalError(absl::StrFormat( "Mismatched result shape for %d-th return value from CPU callback; " "expected array with dimensions %s, got %s", i, absl::StrJoin(results_[i].expected_dims, ","), - absl::StrJoin(dims, ",")); + absl::StrJoin(dims, ","))); } } return result_tuple; diff --git a/third_party/xla/xla/python/callback.h b/third_party/xla/xla/python/callback.h index 97e2a0f6633ee9..401554825f0718 100644 --- a/third_party/xla/xla/python/callback.h +++ b/third_party/xla/xla/python/callback.h @@ -20,12 +20,11 @@ limitations under the License. #include #include -#include "pybind11/pybind11.h" // from @pybind11 +#include "pybind11/numpy.h" // from @pybind11 #include "xla/pjrt/transpose.h" -#include "xla/python/py_values.h" #include "xla/python/python_ref_manager.h" #include "xla/service/custom_call_status.h" -#include "xla/types.h" +#include "xla/xla_data.pb.h" namespace xla { diff --git a/third_party/xla/xla/python/py_client.cc b/third_party/xla/xla/python/py_client.cc index 8d8f723d6d3cd9..ce6e4dcba1c2e7 100644 --- a/third_party/xla/xla/python/py_client.cc +++ b/third_party/xla/xla/python/py_client.cc @@ -39,6 +39,7 @@ limitations under the License. #include "xla/python/py_buffer.h" #include "xla/python/py_executable.h" #include "xla/python/py_host_callback.h" +#include "xla/python/py_values.h" #include "xla/python/python_ref_manager.h" #include "xla/python/traceback.h" #include "xla/python/transfer_guard_lib.h" diff --git a/third_party/xla/xla/python/py_client_gpu.cc b/third_party/xla/xla/python/py_client_gpu.cc index d695098f141ba1..6a5caf680243d0 100644 --- a/third_party/xla/xla/python/py_client_gpu.cc +++ b/third_party/xla/xla/python/py_client_gpu.cc @@ -25,6 +25,7 @@ limitations under the License. 
#include "third_party/gpus/cuda/include/cuda_runtime_api.h" #endif #include "pybind11/pybind11.h" // from @pybind11 +#include "xla/primitive_util.h" #include "xla/python/callback.h" #include "xla/python/exceptions.h" From c475f59d1a3c735a94ed8078983fc0119259bef1 Mon Sep 17 00:00:00 2001 From: Shixin Li Date: Mon, 25 Sep 2023 18:18:06 -0700 Subject: [PATCH 245/567] Create AotCompileSavedModel function to produce AotResult w/o writing to disk. PiperOrigin-RevId: 568379208 --- .../mlir/tfrt/tests/saved_model/BUILD | 1 + .../tests/saved_model/saved_model_test.cc | 35 +++++++ .../mlir/tfrt/translate/import_model.cc | 19 ++-- .../mlir/tfrt/translate/import_model.h | 22 +++-- .../core/tfrt/fallback/fallback_state.h | 4 + tensorflow/core/tfrt/saved_model/BUILD | 3 + .../python/saved_model_aot_compile_wrapper.cc | 3 +- .../saved_model/saved_model_aot_compile.cc | 91 ++++++++++++------- .../saved_model/saved_model_aot_compile.h | 24 ++++- 9 files changed, 151 insertions(+), 51 deletions(-) diff --git a/tensorflow/compiler/mlir/tfrt/tests/saved_model/BUILD b/tensorflow/compiler/mlir/tfrt/tests/saved_model/BUILD index 45561d45b3f46d..ae1bfb32a6a09d 100644 --- a/tensorflow/compiler/mlir/tfrt/tests/saved_model/BUILD +++ b/tensorflow/compiler/mlir/tfrt/tests/saved_model/BUILD @@ -22,6 +22,7 @@ tf_cc_test( "//tensorflow/core:test_main", "//tensorflow/core/platform:resource_loader", "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest_main", "@llvm-project//mlir:IR", "@llvm-project//mlir:Parser", ], diff --git a/tensorflow/compiler/mlir/tfrt/tests/saved_model/saved_model_test.cc b/tensorflow/compiler/mlir/tfrt/tests/saved_model/saved_model_test.cc index 4cd8ce4b833ce3..f19e09d7c96d78 100644 --- a/tensorflow/compiler/mlir/tfrt/tests/saved_model/saved_model_test.cc +++ b/tensorflow/compiler/mlir/tfrt/tests/saved_model/saved_model_test.cc @@ -19,6 +19,8 @@ limitations under the License. 
#include #include +#include +#include #include "absl/strings/match.h" #include "mlir/IR/Dialect.h" // from @llvm-project #include "mlir/Parser/Parser.h" // from @llvm-project @@ -190,6 +192,39 @@ TEST(SavedModelTest, ConvertTfMlirToBefExportingXlaReduceWindow) { 2); } +TEST(SavedModelTest, AddXlaFunctionsOutputFunctionNames) { + std::string saved_model_mlir_path = tensorflow::GetDataDependencyFilepath( + "tensorflow/compiler/mlir/tfrt/tests/saved_model/testdata/" + "xla_launch_xla_reduce_window.mlir"); + + mlir::DialectRegistry registry; + mlir::RegisterAllTensorFlowDialects(registry); + mlir::MLIRContext context(registry); + auto module = + mlir::parseSourceFile(saved_model_mlir_path, &context); + ASSERT_TRUE(module); + + tfrt::BefBuffer bef_buffer; + auto runtime = + tensorflow::tfrt_stub::Runtime::Create(/*num_inter_op_threads=*/1); + tfrt_stub::GraphExecutionOptions options(runtime.get()); + options.compile_options.device_target = TfrtDeviceInfraTarget::kGpu; + + TF_ASSERT_OK_AND_ASSIGN( + std::unique_ptr fallback_state, + tfrt_stub::FallbackState::Create(SessionOptions(), FunctionDefLibrary())); + + tfrt::ResourceContext resource_context; + tfrt_stub::ModelRuntimeContext model_context( + &options, options.compile_options.saved_model_dir, &resource_context); + + std::vector function_names; + TF_ASSERT_OK(ConvertTfMlirToBef(options.compile_options, module.get(), + &bef_buffer, model_context, + fallback_state.get(), &function_names)); + EXPECT_THAT(function_names, ::testing::SizeIs(2)); +} + // TODO(b/162442824): Add a SavedModel test that covers the error pass. 
} // namespace diff --git a/tensorflow/compiler/mlir/tfrt/translate/import_model.cc b/tensorflow/compiler/mlir/tfrt/translate/import_model.cc index 1d1977ffb74dbc..05837853d2fc4a 100644 --- a/tensorflow/compiler/mlir/tfrt/translate/import_model.cc +++ b/tensorflow/compiler/mlir/tfrt/translate/import_model.cc @@ -155,7 +155,8 @@ Status ConvertTfMlirToRuntimeExecutable( const tensorflow::TfrtPipelineOptions& options)> emit_executable, tfrt_stub::ModelRuntimeContext& model_context, - tfrt_stub::FallbackState* fallback_state) { + tfrt_stub::FallbackState* fallback_state, + std::vector* added_xla_function_names) { mlir::StatusScopedDiagnosticHandler diag_handler(module.getContext()); { @@ -224,7 +225,8 @@ Status ConvertTfMlirToRuntimeExecutable( // GPU XLA clusters are wrapped in functions, which could be transformed by // bridge. Hence, the MLIR functions for XLA clusters are exported and added // to the function library. - TF_RETURN_IF_ERROR(AddXlaFunctions(fallback_state, module)); + TF_RETURN_IF_ERROR( + AddXlaFunctions(fallback_state, module, added_xla_function_names)); } if (VLOG_IS_ON(1)) { @@ -256,7 +258,8 @@ Status ConvertTfMlirToRuntimeExecutable( Status ConvertTfMlirToBef(const TfrtCompileOptions& options, mlir::ModuleOp module, tfrt::BefBuffer* bef_buffer, tfrt_stub::ModelRuntimeContext& model_context, - tfrt_stub::FallbackState* fallback_state) { + tfrt_stub::FallbackState* fallback_state, + std::vector* added_xla_function_names) { return ConvertTfMlirToRuntimeExecutable( options, module, [bef_buffer](mlir::PassManager& pm, mlir::ModuleOp module, @@ -283,7 +286,7 @@ Status ConvertTfMlirToBef(const TfrtCompileOptions& options, bef_buffer->shrink_to_fit(); return OkStatus(); }, - model_context, fallback_state); + model_context, fallback_state, added_xla_function_names); } std::unique_ptr GetTfrtPipelineOptions( @@ -328,13 +331,17 @@ std::unique_ptr GetTfrtPipelineOptions( return pipeline_options; } -tensorflow::Status AddXlaFunctions(tfrt_stub::FallbackState* 
fallback_state, - mlir::ModuleOp mlir_module) { +tensorflow::Status AddXlaFunctions( + tfrt_stub::FallbackState* fallback_state, mlir::ModuleOp mlir_module, + std::vector* added_xla_function_names) { if (fallback_state != nullptr) { TF_ASSIGN_OR_RETURN(const std::vector xla_func_defs, ExportXlaFunctions(mlir_module)); for (const auto& func_def : xla_func_defs) { TF_RETURN_IF_ERROR(fallback_state->AddFunctionDef(func_def)); + if (added_xla_function_names != nullptr) { + added_xla_function_names->push_back(func_def.signature().name()); + } } } diff --git a/tensorflow/compiler/mlir/tfrt/translate/import_model.h b/tensorflow/compiler/mlir/tfrt/translate/import_model.h index 467f7ff3a42ae1..09917ea61095fe 100644 --- a/tensorflow/compiler/mlir/tfrt/translate/import_model.h +++ b/tensorflow/compiler/mlir/tfrt/translate/import_model.h @@ -17,6 +17,7 @@ limitations under the License. #define TENSORFLOW_COMPILER_MLIR_TFRT_TRANSLATE_IMPORT_MODEL_H_ #include +#include #include #include "mlir/IR/BuiltinOps.h" // from @llvm-project @@ -50,11 +51,14 @@ Status ConvertFunctionToBef( // Converts an MLIR `module` in TF dialect to TFRT's Binary Executable Format. // If `fallback_state` is not null, the MLIR functions for XLA clusters in // the form of XlaLaunch will be exported and added to the function library when -// needed. The nested functions will also be exported. -Status ConvertTfMlirToBef(const TfrtCompileOptions& options, - mlir::ModuleOp module, tfrt::BefBuffer* bef_buffer, - tfrt_stub::ModelRuntimeContext& model_context, - tfrt_stub::FallbackState* fallback_state = nullptr); +// needed. The nested functions will also be exported. If +// `added_xla_function_names` is not null, it will be populated with the names +// of the added XLA functions. 
+Status ConvertTfMlirToBef( + const TfrtCompileOptions& options, mlir::ModuleOp module, + tfrt::BefBuffer* bef_buffer, tfrt_stub::ModelRuntimeContext& model_context, + tfrt_stub::FallbackState* fallback_state = nullptr, + std::vector* added_xla_function_names = nullptr); Status ConvertTfMlirToRuntimeExecutable( const TfrtCompileOptions& options, mlir::ModuleOp module, @@ -62,14 +66,16 @@ Status ConvertTfMlirToRuntimeExecutable( const tensorflow::TfrtPipelineOptions& options)> emit_executable, tfrt_stub::ModelRuntimeContext& model_context, - tfrt_stub::FallbackState* fallback_state = nullptr); + tfrt_stub::FallbackState* fallback_state = nullptr, + std::vector* added_xla_function_names = nullptr); std::unique_ptr GetTfrtPipelineOptions( const TfrtCompileOptions& options); // Adds MLIR functions for XLA clusters to the function library. -tensorflow::Status AddXlaFunctions(tfrt_stub::FallbackState* fallback_state, - mlir::ModuleOp mlir_module); +tensorflow::Status AddXlaFunctions( + tfrt_stub::FallbackState* fallback_state, mlir::ModuleOp mlir_module, + std::vector* added_xla_function_names = nullptr); } // namespace tensorflow diff --git a/tensorflow/core/tfrt/fallback/fallback_state.h b/tensorflow/core/tfrt/fallback/fallback_state.h index 904e04e5245d55..efd96af150fa33 100644 --- a/tensorflow/core/tfrt/fallback/fallback_state.h +++ b/tensorflow/core/tfrt/fallback/fallback_state.h @@ -70,6 +70,10 @@ class FallbackState { return pflr_; } + const FunctionLibraryDefinition &func_lib_def() const { + return func_lib_def_; + } + private: SessionOptions session_options_; StaticDeviceMgr device_manager_; diff --git a/tensorflow/core/tfrt/saved_model/BUILD b/tensorflow/core/tfrt/saved_model/BUILD index 5f1d8cc73c1aff..f001b29660fc4d 100644 --- a/tensorflow/core/tfrt/saved_model/BUILD +++ b/tensorflow/core/tfrt/saved_model/BUILD @@ -41,7 +41,9 @@ cc_library( "//tensorflow/compiler/mlir/tensorflow:translate_lib", "//tensorflow/compiler/mlir/tfrt:import_model", 
"//tensorflow/core:core_cpu_base", + "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core/framework:function_proto_cc", "//tensorflow/core/ops", "//tensorflow/core/platform:enable_tf2_utils", "//tensorflow/core/platform:path", @@ -56,6 +58,7 @@ cc_library( "//tensorflow/core/tfrt/utils", "@com_google_absl//absl/log", "@com_google_absl//absl/status", + "@com_google_absl//absl/strings", "@local_tsl//tsl/platform:env", "@local_tsl//tsl/platform:status", "@local_xla//xla/service:compiler", diff --git a/tensorflow/core/tfrt/saved_model/python/saved_model_aot_compile_wrapper.cc b/tensorflow/core/tfrt/saved_model/python/saved_model_aot_compile_wrapper.cc index 7c1e31fab55e7e..1dd62575fbfd98 100644 --- a/tensorflow/core/tfrt/saved_model/python/saved_model_aot_compile_wrapper.cc +++ b/tensorflow/core/tfrt/saved_model/python/saved_model_aot_compile_wrapper.cc @@ -29,7 +29,8 @@ PYBIND11_MODULE(_pywrap_saved_model_aot_compile, m) { .def(py::init<>()); m.doc() = "pybind11 AotOptions Python - C++ Wrapper"; - m.def("AotCompileSavedModel", &tensorflow::tfrt_stub::AotCompileSavedModel, + m.def("AotCompileSavedModel", + &tensorflow::tfrt_stub::AotCompileSavedModelAndSaveResult, py::arg("input_model_dir") = absl::string_view(), py::arg("aot_options") = tensorflow::tfrt_stub::AotOptions(), py::arg("output_model_dir") = absl::string_view()); diff --git a/tensorflow/core/tfrt/saved_model/saved_model_aot_compile.cc b/tensorflow/core/tfrt/saved_model/saved_model_aot_compile.cc index fb3c26e8e16cc0..2bde4e977e5c8d 100644 --- a/tensorflow/core/tfrt/saved_model/saved_model_aot_compile.cc +++ b/tensorflow/core/tfrt/saved_model/saved_model_aot_compile.cc @@ -19,14 +19,20 @@ limitations under the License. 
#include #include #include +#include +#include #include "absl/log/log.h" #include "absl/status/status.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" #include "tensorflow/cc/saved_model/constants.h" #include "tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.h" #include "tensorflow/compiler/mlir/tensorflow/utils/serialize_mlir_module_utils.h" #include "tensorflow/compiler/mlir/tfrt/translate/import_model.h" #include "xla/service/compiler.h" +#include "tensorflow/core/framework/function.h" +#include "tensorflow/core/framework/function.pb.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/file_system_helper.h" #include "tensorflow/core/platform/path.h" @@ -43,6 +49,7 @@ limitations under the License. #include "tsl/platform/errors.h" #include "tsl/platform/file_system_helper.h" #include "tsl/platform/status.h" +#include "tsl/platform/statusor.h" #include "tfrt/bef/bef_buffer.h" // from @tf_runtime #include "tfrt/bef_executor/bef_file.h" // from @tf_runtime #include "tfrt/host_context/resource_context.h" // from @tf_runtime @@ -64,9 +71,9 @@ void UpdateCompileOptions(AotOptions& options) { AotOptions::AotOptions() : graph_execution_options(nullptr) {} -Status AotCompileSavedModel(absl::string_view input_model_dir, - AotOptions aot_options, - absl::string_view output_model_dir) { +Status AotCompileSavedModelAndSaveResult(absl::string_view input_model_dir, + AotOptions aot_options, + absl::string_view output_model_dir) { // Create aot_packages directory. 
Env* env = Env::Default(); const bool new_directory = !output_model_dir.empty(); @@ -109,6 +116,42 @@ Status AotCompileSavedModel(absl::string_view input_model_dir, aot_options.tags = {"serve", "gpu"}; } + TF_ASSIGN_OR_RETURN(AotResult result, + AotCompileSavedModel(input_model_dir, aot_options)); + + const std::string warmup_requests_path = io::JoinPath( + input_model_dir, "assets.extra", "tf_serving_warmup_requests"); + TF_RETURN_IF_ERROR(env->FileExists(warmup_requests_path)); + + const std::string saved_model_pb_path = + io::JoinPath(input_model_dir, kSavedModelFilenamePb); + const std::string saved_model_pbtxt_path = + io::JoinPath(input_model_dir, kSavedModelFilenamePbTxt); + bool pb_found = env->FileExists(saved_model_pb_path).ok(); + bool pbtxt_found = env->FileExists(saved_model_pbtxt_path).ok(); + if (!pb_found && !pbtxt_found) { + return absl::NotFoundError(absl::StrCat( + "saved_model not found in input directory: ", input_model_dir)); + } + + // Serialize BEF buffer to a file under aot_packages + const std::string serialized_bef_path = + io::JoinPath(aot_directory, kBefBufferFilenameMLIRBEF); + TF_RETURN_IF_ERROR(SerializeBEF(result.bef, serialized_bef_path)); + + if (pb_found) { + const std::string output_file_directory = + io::JoinPath(std::string(output_model_dir), kSavedModelFilenamePb); + return env->CopyFile(saved_model_pb_path, output_file_directory); + } else { + const std::string output_file_directory = + io::JoinPath(std::string(output_model_dir), kSavedModelFilenamePbTxt); + return env->CopyFile(saved_model_pbtxt_path, output_file_directory); + } +} + +StatusOr AotCompileSavedModel(absl::string_view input_model_dir, + AotOptions aot_options) { TF_ASSIGN_OR_RETURN(tensorflow::MetaGraphDef meta_graph_def, ReadSavedModel(input_model_dir, aot_options.tags)); @@ -160,43 +203,27 @@ Status AotCompileSavedModel(absl::string_view input_model_dir, } tfrt::BefBuffer bef; + std::vector xla_function_names; 
RETURN_IF_ERROR_IN_COMPILE(tensorflow::ConvertTfMlirToBef( aot_options.graph_execution_options->compile_options, mlir_module.get(), - &bef, model_context, fallback_state.get())); + &bef, model_context, fallback_state.get(), &xla_function_names)); if (bef.empty()) { - LOG(DFATAL) << "BefBuffer is empty."; return absl::InternalError("BefBuffer is empty."); } - const std::string warmup_requests_path = io::JoinPath( - input_model_dir, "assets.extra", "tf_serving_warmup_requests"); - TF_RETURN_IF_ERROR(env->FileExists(warmup_requests_path)); - - const std::string saved_model_pb_path = - io::JoinPath(input_model_dir, kSavedModelFilenamePb); - const std::string saved_model_pbtxt_path = - io::JoinPath(input_model_dir, kSavedModelFilenamePbTxt); - bool pb_found = env->FileExists(saved_model_pb_path).ok(); - bool pbtxt_found = env->FileExists(saved_model_pbtxt_path).ok(); - if (!pb_found && !pbtxt_found) { - return absl::NotFoundError(absl::StrCat( - "saved_model not found in input directory: ", input_model_dir)); + const FunctionLibraryDefinition& flib_def = fallback_state->func_lib_def(); + std::vector xla_functions; + xla_functions.reserve(xla_function_names.size()); + for (const std::string& name : xla_function_names) { + const FunctionDef* xla_func_def = flib_def.Find(name); + if (xla_func_def == nullptr) { + return absl::NotFoundError( + absl::StrCat("XLA function ", name, " not found in library.")); + } + xla_functions.push_back(*xla_func_def); } - // Serialize BEF buffer to a file under aot_packages - const std::string serialized_bef_path = - io::JoinPath(aot_directory, kBefBufferFilenameMLIRBEF); - TF_RETURN_IF_ERROR(SerializeBEF(bef, serialized_bef_path)); - - if (pb_found) { - const std::string output_file_directory = - io::JoinPath(std::string(output_model_dir), kSavedModelFilenamePb); - return env->CopyFile(saved_model_pb_path, output_file_directory); - } else { - const std::string output_file_directory = - io::JoinPath(std::string(output_model_dir), 
kSavedModelFilenamePbTxt); - return env->CopyFile(saved_model_pbtxt_path, output_file_directory); - } + return AotResult{std::move(bef), std::move(xla_functions)}; } // TODO(b/294095043): Create a function (ex Status diff --git a/tensorflow/core/tfrt/saved_model/saved_model_aot_compile.h b/tensorflow/core/tfrt/saved_model/saved_model_aot_compile.h index da5e85f2c06f65..d51cda14f302c1 100644 --- a/tensorflow/core/tfrt/saved_model/saved_model_aot_compile.h +++ b/tensorflow/core/tfrt/saved_model/saved_model_aot_compile.h @@ -19,9 +19,12 @@ limitations under the License. #include #include #include +#include #include "xla/service/compiler.h" +#include "tensorflow/core/framework/function.pb.h" #include "tensorflow/core/tfrt/graph_executor/graph_execution_options.h" +#include "tfrt/bef/bef_buffer.h" // from @tf_runtime namespace tensorflow::tfrt_stub { struct AotOptions { @@ -31,13 +34,26 @@ struct AotOptions { std::shared_ptr graph_execution_options; }; -// AOT Compiles saved_model in input_model_dir, writing output +struct AotResult { + tfrt::BefBuffer bef; + // TODO(b/296466237): Investigate whether the whole FunctionDefLibrary should + // be put here. + // XLA cluster functions generated during bridge and their nested functions. + std::vector xla_functions; +}; + +// AOT compiles saved_model in input_model_dir and returns AotResult, otherwise +// returns error. +StatusOr AotCompileSavedModel(absl::string_view input_model_dir, + AotOptions aot_options = {}); + +// AOT compiles saved_model in input_model_dir, writing output // saved_model and aot packages to output_model_dir, or // "{input_model_dir}/aot_packages" if output dir provided. 
Warmup requests // should be present in input_model_dir -Status AotCompileSavedModel(absl::string_view input_model_dir, - AotOptions aot_options = {}, - absl::string_view output_model_dir = ""); +Status AotCompileSavedModelAndSaveResult( + absl::string_view input_model_dir, AotOptions aot_options = {}, + absl::string_view output_model_dir = ""); } // namespace tensorflow::tfrt_stub From b53353deeecec5c99689c8d63a9e719137612f6a Mon Sep 17 00:00:00 2001 From: Yash Katariya Date: Mon, 25 Sep 2023 18:33:55 -0700 Subject: [PATCH 246/567] Update the containers which contains a rebuilt image which removes bazel from jax's docker image and use the ones preinstalled in the sigbuild image PiperOrigin-RevId: 568381631 --- tensorflow/tools/toolchains/remote_config/containers.bzl | 4 ++-- .../tsl/tools/toolchains/remote_config/containers.bzl | 4 ++-- third_party/xla/tools/toolchains/remote_config/containers.bzl | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/tools/toolchains/remote_config/containers.bzl b/tensorflow/tools/toolchains/remote_config/containers.bzl index 2819512e4b902c..d2e825200e042d 100644 --- a/tensorflow/tools/toolchains/remote_config/containers.bzl +++ b/tensorflow/tools/toolchains/remote_config/containers.bzl @@ -7,9 +7,9 @@ container_digests = { # JAX manylinux2014 configs. 
"cuda11.1-cudnn8-ubuntu20.04-manylinux2014-multipython": "sha256:011034978c5f1e5dcecc816b3b964faafc42b243001d9cd09ff7cfe4a6a0f4b9", "cuda11.4-cudnn8.2-ubuntu20.04-manylinux2014-multipython": "sha256:d17894a1349a12baea1732cb133f65f08754ed97d0a6647efe23c916a9ab8f1c", - "cuda11.8-cudnn8.6-ubuntu20.04-manylinux2014-multipython": "sha256:c973a5dd1b335b83f5cc65ab2d1f12e12c0cc5d310a2d9bf676fcdb52cf08285", + "cuda11.8-cudnn8.6-ubuntu20.04-manylinux2014-multipython": "sha256:4d5b528d900d5366800c984a2bf737770b2e4b1da8099de64f0e8c44caa08e0f", "cuda12.0.1-cudnn8.8-ubuntu20.04-manylinux2014-multipython": "sha256:2551b1587bdd0b63a4dd329eba6416cd07acb25496dde411c376609ce4f076f0", - "cuda12.2-cudnn8.9-ubuntu20.04-manylinux2014-multipython": "sha256:55cf36aa54debd7ec7b3aac5a84af1fe3691a186aceae8d1d8eafe886d6a6950", + "cuda12.2-cudnn8.9-ubuntu20.04-manylinux2014-multipython": "sha256:8b55e288ed59e960abd3a3fdcbb19fd7c36183f71766d4733aeb38ab73d1e8d7", # ROCM, probably not all of them still in use "rocm-ubuntu18.04-manylinux2010-multipython": "sha256:6e953a09b145df338bcb03e9e36f99b291140c29b72d0a048fb6c5905ccad5eb", "rocm-ubuntu20.04-manylinux2014-multipython": "sha256:906faec7765fe5dd067f2b092b5d5f220c1fedde725fb42c83d031b4d6f32204", diff --git a/third_party/xla/third_party/tsl/tools/toolchains/remote_config/containers.bzl b/third_party/xla/third_party/tsl/tools/toolchains/remote_config/containers.bzl index 2819512e4b902c..d2e825200e042d 100644 --- a/third_party/xla/third_party/tsl/tools/toolchains/remote_config/containers.bzl +++ b/third_party/xla/third_party/tsl/tools/toolchains/remote_config/containers.bzl @@ -7,9 +7,9 @@ container_digests = { # JAX manylinux2014 configs. 
"cuda11.1-cudnn8-ubuntu20.04-manylinux2014-multipython": "sha256:011034978c5f1e5dcecc816b3b964faafc42b243001d9cd09ff7cfe4a6a0f4b9", "cuda11.4-cudnn8.2-ubuntu20.04-manylinux2014-multipython": "sha256:d17894a1349a12baea1732cb133f65f08754ed97d0a6647efe23c916a9ab8f1c", - "cuda11.8-cudnn8.6-ubuntu20.04-manylinux2014-multipython": "sha256:c973a5dd1b335b83f5cc65ab2d1f12e12c0cc5d310a2d9bf676fcdb52cf08285", + "cuda11.8-cudnn8.6-ubuntu20.04-manylinux2014-multipython": "sha256:4d5b528d900d5366800c984a2bf737770b2e4b1da8099de64f0e8c44caa08e0f", "cuda12.0.1-cudnn8.8-ubuntu20.04-manylinux2014-multipython": "sha256:2551b1587bdd0b63a4dd329eba6416cd07acb25496dde411c376609ce4f076f0", - "cuda12.2-cudnn8.9-ubuntu20.04-manylinux2014-multipython": "sha256:55cf36aa54debd7ec7b3aac5a84af1fe3691a186aceae8d1d8eafe886d6a6950", + "cuda12.2-cudnn8.9-ubuntu20.04-manylinux2014-multipython": "sha256:8b55e288ed59e960abd3a3fdcbb19fd7c36183f71766d4733aeb38ab73d1e8d7", # ROCM, probably not all of them still in use "rocm-ubuntu18.04-manylinux2010-multipython": "sha256:6e953a09b145df338bcb03e9e36f99b291140c29b72d0a048fb6c5905ccad5eb", "rocm-ubuntu20.04-manylinux2014-multipython": "sha256:906faec7765fe5dd067f2b092b5d5f220c1fedde725fb42c83d031b4d6f32204", diff --git a/third_party/xla/tools/toolchains/remote_config/containers.bzl b/third_party/xla/tools/toolchains/remote_config/containers.bzl index 2819512e4b902c..d2e825200e042d 100644 --- a/third_party/xla/tools/toolchains/remote_config/containers.bzl +++ b/third_party/xla/tools/toolchains/remote_config/containers.bzl @@ -7,9 +7,9 @@ container_digests = { # JAX manylinux2014 configs. 
"cuda11.1-cudnn8-ubuntu20.04-manylinux2014-multipython": "sha256:011034978c5f1e5dcecc816b3b964faafc42b243001d9cd09ff7cfe4a6a0f4b9", "cuda11.4-cudnn8.2-ubuntu20.04-manylinux2014-multipython": "sha256:d17894a1349a12baea1732cb133f65f08754ed97d0a6647efe23c916a9ab8f1c", - "cuda11.8-cudnn8.6-ubuntu20.04-manylinux2014-multipython": "sha256:c973a5dd1b335b83f5cc65ab2d1f12e12c0cc5d310a2d9bf676fcdb52cf08285", + "cuda11.8-cudnn8.6-ubuntu20.04-manylinux2014-multipython": "sha256:4d5b528d900d5366800c984a2bf737770b2e4b1da8099de64f0e8c44caa08e0f", "cuda12.0.1-cudnn8.8-ubuntu20.04-manylinux2014-multipython": "sha256:2551b1587bdd0b63a4dd329eba6416cd07acb25496dde411c376609ce4f076f0", - "cuda12.2-cudnn8.9-ubuntu20.04-manylinux2014-multipython": "sha256:55cf36aa54debd7ec7b3aac5a84af1fe3691a186aceae8d1d8eafe886d6a6950", + "cuda12.2-cudnn8.9-ubuntu20.04-manylinux2014-multipython": "sha256:8b55e288ed59e960abd3a3fdcbb19fd7c36183f71766d4733aeb38ab73d1e8d7", # ROCM, probably not all of them still in use "rocm-ubuntu18.04-manylinux2010-multipython": "sha256:6e953a09b145df338bcb03e9e36f99b291140c29b72d0a048fb6c5905ccad5eb", "rocm-ubuntu20.04-manylinux2014-multipython": "sha256:906faec7765fe5dd067f2b092b5d5f220c1fedde725fb42c83d031b4d6f32204", From cc390d943023314588f939f4a7bf2758ef8e837a Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Mon, 25 Sep 2023 18:37:05 -0700 Subject: [PATCH 247/567] [stream_executor] CommandBuffer: add APIs for recording kernel launch and memcpy operations This implementation is based on gpu_graph.{h.cc} and for CUDA backend adds CUDA graph nodes to the graph under construction. 
https://github.com/openxla/xla/issues/5857 PiperOrigin-RevId: 568382083 --- .../xla/xla/stream_executor/command_buffer.h | 4 ++ .../stream_executor/cuda/cuda_gpu_executor.cc | 2 +- third_party/xla/xla/stream_executor/gpu/BUILD | 2 + .../stream_executor/gpu/gpu_command_buffer.cc | 43 ++++++++++++++++++- .../stream_executor/gpu/gpu_command_buffer.h | 15 ++++++- .../stream_executor_internal.h | 41 +++++++++++++++++- 6 files changed, 102 insertions(+), 5 deletions(-) diff --git a/third_party/xla/xla/stream_executor/command_buffer.h b/third_party/xla/xla/stream_executor/command_buffer.h index fba2e1ec99e4da..a22ddb501732fd 100644 --- a/third_party/xla/xla/stream_executor/command_buffer.h +++ b/third_party/xla/xla/stream_executor/command_buffer.h @@ -42,6 +42,10 @@ class CommandBuffer { static tsl::StatusOr Create(StreamExecutor* executor); + internal::CommandBufferInterface* operator->() { + return implementation_.get(); + } + private: std::unique_ptr implementation_; diff --git a/third_party/xla/xla/stream_executor/cuda/cuda_gpu_executor.cc b/third_party/xla/xla/stream_executor/cuda/cuda_gpu_executor.cc index 483f9a973083d1..212cc6643a006f 100644 --- a/third_party/xla/xla/stream_executor/cuda/cuda_gpu_executor.cc +++ b/third_party/xla/xla/stream_executor/cuda/cuda_gpu_executor.cc @@ -849,7 +849,7 @@ GpuExecutor::GetCommandBufferImplementation() { VLOG(2) << "Create CUDA command buffer (CUDA graph)"; GpuGraphHandle graph = nullptr; TF_RETURN_IF_ERROR(GpuDriver::CreateGraph(&graph)); - return std::make_unique(graph); + return std::make_unique(this, graph); } void* GpuExecutor::GpuContextHack() { return context_; } diff --git a/third_party/xla/xla/stream_executor/gpu/BUILD b/third_party/xla/xla/stream_executor/gpu/BUILD index 87a41605b5046d..80bda68ef3f6b7 100644 --- a/third_party/xla/xla/stream_executor/gpu/BUILD +++ b/third_party/xla/xla/stream_executor/gpu/BUILD @@ -90,11 +90,13 @@ cc_library( deps = [ ":gpu_driver_header", ":gpu_executor_header", + 
":gpu_kernel_header", ":gpu_stream", ":gpu_types_header", "//xla/stream_executor:stream_executor_headers", "@com_google_absl//absl/log", "@com_google_absl//absl/log:check", + "@local_tsl//tsl/platform:errors", "@local_tsl//tsl/platform:status", "@local_tsl//tsl/platform:statusor", ], diff --git a/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer.cc b/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer.cc index 9e18de6611234e..03c4d170bd11ed 100644 --- a/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer.cc +++ b/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer.cc @@ -21,7 +21,13 @@ limitations under the License. #include "absl/log/check.h" #include "absl/log/log.h" #include "xla/stream_executor/gpu/gpu_driver.h" +#include "xla/stream_executor/gpu/gpu_executor.h" +#include "xla/stream_executor/gpu/gpu_kernel.h" #include "xla/stream_executor/gpu/gpu_types.h" +#include "xla/stream_executor/kernel.h" +#include "xla/stream_executor/launch_dim.h" +#include "tsl/platform/errors.h" +#include "tsl/platform/status.h" namespace stream_executor::gpu { @@ -54,8 +60,8 @@ static int64_t NotifyExecDestroyed() { // GpuCommandBuffer implementation //===----------------------------------------------------------------------===// -GpuCommandBuffer::GpuCommandBuffer(GpuGraphHandle graph) - : graph_(graph), exec_(nullptr) {} +GpuCommandBuffer::GpuCommandBuffer(GpuExecutor* parent, GpuGraphHandle graph) + : parent_(parent), graph_(graph), exec_(nullptr) {} GpuCommandBuffer::~GpuCommandBuffer() { if (exec_ != nullptr) { @@ -70,4 +76,37 @@ GpuCommandBuffer::~GpuCommandBuffer() { } } +static GpuDevicePtr AsDevicePtr(const DeviceMemoryBase& mem) { + return reinterpret_cast(const_cast(mem.opaque())); +} + +tsl::Status GpuCommandBuffer::Launch(const ThreadDim& threads, + const BlockDim& blocks, + const KernelBase& kernel, + const KernelArgsArrayBase& args) { + const GpuKernel* gpu_kernel = AsGpuKernel(&kernel); + GpuFunctionHandle gpu_func = 
gpu_kernel->AsGpuFunctionHandle(); + + void** kernel_params = const_cast(args.argument_addresses().data()); + + GpuGraphNodeHandle node; + TF_RETURN_IF_ERROR(GpuDriver::GraphAddKernelNode( + &node, graph_, {}, kernel.name(), gpu_func, blocks.x, blocks.y, blocks.z, + threads.x, threads.y, threads.z, args.number_of_shared_bytes(), + kernel_params, /*extra=*/nullptr)); + + return tsl::OkStatus(); +} + +tsl::Status GpuCommandBuffer::MemcpyDeviceToDevice(DeviceMemoryBase* dst, + const DeviceMemoryBase& src, + uint64_t size) { + GpuGraphNodeHandle node; + TF_RETURN_IF_ERROR(GpuDriver::GraphAddMemcpyD2DNode( + parent_->gpu_context(), &node, graph_, {}, AsDevicePtr(*dst), + AsDevicePtr(src), size)); + + return tsl::OkStatus(); +} + } // namespace stream_executor::gpu diff --git a/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer.h b/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer.h index 7ce16607b7a60d..b744a02e90be1f 100644 --- a/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer.h +++ b/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer.h @@ -19,8 +19,12 @@ limitations under the License. #include #include +#include "xla/stream_executor/gpu/gpu_executor.h" #include "xla/stream_executor/gpu/gpu_types.h" +#include "xla/stream_executor/kernel.h" +#include "xla/stream_executor/launch_dim.h" #include "xla/stream_executor/stream_executor_internal.h" +#include "tsl/platform/status.h" namespace stream_executor::gpu { @@ -28,9 +32,17 @@ namespace stream_executor::gpu { // implementation (it's backed by CUDA or HIP graphs on NVIDIA and AMD devices). 
class GpuCommandBuffer : public internal::CommandBufferInterface { public: - explicit GpuCommandBuffer(GpuGraphHandle graph); + GpuCommandBuffer(GpuExecutor* parent, GpuGraphHandle graph); ~GpuCommandBuffer() override; + tsl::Status Launch(const ThreadDim& threads, const BlockDim& blocks, + const KernelBase& kernel, + const KernelArgsArrayBase& args) override; + + tsl::Status MemcpyDeviceToDevice(DeviceMemoryBase* dst, + const DeviceMemoryBase& src, + uint64_t size) override; + // We track the total number of allocated and alive executable graphs in the // process to track the command buffers resource usage. Executable graph // allocates resources on a GPU devices (rule of thumb is ~8kb per node), so @@ -50,6 +62,7 @@ class GpuCommandBuffer : public internal::CommandBufferInterface { static_assert(std::is_pointer_v, "GpuGraphExecHandle must be a pointer"); + GpuExecutor* parent_; // not owned, must outlive *this GpuGraphHandle graph_ = nullptr; // owned handle GpuGraphExecHandle exec_ = nullptr; // owned handle }; diff --git a/third_party/xla/xla/stream_executor/stream_executor_internal.h b/third_party/xla/xla/stream_executor/stream_executor_internal.h index 9acd28dc84deb5..b57d683a82301d 100644 --- a/third_party/xla/xla/stream_executor/stream_executor_internal.h +++ b/third_party/xla/xla/stream_executor/stream_executor_internal.h @@ -52,6 +52,10 @@ namespace stream_executor { class Stream; +//===----------------------------------------------------------------------===// +// ModuleHandle +//===----------------------------------------------------------------------===// + // An opaque handle to a loaded module. // // An instance of this is returned from StreamExecutor::GetModule. 
@@ -71,6 +75,10 @@ class ModuleHandle { namespace internal { +//===----------------------------------------------------------------------===// +// EventInterface +//===----------------------------------------------------------------------===// + // Platform-dependent interface class for the generic Events interface, in // the PIMPL style. class EventInterface { @@ -82,6 +90,10 @@ class EventInterface { SE_DISALLOW_COPY_AND_ASSIGN(EventInterface); }; +//===----------------------------------------------------------------------===// +// KernelInterface +//===----------------------------------------------------------------------===// + // Pointer-to-implementation object type (i.e. the KernelBase class delegates to // this interface) with virtual destruction. This class exists for the // platform-dependent code to hang any kernel data/resource info/functionality @@ -107,16 +119,39 @@ class KernelInterface { SE_DISALLOW_COPY_AND_ASSIGN(KernelInterface); }; -// Platform-dependent interface class implementing generic CommandBuffer. +//===----------------------------------------------------------------------===// +// CommandBufferInterface +//===----------------------------------------------------------------------===// + +// Platform-dependent interface class for implementing generic CommandBuffer. +// +// TODO(ezhulenev): Currently we assume that all operations between barriers +// can execute concurrently, and it's up to the caller to insert barriers to +// guarantee correctness. Consider adding finer grained synchronization +// mechanism between different commands. class CommandBufferInterface { public: CommandBufferInterface() = default; virtual ~CommandBufferInterface() = default; + // Adds a kernel launch command to the command buffer. + virtual tsl::Status Launch(const ThreadDim& threads, const BlockDim& blocks, + const KernelBase& kernel, + const KernelArgsArrayBase& args) = 0; + + // Adds a device-to-device memory copy to the command buffer. 
+ virtual tsl::Status MemcpyDeviceToDevice(DeviceMemoryBase* dst, + const DeviceMemoryBase& src, + uint64_t size) = 0; + private: SE_DISALLOW_COPY_AND_ASSIGN(CommandBufferInterface); }; +//===----------------------------------------------------------------------===// +// StreamInterface +//===----------------------------------------------------------------------===// + // Pointer-to-implementation object type (i.e. the Stream class delegates to // this interface) with virtual destruction. This class exists for the // platform-dependent code to hang any kernel data/resource info/functionality @@ -155,6 +190,10 @@ class StreamInterface { SE_DISALLOW_COPY_AND_ASSIGN(StreamInterface); }; +//===----------------------------------------------------------------------===// +// StreamExecutorInterface +//===----------------------------------------------------------------------===// + // Interface for the different StreamExecutor platforms (i.e. CUDA, OpenCL). // // Various platforms will provide an implementation that satisfy this interface. 
From d8b5490ded4b24da09d7fe08d8a6b116b3e9d452 Mon Sep 17 00:00:00 2001 From: Yash Katariya Date: Mon, 25 Sep 2023 18:49:21 -0700 Subject: [PATCH 248/567] Remove bazel from jax's docker image and use the ones preinstalled in the sigbuild image PiperOrigin-RevId: 568383806 --- ...rbe.cuda11.8-cudnn8.6-ubuntu20.04-manylinux2014-multipython | 3 --- ...rbe.cuda12.2-cudnn8.9-ubuntu20.04-manylinux2014-multipython | 3 --- 2 files changed, 6 deletions(-) diff --git a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda11.8-cudnn8.6-ubuntu20.04-manylinux2014-multipython b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda11.8-cudnn8.6-ubuntu20.04-manylinux2014-multipython index 83223fde6e478c..460e54add5d558 100644 --- a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda11.8-cudnn8.6-ubuntu20.04-manylinux2014-multipython +++ b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda11.8-cudnn8.6-ubuntu20.04-manylinux2014-multipython @@ -31,9 +31,6 @@ RUN apt-get update && apt-get install -y \ && \ rm -rf /var/lib/apt/lists/* -COPY install/install_bazel.sh /install/ -RUN /install/install_bazel.sh - COPY install/build_and_install_python.sh /install/ RUN /install/build_and_install_python.sh "3.9.4" RUN /install/build_and_install_python.sh "3.10.0" diff --git a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda12.2-cudnn8.9-ubuntu20.04-manylinux2014-multipython b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda12.2-cudnn8.9-ubuntu20.04-manylinux2014-multipython index 12e9356664f896..308b709bf51471 100644 --- a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda12.2-cudnn8.9-ubuntu20.04-manylinux2014-multipython +++ b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda12.2-cudnn8.9-ubuntu20.04-manylinux2014-multipython @@ -30,9 +30,6 @@ RUN apt-get update && apt-get install -y \ && \ rm -rf /var/lib/apt/lists/* -COPY install/install_bazel.sh /install/ -RUN /install/install_bazel.sh - COPY install/build_and_install_python.sh /install/ RUN /install/build_and_install_python.sh "3.9.4" RUN /install/build_and_install_python.sh 
"3.10.0" From cf5e85663fef097f83f5004f112ae42758e9b035 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Mon, 25 Sep 2023 19:43:50 -0700 Subject: [PATCH 249/567] [stream_executor] NFC: Restrict visibility of stream_executor_pimpl target `stream_executor_pimpl` is an internal implementation detail planned to be removed. Remove all external users of this target and clean up some of the build files to remove dependencies on other internal targets planned to be removed. https://github.com/openxla/xla/issues/5761 PiperOrigin-RevId: 568390988 --- tensorflow/c/experimental/stream_executor/BUILD | 3 --- tensorflow/python/BUILD | 2 +- third_party/xla/xla/backends/interpreter/BUILD | 2 -- third_party/xla/xla/client/BUILD | 1 - third_party/xla/xla/mlir/backends/gpu/BUILD | 2 +- third_party/xla/xla/service/BUILD | 11 +++-------- third_party/xla/xla/service/cpu/BUILD | 2 -- third_party/xla/xla/service/gpu/tests/BUILD | 4 ++-- third_party/xla/xla/stream_executor/BUILD | 6 ++++-- third_party/xla/xla/stream_executor/cuda/BUILD | 8 ++++---- third_party/xla/xla/stream_executor/gpu/BUILD | 4 ++-- third_party/xla/xla/stream_executor/host/BUILD | 1 - third_party/xla/xla/stream_executor/rocm/BUILD | 6 +++--- .../xla/xla/translate/mhlo_to_lhlo_with_xla/BUILD | 2 +- third_party/xla/xla/xla.bzl | 2 ++ 15 files changed, 23 insertions(+), 33 deletions(-) diff --git a/tensorflow/c/experimental/stream_executor/BUILD b/tensorflow/c/experimental/stream_executor/BUILD index 5cf55e03aa778e..4c02379a999052 100644 --- a/tensorflow/c/experimental/stream_executor/BUILD +++ b/tensorflow/c/experimental/stream_executor/BUILD @@ -46,7 +46,6 @@ cc_library( "//tensorflow/core/platform:strcat", "@com_google_absl//absl/functional:any_invocable", "@local_xla//xla/stream_executor", - "@local_xla//xla/stream_executor:multi_platform_manager", "@local_xla//xla/stream_executor:platform", ], ) @@ -80,8 +79,6 @@ tf_cc_test( "//tensorflow/core:test_main", "//tensorflow/core/protobuf:error_codes_proto_impl_cc", 
"@local_xla//xla/stream_executor", - "@local_xla//xla/stream_executor:multi_platform_manager", - "@local_xla//xla/stream_executor:stream_executor_pimpl", ], ) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 4d79a70ca67e2c..7ad991d1d5b87b 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -940,7 +940,7 @@ filegroup( "@local_tsl//tsl/profiler/rpc/client:profiler_client_impl", "@local_tsl//tsl/python/lib/core:ml_dtypes_lib", # bfloat16, float8_e4m3fn, float8_e5m2 "@local_tsl//tsl/python/lib/core:numpy", # checkpoint_reader - "@local_xla//xla/stream_executor:stream_executor_pimpl", # stat_summarizer + "@local_xla//xla/stream_executor", # stat_summarizer ] + if_xla_available([ "//tensorflow/compiler/aot:tfcompile_lib", # tfcompile "@local_xla//xla:status_macros", # tfcompile diff --git a/third_party/xla/xla/backends/interpreter/BUILD b/third_party/xla/xla/backends/interpreter/BUILD index 938e4ae4af27d4..bba10b2e1fd7ee 100644 --- a/third_party/xla/xla/backends/interpreter/BUILD +++ b/third_party/xla/xla/backends/interpreter/BUILD @@ -144,8 +144,6 @@ cc_library( ":executor", ":platform_id", "//xla/stream_executor", - "//xla/stream_executor:executor_cache", - "//xla/stream_executor:stream_executor_pimpl", "//xla/stream_executor/platform", "@com_google_absl//absl/strings:str_format", "@local_tsl//tsl/platform:status", diff --git a/third_party/xla/xla/client/BUILD b/third_party/xla/xla/client/BUILD index 4c47f78ef180ad..047f2590bd3c6e 100644 --- a/third_party/xla/xla/client/BUILD +++ b/third_party/xla/xla/client/BUILD @@ -142,7 +142,6 @@ cc_library( "//xla/service:stream_pool", "//xla/stream_executor", "//xla/stream_executor:device_memory_allocator", - "//xla/stream_executor:stream_executor_pimpl", # fixdeps: keep "@com_google_absl//absl/types:span", ], ) diff --git a/third_party/xla/xla/mlir/backends/gpu/BUILD b/third_party/xla/xla/mlir/backends/gpu/BUILD index 0cbdc1e915e4c6..e97e14560506cd 100644 --- 
a/third_party/xla/xla/mlir/backends/gpu/BUILD +++ b/third_party/xla/xla/mlir/backends/gpu/BUILD @@ -20,7 +20,7 @@ xla_cc_binary( "//xla/mlir/backends/gpu/transforms:passes", "//xla/mlir_hlo:lhlo", "//xla/mlir_hlo:lhlo_gpu", - "//xla/stream_executor:stream_executor_impl", + "//xla/stream_executor", "@llvm-project//mlir:FuncDialect", "@llvm-project//mlir:FuncExtensions", "@llvm-project//mlir:GPUDialect", diff --git a/third_party/xla/xla/service/BUILD b/third_party/xla/xla/service/BUILD index 2a4e6d77e45cba..b29f6d1a832b07 100644 --- a/third_party/xla/xla/service/BUILD +++ b/third_party/xla/xla/service/BUILD @@ -1065,7 +1065,6 @@ cc_library( "//xla:types", "//xla:util", "//xla/stream_executor", - "//xla/stream_executor:stream_executor_pimpl", "//xla/stream_executor/cuda:cuda_platform_id", "//xla/stream_executor/host:host_platform_id", "//xla/stream_executor/rocm:rocm_platform_id", @@ -1090,7 +1089,6 @@ cc_library( "//xla:util", "//xla/stream_executor", "//xla/stream_executor:device_memory_allocator", - "//xla/stream_executor:stream_executor_pimpl", "//xla/stream_executor/host:host_platform_id", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", @@ -1145,7 +1143,6 @@ cc_library( "//xla/hlo/ir:hlo_module_group", "//xla/stream_executor", "//xla/stream_executor:device_memory_allocator", - "//xla/stream_executor:stream_executor_pimpl", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/types:span", @@ -1335,11 +1332,12 @@ cc_library( name = "gpu_plugin_impl", compatible_with = get_compatible_with_portable(), visibility = ["//visibility:public"], - deps = if_gpu_is_configured([ + deps = [ + "//xla/stream_executor:stream_executor_impl", + ] + if_gpu_is_configured([ ":service", "//xla/service/gpu:gpu_compiler", "//xla/service/gpu:gpu_transfer_manager", - "//xla/stream_executor", ]) + if_cuda_is_configured([ "//xla/service/gpu:nvptx_compiler", "//xla/stream_executor/cuda:stream_executor_cuda", 
@@ -1562,7 +1560,6 @@ cc_library( "//xla:util", "//xla:xla_data_proto_cc", "//xla/stream_executor", - "//xla/stream_executor:stream_executor_pimpl", "@local_tsl//tsl/platform:logging", ], ) @@ -3901,7 +3898,6 @@ cc_library( "//xla:util", "//xla:xla_data_proto_cc", "//xla/stream_executor", - "//xla/stream_executor:stream_executor_pimpl", "@local_tsl//tsl/platform:errors", "@local_tsl//tsl/platform:logging", ], @@ -5396,7 +5392,6 @@ cc_library( visibility = ["//visibility:public"], deps = [ "//xla/stream_executor", - "//xla/stream_executor:stream_executor_pimpl", ], ) diff --git a/third_party/xla/xla/service/cpu/BUILD b/third_party/xla/xla/service/cpu/BUILD index 6bbaf8de88d43f..f4c9fde056b1c2 100644 --- a/third_party/xla/xla/service/cpu/BUILD +++ b/third_party/xla/xla/service/cpu/BUILD @@ -344,7 +344,6 @@ cc_library( "//xla/service/llvm_ir:llvm_util", "//xla/service/spmd:stateful_rng_spmd_partitioner", "//xla/stream_executor", - "//xla/stream_executor:stream_executor_pimpl", # fixdeps: keep "//xla/stream_executor/host:host_platform_id", "//xla/translate/hlo_to_mhlo:hlo_to_mlir_hlo", "//xla/translate/hlo_to_mhlo:hlo_utils", @@ -439,7 +438,6 @@ cc_library( "//xla/service:hlo_proto_cc", "//xla/service:llvm_compiler", "//xla/stream_executor", - "//xla/stream_executor:stream_executor_pimpl", # fixdeps: keep "//xla/stream_executor/host:host_platform_id", "@llvm-project//llvm:Target", ], diff --git a/third_party/xla/xla/service/gpu/tests/BUILD b/third_party/xla/xla/service/gpu/tests/BUILD index 854766e620a00f..06d9e3ed3bc01a 100644 --- a/third_party/xla/xla/service/gpu/tests/BUILD +++ b/third_party/xla/xla/service/gpu/tests/BUILD @@ -727,10 +727,10 @@ xla_cc_binary( "//xla/service/gpu:gpu_device_info_for_tests", "//xla/service/gpu:target_constants", "//xla/service/gpu/llvm_gpu_backend", + "//xla/stream_executor", "//xla/stream_executor:device_description", "//xla/stream_executor:device_description_proto_cc_impl", "//xla/stream_executor:dnn", - 
"//xla/stream_executor:stream_executor_impl", "//xla/stream_executor/host:host_platform", "//xla/tests:test_utils", "//xla/tools:hlo_module_loader", @@ -891,9 +891,9 @@ xla_cc_test( "//xla/client:xla_computation", "//xla/hlo/ir:hlo", "//xla/service/gpu:cublas_cudnn", + "//xla/stream_executor", "//xla/stream_executor:device_description", "//xla/stream_executor:dnn", - "//xla/stream_executor:stream_executor_pimpl", "//xla/tests:client_library_test_base", "//xla/tests:hlo_test_base", "//xla/tests:literal_test_util", diff --git a/third_party/xla/xla/stream_executor/BUILD b/third_party/xla/xla/stream_executor/BUILD index b0f10c4d83d930..ab8e0b77ef79f1 100644 --- a/third_party/xla/xla/stream_executor/BUILD +++ b/third_party/xla/xla/stream_executor/BUILD @@ -595,8 +595,8 @@ cc_library( hdrs = ["event.h"], visibility = ["//visibility:public"], deps = [ + ":stream_executor_headers", ":stream_executor_internal", - ":stream_executor_pimpl_header", "//xla/stream_executor/platform", "@local_tsl//tsl/platform:status", ], @@ -659,7 +659,6 @@ cc_library( deps = [ ":device_memory", ":stream_executor_headers", - ":stream_executor_pimpl_header", ":temporary_device_memory", "//xla/stream_executor/platform", "@com_google_absl//absl/base:core_headers", @@ -695,6 +694,7 @@ cc_library( cc_library( name = "stream_executor", textual_hdrs = [ + "allocator_stats.h", "blas.h", "data_type.h", "device_description.h", @@ -741,6 +741,8 @@ cc_library( name = "stream_executor_impl", visibility = ["//visibility:public"], deps = [ + ":allocator_stats", + ":device_description", ":device_memory", ":dnn", ":dnn_proto_cc", diff --git a/third_party/xla/xla/stream_executor/cuda/BUILD b/third_party/xla/xla/stream_executor/cuda/BUILD index 22d30bf5fd23f9..3b5e64c27d93ea 100644 --- a/third_party/xla/xla/stream_executor/cuda/BUILD +++ b/third_party/xla/xla/stream_executor/cuda/BUILD @@ -61,7 +61,7 @@ cc_library( "//xla/stream_executor", # buildcleaner: keep "//xla/stream_executor:executor_cache", 
"//xla/stream_executor:multi_platform_manager", - "//xla/stream_executor:stream_executor_pimpl_header", + "//xla/stream_executor:stream_executor_headers", "//xla/stream_executor/platform", ], ) + tf_additional_cuda_platform_deps() + [ @@ -360,7 +360,7 @@ cc_library( "//xla/stream_executor:event", "//xla/stream_executor:plugin_registry", "//xla/stream_executor:scratch_allocator", - "//xla/stream_executor:stream_executor_pimpl_header", + "//xla/stream_executor:stream_executor_headers", "//xla/stream_executor:temporary_device_memory", "//xla/stream_executor/platform", "@local_tsl//tsl/platform:errors", @@ -381,7 +381,7 @@ cc_library( ":cuda_driver", "@local_config_cuda//cuda:cuda_headers", "//xla/stream_executor:event", - "//xla/stream_executor:stream_executor_pimpl_header", + "//xla/stream_executor:stream_executor_headers", "//xla/stream_executor/gpu:gpu_kernel_header", "//xla/stream_executor/platform", ]), @@ -460,7 +460,7 @@ cc_library( "//xla/stream_executor:event", "//xla/stream_executor:plugin_registry", "//xla/stream_executor:stream_executor_internal", - "//xla/stream_executor:stream_executor_pimpl_header", + "//xla/stream_executor:stream_executor_headers", "//xla/stream_executor/gpu:asm_compiler", "//xla/stream_executor/gpu:gpu_command_buffer", "//xla/stream_executor/gpu:gpu_executor_header", diff --git a/third_party/xla/xla/stream_executor/gpu/BUILD b/third_party/xla/xla/stream_executor/gpu/BUILD index 80bda68ef3f6b7..5d35ea3380f44b 100644 --- a/third_party/xla/xla/stream_executor/gpu/BUILD +++ b/third_party/xla/xla/stream_executor/gpu/BUILD @@ -137,8 +137,8 @@ cc_library( ":gpu_kernel_header", "//xla/stream_executor:event", "//xla/stream_executor:platform", + "//xla/stream_executor:stream_executor_headers", "//xla/stream_executor:stream_executor_internal", - "//xla/stream_executor:stream_executor_pimpl_header", "//xla/stream_executor/platform", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/container:flat_hash_map", @@ -201,8 +201,8 @@ 
cc_library( deps = [ ":gpu_driver_header", "//xla/stream_executor:event", + "//xla/stream_executor:stream_executor_headers", "//xla/stream_executor:stream_executor_internal", - "//xla/stream_executor:stream_executor_pimpl_header", "//xla/stream_executor/platform", "@local_tsl//tsl/platform:logging", ], diff --git a/third_party/xla/xla/stream_executor/host/BUILD b/third_party/xla/xla/stream_executor/host/BUILD index 1cea4b7e5485eb..d6b2d697af45d5 100644 --- a/third_party/xla/xla/stream_executor/host/BUILD +++ b/third_party/xla/xla/stream_executor/host/BUILD @@ -88,7 +88,6 @@ cc_library( "//xla/stream_executor", "//xla/stream_executor:kernel", "//xla/stream_executor:stream_executor_internal", - "//xla/stream_executor:stream_executor_pimpl", # fixdeps: keep "@com_google_absl//absl/functional:any_invocable", "@com_google_absl//absl/strings", "@com_google_absl//absl/synchronization", diff --git a/third_party/xla/xla/stream_executor/rocm/BUILD b/third_party/xla/xla/stream_executor/rocm/BUILD index a4c1bf64976212..57826e043149ca 100644 --- a/third_party/xla/xla/stream_executor/rocm/BUILD +++ b/third_party/xla/xla/stream_executor/rocm/BUILD @@ -111,7 +111,7 @@ cc_library( "//xla/stream_executor:event", "//xla/stream_executor:plugin_registry", "//xla/stream_executor:stream_executor_internal", - "//xla/stream_executor:stream_executor_pimpl_header", + "//xla/stream_executor:stream_executor_headers", "//xla/stream_executor/gpu:gpu_activation_header", "//xla/stream_executor/gpu:gpu_event", "//xla/stream_executor/gpu:gpu_kernel_header", @@ -161,7 +161,7 @@ cc_library( "//xla/stream_executor", # buildcleaner: keep "//xla/stream_executor:executor_cache", "//xla/stream_executor:multi_platform_manager", - "//xla/stream_executor:stream_executor_pimpl_header", + "//xla/stream_executor:stream_executor_headers", "//xla/stream_executor/platform", ]), alwayslink = True, # Registers itself with the MultiPlatformManager. 
@@ -318,7 +318,7 @@ cc_library( "//xla/stream_executor:event", "//xla/stream_executor:plugin_registry", "//xla/stream_executor:scratch_allocator", - "//xla/stream_executor:stream_executor_pimpl_header", + "//xla/stream_executor:stream_executor_headers", "//xla/stream_executor:temporary_device_memory", "//xla/stream_executor/gpu:gpu_activation_header", "//xla/stream_executor/gpu:gpu_stream_header", diff --git a/third_party/xla/xla/translate/mhlo_to_lhlo_with_xla/BUILD b/third_party/xla/xla/translate/mhlo_to_lhlo_with_xla/BUILD index f19765af13bb55..165c60944986e9 100644 --- a/third_party/xla/xla/translate/mhlo_to_lhlo_with_xla/BUILD +++ b/third_party/xla/xla/translate/mhlo_to_lhlo_with_xla/BUILD @@ -116,7 +116,7 @@ xla_cc_binary( "//xla/mlir_hlo:all_passes", "//xla/mlir_hlo:hlo_dialect_registration", "//xla/service:gpu_plugin", - "//xla/stream_executor:stream_executor_impl", + "//xla/stream_executor", "@llvm-project//llvm:Support", "@llvm-project//mlir:AllPassesAndDialects", "@llvm-project//mlir:MlirOptLib", diff --git a/third_party/xla/xla/xla.bzl b/third_party/xla/xla/xla.bzl index 3fe599358e2baf..ee021b9b78c951 100644 --- a/third_party/xla/xla/xla.bzl +++ b/third_party/xla/xla/xla.bzl @@ -63,6 +63,7 @@ def xla_cc_binary(deps = None, copts = tsl_copts(), **kwargs): "//xla/service/gpu:backend_configs_cc_impl", "//xla/service/gpu:hlo_op_profile_proto_cc_impl", "//xla/stream_executor:dnn_proto_cc_impl", + "//xla/stream_executor:stream_executor_impl", "@local_tsl//tsl/platform:env_impl", "@local_tsl//tsl/platform:tensor_float_32_utils", "@local_tsl//tsl/profiler/utils:time_utils_impl", @@ -95,6 +96,7 @@ def xla_cc_test( clean_dep("//xla/service/gpu:backend_configs_cc_impl"), clean_dep("//xla/service/gpu:hlo_op_profile_proto_cc_impl"), clean_dep("//xla/stream_executor:dnn_proto_cc_impl"), + clean_dep("//xla/stream_executor:device_id_utils"), clean_dep("//xla/stream_executor:stream_executor_impl"), clean_dep("//xla/stream_executor/gpu:gpu_cudamallocasync_allocator"), 
clean_dep("//xla/stream_executor/gpu:gpu_init_impl"), From f947c9c19700e84c255a344ee0d03cc45cf9fd04 Mon Sep 17 00:00:00 2001 From: Luke Boyer Date: Mon, 25 Sep 2023 20:09:56 -0700 Subject: [PATCH 250/567] TensorListElementShape kernel implementation. PiperOrigin-RevId: 568396257 --- tensorflow/lite/kernels/variants/BUILD | 15 +++ .../list_kernels/list_element_shape.cc | 111 ++++++++++++++++++ .../list_kernels/list_element_shape_test.cc | 98 ++++++++++++++++ .../lite/kernels/variants/list_ops_lib.h | 2 + .../kernels/variants/register_list_ops.cc | 1 + 5 files changed, 227 insertions(+) create mode 100644 tensorflow/lite/kernels/variants/list_kernels/list_element_shape.cc create mode 100644 tensorflow/lite/kernels/variants/list_kernels/list_element_shape_test.cc diff --git a/tensorflow/lite/kernels/variants/BUILD b/tensorflow/lite/kernels/variants/BUILD index 55fd1a30ac1c5a..207f570302bffd 100644 --- a/tensorflow/lite/kernels/variants/BUILD +++ b/tensorflow/lite/kernels/variants/BUILD @@ -142,6 +142,21 @@ cc_test( ], ) +cc_test( + name = "list_element_shape_test", + srcs = ["list_kernels/list_element_shape_test.cc"], + deps = [ + ":list_ops_lib", + ":tensor_array", + ":test_util", + "//tensorflow/lite/core/c:c_api_types", + "//tensorflow/lite/core/c:common", + "//tensorflow/lite/kernels:test_util", + "//tensorflow/lite/schema:schema_fbs", + "@com_google_googletest//:gtest_main", + ], +) + cc_library( name = "list_ops_util", srcs = ["list_ops_util.cc"], diff --git a/tensorflow/lite/kernels/variants/list_kernels/list_element_shape.cc b/tensorflow/lite/kernels/variants/list_kernels/list_element_shape.cc new file mode 100644 index 00000000000000..b07a72257f4638 --- /dev/null +++ b/tensorflow/lite/kernels/variants/list_kernels/list_element_shape.cc @@ -0,0 +1,111 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include + +#include "tensorflow/lite/array.h" +#include "tensorflow/lite/core/c/c_api_types.h" +#include "tensorflow/lite/core/c/common.h" +#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/kernels/variants/list_ops_lib.h" +#include "tensorflow/lite/kernels/variants/tensor_array.h" + +namespace tflite { +namespace variants { +namespace ops { +namespace list_element_shape { +namespace { + +// This kernel returns a `TfLiteTensor` which represents the shape signature +// of the tensorlist elements stored in the `TensorArray::ElementShape` field as +// a `TfLiteIntArray`. Encoding shape signatures in tensors works as follows: +// +// An unranked shape signature: +// `TfLiteIntArray` : [] +// `TfLiteTensor` : shape = [], data = [-1] +// +// A scalar shape signature: +// `TfLiteIntArray` : [0] +// `TfLiteTensor` : shape = [0], data = [] +// +// A ranked tensor shape signature (with possibly dynamic dimensions): +// `TfLiteIntArray` : [Dim1, Dim2 ... DimRank] (DimI >= -1) +// `TfLiteTensor` : shape = [Rank], data = [Dim1, Dim2 ... 
DimRank] + +using ::tflite::variants::TensorArray; + +constexpr int kListInput = 0; +constexpr int kShapeOut = 0; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + + const TfLiteTensor* list_input; + TF_LITE_ENSURE_OK(context, + GetInputSafe(context, node, kListInput, &list_input)); + TF_LITE_ENSURE(context, list_input->type == kTfLiteVariant); + + TfLiteTensor* shape_out; + TF_LITE_ENSURE_OK(context, + GetOutputSafe(context, node, kShapeOut, &shape_out)); + TF_LITE_ENSURE_TYPES_EQ(context, shape_out->type, kTfLiteInt32); + + SetTensorToDynamic(shape_out); + + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteTensor* list_input; + TF_LITE_ENSURE_OK(context, + GetInputSafe(context, node, kListInput, &list_input)); + + const TensorArray* const list = + reinterpret_cast(list_input->data.data); + const TfLiteIntArray& element_shape = *list->ElementShape(); + + TfLiteTensor* shape_out; + TF_LITE_ENSURE_OK(context, + GetOutputSafe(context, node, kShapeOut, &shape_out)); + + if (element_shape.size == 0) { + // Unranked + context->ResizeTensor(context, shape_out, BuildTfLiteArray(0).release()); + GetTensorData(shape_out)[0] = -1; + } else if (element_shape.data[0] == 0) { + // Scalar + context->ResizeTensor(context, shape_out, BuildTfLiteArray({0}).release()); + } else { + // Ranked + context->ResizeTensor(context, shape_out, + BuildTfLiteArray({element_shape.size}).release()); + memcpy(GetTensorData(shape_out), element_shape.data, + element_shape.size * sizeof(int32_t)); + } + return kTfLiteOk; +} +} // namespace +} // namespace list_element_shape + +TfLiteRegistration* Register_LIST_ELEMENT_SHAPE() { + static TfLiteRegistration r = {nullptr, nullptr, list_element_shape::Prepare, + list_element_shape::Eval}; + return &r; +} + +} // namespace ops +} // namespace variants +} // namespace tflite diff --git 
a/tensorflow/lite/kernels/variants/list_kernels/list_element_shape_test.cc b/tensorflow/lite/kernels/variants/list_kernels/list_element_shape_test.cc new file mode 100644 index 00000000000000..0078e047eedd69 --- /dev/null +++ b/tensorflow/lite/kernels/variants/list_kernels/list_element_shape_test.cc @@ -0,0 +1,98 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include + +#include +#include +#include "tensorflow/lite/core/c/c_api_types.h" +#include "tensorflow/lite/core/c/common.h" +#include "tensorflow/lite/kernels/test_util.h" +#include "tensorflow/lite/kernels/variants/list_kernels/test_util.h" +#include "tensorflow/lite/kernels/variants/list_ops_lib.h" +#include "tensorflow/lite/schema/schema_generated.h" + +namespace tflite { +namespace variants { +namespace ops { +namespace { + +using ::testing::ElementsAreArray; + +class ListElementShapeModel : public ListOpModel { + public: + ListElementShapeModel() { + list_input_ = AddInput({TensorType_VARIANT, {}}); + shape_output_ = AddOutput({TensorType_INT32, {}}); + SetCustomOp("ListElementShape", {}, Register_LIST_ELEMENT_SHAPE); + BuildInterpreter({{}}); + } + const TfLiteTensor* GetOutputTensor(int index) { + return interpreter_->tensor(index); + } + int list_input_; + int shape_output_; +}; + +TEST(ListElementShapeTest, MultiDimStaticShape) { + ListElementShapeModel m; + 
m.PopulateListTensor(0, {2, 2}, 10, kTfLiteInt32); + + ASSERT_EQ(m.Invoke(), kTfLiteOk); + + const TfLiteTensor* const out = m.GetOutputTensor(m.shape_output_); + ASSERT_THAT(out, DimsAre({2})); + ASSERT_THAT(std::vector(out->data.i32, out->data.i32 + 2), + ElementsAreArray({2, 2})); +} + +TEST(ListElementShapeTest, MultiDimWithDynamicDims) { + ListElementShapeModel m; + m.PopulateListTensor(0, {2, -1, 3}, 10, kTfLiteInt32); + + ASSERT_EQ(m.Invoke(), kTfLiteOk); + + const TfLiteTensor* const out = m.GetOutputTensor(m.shape_output_); + ASSERT_THAT(out, DimsAre({3})); + ASSERT_THAT(std::vector(out->data.i32, out->data.i32 + 3), + ElementsAreArray({2, -1, 3})); +} + +TEST(ListElementShapeTest, ScalarShape) { + ListElementShapeModel m; + m.PopulateListTensor(0, {0}, 10, kTfLiteInt32); + + ASSERT_EQ(m.Invoke(), kTfLiteOk); + + const TfLiteTensor* const out = m.GetOutputTensor(m.shape_output_); + ASSERT_THAT(out, DimsAre({0})); + ASSERT_EQ(out->bytes, 0); +} + +TEST(ListElementShapeTest, UnrankedShape) { + ListElementShapeModel m; + m.PopulateListTensor(0, {}, 10, kTfLiteInt32); + + ASSERT_EQ(m.Invoke(), kTfLiteOk); + + const TfLiteTensor* const out = m.GetOutputTensor(m.shape_output_); + ASSERT_THAT(out, DimsAre({})); + ASSERT_EQ(out->bytes, sizeof(int)); + ASSERT_EQ(out->data.i32[0], -1); +} + +} // namespace +} // namespace ops +} // namespace variants +} // namespace tflite diff --git a/tensorflow/lite/kernels/variants/list_ops_lib.h b/tensorflow/lite/kernels/variants/list_ops_lib.h index 4bf1b6038661ed..f0e8eff703b3a9 100644 --- a/tensorflow/lite/kernels/variants/list_ops_lib.h +++ b/tensorflow/lite/kernels/variants/list_ops_lib.h @@ -41,6 +41,8 @@ TfLiteRegistration* Register_LIST_GET_ITEM(); TfLiteRegistration* Register_LIST_LENGTH(); +TfLiteRegistration* Register_LIST_ELEMENT_SHAPE(); + } // namespace ops } // namespace variants } // namespace tflite diff --git a/tensorflow/lite/kernels/variants/register_list_ops.cc 
b/tensorflow/lite/kernels/variants/register_list_ops.cc index 6c9f205377b0c5..cdcc47986158f2 100644 --- a/tensorflow/lite/kernels/variants/register_list_ops.cc +++ b/tensorflow/lite/kernels/variants/register_list_ops.cc @@ -28,6 +28,7 @@ void RegisterListOps(MutableOpResolver* resolver) { resolver->AddCustom("TensorListFromTensor", Register_LIST_FROM_TENSOR()); resolver->AddCustom("TensorListGetItem", Register_LIST_GET_ITEM()); resolver->AddCustom("TensorListLength", Register_LIST_LENGTH()); + resolver->AddCustom("TensorListElementShape", Register_LIST_ELEMENT_SHAPE()); } } // namespace ops From c7b5860dbcc8ad8d8236d0a52603643d5886097a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 25 Sep 2023 22:02:32 -0700 Subject: [PATCH 251/567] Adding helper method to allow setting profile keys map. PiperOrigin-RevId: 568413206 --- third_party/xla/xla/hlo/ir/hlo_module.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/third_party/xla/xla/hlo/ir/hlo_module.h b/third_party/xla/xla/hlo/ir/hlo_module.h index 00356d955e9a61..7ebc303b1b83e8 100644 --- a/third_party/xla/xla/hlo/ir/hlo_module.h +++ b/third_party/xla/xla/hlo/ir/hlo_module.h @@ -563,6 +563,14 @@ class HloModule { autofdo_profile_keys_[profile_type] = std::string(profile_key); } + void set_autofdo_profile_keys( + const absl::flat_hash_map& + profile_keys) { + for (const auto& [profile_type, profile_key] : profile_keys) { + autofdo_profile_keys_[profile_type] = profile_key; + } + } + const absl::flat_hash_map& autofdo_profile_keys() const { return autofdo_profile_keys_; From f0f6eb81e7ed9e835f0ae59194ea389abd4cfe2a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 25 Sep 2023 23:22:57 -0700 Subject: [PATCH 252/567] Fixes build error with clang compiler. 
PiperOrigin-RevId: 568431979 --- tensorflow/core/example/feature_util.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/example/feature_util.h b/tensorflow/core/example/feature_util.h index 777040a692c4dc..5f55138a94f4b0 100644 --- a/tensorflow/core/example/feature_util.h +++ b/tensorflow/core/example/feature_util.h @@ -223,7 +223,7 @@ struct NoneSuch {}; // True if the Feature map in a tf.Example supports heterogenous lookup. // See https://abseil.io/tips/144. inline constexpr bool kFeatureMapHasHeterogeneousLookup = - Requires( + Requires( [](auto&& c) -> decltype(c.find(NoneSuch{})) {}); // Converts an `absl::string_view` into a string-type compatible for use in the From 42af713661bf7324c0a8e9b2ee21750ec417cd5e Mon Sep 17 00:00:00 2001 From: Shanbin Ke Date: Mon, 25 Sep 2023 23:24:38 -0700 Subject: [PATCH 253/567] PR #5881: [XLA:GPU] remove unused buffers to fix cudnn graph Imported from GitHub PR https://github.com/openxla/xla/pull/5881 This PR is to fix the bug introduced in commit https://github.com/openxla/xla/pull/5184/commits/25aa1baa87abb259e3e0350737c4d39a715183a0 in runner clean up PR: https://github.com/openxla/xla/pull/5184. Unused buffers in fused attention need to be removed from data_vec so cudnn graph finalization doesn't error out. 
Copybara import of the project: -- 0e3302504012794982a2a7acbc157767ccf07fcb by cjkkkk : rm unused buffers to fix cudnn graph -- b16f756f7f71dd6f1b5959dfe29ff8ec619bae6f by cjkkkk : fix format Merging this change closes #5881 PiperOrigin-RevId: 568432367 --- third_party/xla/xla/stream_executor/cuda/cuda_dnn.cc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/third_party/xla/xla/stream_executor/cuda/cuda_dnn.cc b/third_party/xla/xla/stream_executor/cuda/cuda_dnn.cc index c6496ca684ab85..4ebad8290a80ee 100644 --- a/third_party/xla/xla/stream_executor/cuda/cuda_dnn.cc +++ b/third_party/xla/xla/stream_executor/cuda/cuda_dnn.cc @@ -6734,6 +6734,16 @@ class CudnnExecutionPlanRunner data_ptrs_vec.pop_back(); } + if (sizeof...(Args) == 7 || sizeof...(Args) == 11) { + // is fused attention fwd and bwd + // remove empty buffers from the list + data_ptrs_vec.erase( + std::remove(data_ptrs_vec.begin(), data_ptrs_vec.end(), nullptr), + data_ptrs_vec.end()); + // ensure the size is equal after removing useless pointers + CHECK(data_ptrs_vec.size() == data_uids_vec.size()); + } + if (should_add_scalars) { data_uids_vec.insert(data_uids_vec.end(), scalar_input_uids_.begin(), scalar_input_uids_.end()); From 8eba8a15c600fea12eb5f403eeed56ce9c315a03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tam=C3=A1s=20Danyluk?= Date: Tue, 26 Sep 2023 00:05:36 -0700 Subject: [PATCH 254/567] [XLA:GPU] Fall back to cuBLAS in TritonAutotuner if that's faster PiperOrigin-RevId: 568440756 --- third_party/xla/xla/debug_options_flags.cc | 7 +++ third_party/xla/xla/service/gpu/BUILD | 3 ++ .../xla/xla/service/gpu/float_support_test.cc | 1 + .../xla/xla/service/gpu/gpu_compiler.cc | 12 ++++- .../gpu/ir_emitter_triton_large_test.cc | 10 +++- .../xla/service/gpu/ir_emitter_triton_test.cc | 27 ++++++---- .../xla/service/gpu/nvptx_compiler_test.cc | 12 ++++- .../xla/xla/service/gpu/triton_autotuner.cc | 54 +++++++++++++++---- .../xla/service/gpu/triton_autotuner_test.cc | 8 +-- 
third_party/xla/xla/xla.proto | 6 ++- 10 files changed, 111 insertions(+), 29 deletions(-) diff --git a/third_party/xla/xla/debug_options_flags.cc b/third_party/xla/xla/debug_options_flags.cc index f399ccd33c37a5..549b9e8846142e 100644 --- a/third_party/xla/xla/debug_options_flags.cc +++ b/third_party/xla/xla/debug_options_flags.cc @@ -193,6 +193,7 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() { opts.set_xla_gpu_single_wave_autotuning(true); opts.set_xla_gpu_enable_reduction_epilogue_fusion(true); opts.set_xla_gpu_enable_nccl_clique_optimization(false); + opts.set_xla_gpu_cublas_fallback(true); return opts; } @@ -1266,6 +1267,12 @@ void MakeDebugOptionsFlags(std::vector* flag_list, &DebugOptions::set_xla_gpu_enable_nccl_clique_optimization), debug_options->xla_gpu_enable_nccl_clique_optimization(), "Allow early return when acquiring NCCL cliques")); + flag_list->push_back( + tsl::Flag("xla_gpu_cublas_fallback", + bool_setter_for(&DebugOptions::set_xla_gpu_cublas_fallback), + debug_options->xla_gpu_cublas_fallback(), + "Allow Triton GEMM autotuning to fall back to cuBLAS when that " + "is faster.")); } // NOLINT(readability/fn_size) // Allocates flag_values and flag_objects; this function must not be called more diff --git a/third_party/xla/xla/service/gpu/BUILD b/third_party/xla/xla/service/gpu/BUILD index 3f0ca3b23d173c..eb65a040ba5708 100644 --- a/third_party/xla/xla/service/gpu/BUILD +++ b/third_party/xla/xla/service/gpu/BUILD @@ -605,6 +605,7 @@ xla_test( "//xla:error_spec", "//xla:xla_proto_cc", "//xla/service/gpu/tests:gpu_codegen_test", + "//xla/tests:hlo_test_base", "//xla/tests:xla_internal_test_main", # fixdeps: keep "@com_google_googletest//:gtest", ], @@ -669,6 +670,7 @@ cc_library( "@com_google_absl//absl/synchronization", "@com_google_absl//absl/time", "@com_google_absl//absl/types:span", + "@local_config_cuda//cuda:cuda_headers", "//xla:autotuning_proto_cc", "//xla:shape_util", "//xla:status_macros", @@ -2918,6 +2920,7 @@ xla_cc_test( 
":nvptx_compiler_impl", "//xla:statusor", "//xla:util", + "//xla:xla_proto_cc", "//xla/hlo/ir:hlo", "//xla/service:backend", "//xla/service:buffer_assignment", diff --git a/third_party/xla/xla/service/gpu/float_support_test.cc b/third_party/xla/xla/service/gpu/float_support_test.cc index 3f0bf03a2b1487..b7e2ffd6990b60 100644 --- a/third_party/xla/xla/service/gpu/float_support_test.cc +++ b/third_party/xla/xla/service/gpu/float_support_test.cc @@ -38,6 +38,7 @@ class FloatSupportTestWithTriton : public HloTestBase { DebugOptions GetDebugOptionsForTest() override { DebugOptions debug_options = HloTestBase::GetDebugOptionsForTest(); debug_options.set_xla_gpu_triton_gemm_any(true); + debug_options.set_xla_gpu_cublas_fallback(false); return debug_options; } }; diff --git a/third_party/xla/xla/service/gpu/gpu_compiler.cc b/third_party/xla/xla/service/gpu/gpu_compiler.cc index f025ef98cc0af9..cdf2a46ba69ed8 100644 --- a/third_party/xla/xla/service/gpu/gpu_compiler.cc +++ b/third_party/xla/xla/service/gpu/gpu_compiler.cc @@ -1000,10 +1000,18 @@ Status GpuCompiler::OptimizeHloPostLayoutAssignment( // f32). add_float_normalization(pipeline); - TF_RETURN_IF_ERROR(AddConvAndGemmAutotuningPasses( - &pipeline, hlo_module, autotune_config, thread_pool)); TF_RETURN_IF_ERROR(AddTritonGemmAutotuningPasses( &pipeline, hlo_module, autotune_config, thread_pool)); + // Inline back the calls which have better performance with cuBLAS. + pipeline.AddPass(); + // TODO(tdanyluk): Apply CublasPadForGemms to the cuBLAS GEMMs generated + // here for possibly better cuBLAS performance. + pipeline.AddPass(gpu_version); + // Rewrite GEMMs with broadcasted inputs as strided GEMMs. + pipeline.AddPass(); + + TF_RETURN_IF_ERROR(AddConvAndGemmAutotuningPasses( + &pipeline, hlo_module, autotune_config, thread_pool)); // The Triton autotuner can insert new bf16 reductions that need to be // normalized again. 
diff --git a/third_party/xla/xla/service/gpu/ir_emitter_triton_large_test.cc b/third_party/xla/xla/service/gpu/ir_emitter_triton_large_test.cc index 0ca4051230e49b..2fc8733027ac26 100644 --- a/third_party/xla/xla/service/gpu/ir_emitter_triton_large_test.cc +++ b/third_party/xla/xla/service/gpu/ir_emitter_triton_large_test.cc @@ -18,13 +18,21 @@ limitations under the License. #include #include "xla/error_spec.h" #include "xla/service/gpu/tests/gpu_codegen_test.h" +#include "xla/tests/hlo_test_base.h" #include "xla/xla.pb.h" namespace xla { namespace gpu { namespace { -using TritonGemmTest = GpuCodegenTest; +class TritonGemmTest : public GpuCodegenTest { + public: + DebugOptions GetDebugOptionsForTest() override { + DebugOptions debug_options = HloTestBase::GetDebugOptionsForTest(); + debug_options.set_xla_gpu_cublas_fallback(false); + return debug_options; + } +}; TEST_F(TritonGemmTest, IndexUsing64Bits) { const char* kHloTextRef = R"( diff --git a/third_party/xla/xla/service/gpu/ir_emitter_triton_test.cc b/third_party/xla/xla/service/gpu/ir_emitter_triton_test.cc index 65f6906475d8e2..014b006b6ab2a7 100644 --- a/third_party/xla/xla/service/gpu/ir_emitter_triton_test.cc +++ b/third_party/xla/xla/service/gpu/ir_emitter_triton_test.cc @@ -57,7 +57,22 @@ namespace { namespace m = ::xla::match; -class TritonGemmNoTF32Test : public GpuCodegenTest { +class TritonGemmTest : public GpuCodegenTest { + public: + se::CudaComputeCapability GetCudaComputeCapability() { + return backend() + .default_stream_executor() + ->GetDeviceDescription() + .cuda_compute_capability(); + } + DebugOptions GetDebugOptionsForTest() override { + DebugOptions debug_options = HloTestBase::GetDebugOptionsForTest(); + debug_options.set_xla_gpu_cublas_fallback(false); + return debug_options; + } +}; + +class TritonGemmNoTF32Test : public TritonGemmTest { public: void SetUp() override { tf32_state_ = tsl::tensor_float_32_execution_enabled(); @@ -100,16 +115,6 @@ CHECK-NOT: mma )"); } -class 
TritonGemmTest : public GpuCodegenTest { - public: - se::CudaComputeCapability GetCudaComputeCapability() { - return backend() - .default_stream_executor() - ->GetDeviceDescription() - .cuda_compute_capability(); - } -}; - TEST_F(TritonGemmTest, DebugOptionsArePropagated) { const std::string kHloText = R"( ENTRY e { diff --git a/third_party/xla/xla/service/gpu/nvptx_compiler_test.cc b/third_party/xla/xla/service/gpu/nvptx_compiler_test.cc index 9a76f025c9341b..9e61390635a471 100644 --- a/third_party/xla/xla/service/gpu/nvptx_compiler_test.cc +++ b/third_party/xla/xla/service/gpu/nvptx_compiler_test.cc @@ -25,6 +25,7 @@ limitations under the License. #include "xla/statusor.h" #include "xla/tests/hlo_test_base.h" #include "xla/util.h" +#include "xla/xla.pb.h" #include "tsl/platform/statusor.h" namespace xla { @@ -40,6 +41,15 @@ class NVPTXCompilerTest : public HloTestBase { } }; +class NVPTXCompilerTestTriton : public NVPTXCompilerTest { + public: + DebugOptions GetDebugOptionsForTest() override { + DebugOptions debug_options = HloTestBase::GetDebugOptionsForTest(); + debug_options.set_xla_gpu_cublas_fallback(false); + return debug_options; + } +}; + TEST_F(NVPTXCompilerTest, AllReducePerformedInplace) { const absl::string_view hlo_string = R"( HloModule Module, input_output_alias={ {}: (0, {}, may-alias) } @@ -96,7 +106,7 @@ ENTRY entry { all_reduce, {1}, all_reduce->operand(1), {})); } -TEST_F(NVPTXCompilerTest, +TEST_F(NVPTXCompilerTestTriton, DotDimensionAreSortedBeforePaddingForCublasEnablingTritonFusion) { MatchOptimizedHlo(R"( ENTRY e { diff --git a/third_party/xla/xla/service/gpu/triton_autotuner.cc b/third_party/xla/xla/service/gpu/triton_autotuner.cc index fd5d77258cb907..d0dc5fc2fcbe64 100644 --- a/third_party/xla/xla/service/gpu/triton_autotuner.cc +++ b/third_party/xla/xla/service/gpu/triton_autotuner.cc @@ -36,6 +36,7 @@ limitations under the License. 
#include "absl/synchronization/mutex.h" #include "absl/time/time.h" #include "absl/types/span.h" +#include "third_party/gpus/cuda/include/cublas_v2.h" #include "xla/autotuning.pb.h" #include "xla/hlo/ir/dfs_hlo_visitor_with_default.h" #include "xla/hlo/ir/hlo_casting_utils.h" @@ -133,14 +134,28 @@ class TritonAutotunerVisitor : public DfsHloRewriteVisitor { })); VLOG(2) << "Result: " << autotune_result.ShortDebugString(); - TF_RET_CHECK(autotune_result.has_triton()); - *backend_config.mutable_triton_gemm_config() = autotune_result.triton(); - TF_RETURN_IF_ERROR(hlo->set_backend_config(backend_config)); + if (autotune_result.has_triton()) { + *backend_config.mutable_triton_gemm_config() = autotune_result.triton(); + TF_RETURN_IF_ERROR(hlo->set_backend_config(backend_config)); + } else { + // Falling back to cuBLAS: Converting the fusion to a Call, so that it + // can be inlined back again. + HloComputation* const computation = hlo->parent(); + HloInstruction* const call = computation->AddInstruction( + HloInstruction::CreateCall(hlo->shape(), hlo->operands(), + hlo->fused_instructions_computation())); + TF_RETURN_IF_ERROR(computation->ReplaceInstruction(hlo, call)); + hlo = call; + } } - const AutotuneResult::TritonGemmKey& tiling = - backend_config.triton_gemm_config(); - if (tiling.split_k() > 1) { - TF_RETURN_IF_ERROR(MakeDotSplitKBatch(hlo, tiling)); + + // This cannot be the "else" branch of the previous "if". 
+ if (backend_config.has_triton_gemm_config()) { + const AutotuneResult::TritonGemmKey& tiling = + backend_config.triton_gemm_config(); + if (tiling.split_k() > 1) { + TF_RETURN_IF_ERROR(MakeDotSplitKBatch(hlo, tiling)); + } } MarkAsChanged(); @@ -372,6 +387,11 @@ StatusOr> CublasGemmAutotuneExtractor( GetGpuDeviceInfo(config.GetExecutor())); TF_RETURN_IF_ERROR(rewriter.Run(new_module.get()).status()); TF_RETURN_IF_ERROR(fusion_pass.Run(new_module.get()).status()); + // TODO(tdanyluk): Consider running GemmAlgorithmPicker here for better cuBLAS + // performance. It is probably not needed on Ampere and later because cuBLAS + // ignores the algorithm parameter for those targets. If we run + // GemmAlgorithmPicker, we probably should not run this in parallel with other + // compilations. return new_module; } @@ -668,10 +688,26 @@ StatusOr Execute(const AutotuneConfig& config, VLOG(2) << "Done running."; TF_ASSIGN_OR_RETURN( - AutotuneResult best, + AutotuneResult best_triton, PickBestResult(results, root.ToString(), root.GetModule()->config())); - return best; + if (debug_opts.xla_gpu_cublas_fallback()) { + const absl::Duration best_triton_duration = + tsl::proto_utils::FromDurationProto(best_triton.run_time()); + if (cublas_duration < best_triton_duration) { + VLOG(1) << "Falling back to cuBLAS for " << fusion->name(); + + AutotuneResult cublas; + *cublas.mutable_run_time() = + tsl::proto_utils::ToDurationProto(cublas_duration); + // We will ignore this value anyway. 
+ cublas.mutable_gemm()->set_algorithm(CUBLAS_GEMM_DEFAULT); + + return cublas; + } + } + + return best_triton; } Status DumpAutotunedFusions(const AutotuneConfig& config, diff --git a/third_party/xla/xla/service/gpu/triton_autotuner_test.cc b/third_party/xla/xla/service/gpu/triton_autotuner_test.cc index 217f030519719d..c61522ff9fb2ac 100644 --- a/third_party/xla/xla/service/gpu/triton_autotuner_test.cc +++ b/third_party/xla/xla/service/gpu/triton_autotuner_test.cc @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ - #include "xla/service/gpu/triton_autotuner.h" #include @@ -149,6 +148,7 @@ class TritonAutotunerTest : public HloTestBase { DebugOptions GetDebugOptionsForTest() override { DebugOptions debug_options = HloTestBase::GetDebugOptionsForTest(); debug_options.set_xla_gpu_enable_triton_gemm(true); + debug_options.set_xla_gpu_cublas_fallback(false); return debug_options; } @@ -459,6 +459,7 @@ class TritonAutotunerLevelTest : public HloTestBase, DebugOptions GetDebugOptionsForTest() override { DebugOptions debug_options = HloTestBase::GetDebugOptionsForTest(); debug_options.set_xla_gpu_autotune_level(GetParam()); + debug_options.set_xla_gpu_cublas_fallback(false); return debug_options; } }; @@ -498,8 +499,7 @@ INSTANTIATE_TEST_SUITE_P(TritonAutotunerLevelSweep, TritonAutotunerLevelTest, class TritonAutotunerExhaustiveTest : public TritonAutotunerTest { public: DebugOptions GetDebugOptionsForTest() override { - DebugOptions debug_options = HloTestBase::GetDebugOptionsForTest(); - debug_options.set_xla_gpu_enable_triton_gemm(true); + DebugOptions debug_options = TritonAutotunerTest::GetDebugOptionsForTest(); debug_options.set_xla_gpu_exhaustive_tiling_search(true); return debug_options; } @@ -543,7 +543,7 @@ ENTRY e { class 
TritonAutotunerDisableSplitK : public TritonAutotunerTest { public: DebugOptions GetDebugOptionsForTest() override { - DebugOptions debug_options = HloTestBase::GetDebugOptionsForTest(); + DebugOptions debug_options = TritonAutotunerTest::GetDebugOptionsForTest(); debug_options.set_xla_gpu_enable_split_k_autotuning(false); return debug_options; } diff --git a/third_party/xla/xla/xla.proto b/third_party/xla/xla/xla.proto index c0c3d9aaeeeb2b..b4c617d5db56e6 100644 --- a/third_party/xla/xla/xla.proto +++ b/third_party/xla/xla/xla.proto @@ -616,7 +616,11 @@ message DebugOptions { // Replace custom calls with noop operations. bool xla_gpu_mock_custom_calls = 245; - // Next id: 247 + // Allow Triton GEMM autotuning to fall back to cuBLAS when that is + // faster. + bool xla_gpu_cublas_fallback = 247; + + // Next id: 248 // Extra options to pass to the compilation backend (e.g. LLVM); specific // interpretation of these values is left to the backend. From 8b579914b4b4ebeebdb93728fc06f256fa64bd64 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Tue, 26 Sep 2023 00:31:22 -0700 Subject: [PATCH 255/567] [stream_executor] NFC: Make stream_executor:executor_cache implementation private PiperOrigin-RevId: 568446172 --- third_party/xla/xla/service/gpu/runtime/BUILD | 3 +- third_party/xla/xla/stream_executor/BUILD | 45 +++++++++---------- .../xla/xla/stream_executor/cuda/BUILD | 1 - .../xla/xla/stream_executor/executor_cache.cc | 2 +- .../xla/xla/stream_executor/host/BUILD | 7 ++- .../xla/xla/stream_executor/rocm/BUILD | 1 - 6 files changed, 26 insertions(+), 33 deletions(-) diff --git a/third_party/xla/xla/service/gpu/runtime/BUILD b/third_party/xla/xla/service/gpu/runtime/BUILD index 168a6e04b071d4..fc2a57803bafe0 100644 --- a/third_party/xla/xla/service/gpu/runtime/BUILD +++ b/third_party/xla/xla/service/gpu/runtime/BUILD @@ -56,8 +56,7 @@ cc_library( "//xla/service/gpu:gpu_executable_run_options", "//xla/service/gpu:nccl_collective_thunks", "//xla/service/gpu:thunk", - 
"//xla/stream_executor:event", - "//xla/stream_executor:executor_cache", + "//xla/stream_executor", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", ], diff --git a/third_party/xla/xla/stream_executor/BUILD b/third_party/xla/xla/stream_executor/BUILD index ab8e0b77ef79f1..3d8b798c4b3389 100644 --- a/third_party/xla/xla/stream_executor/BUILD +++ b/third_party/xla/xla/stream_executor/BUILD @@ -247,6 +247,26 @@ filegroup( # implementation) or `stream_executor_headers` (only headers, if there is a reason not to link # implementation) if they want to use StreamExecutor. +cc_library( + name = "executor_cache", + srcs = ["executor_cache.cc"], + hdrs = ["executor_cache.h"], + visibility = ["//visibility:public"], + deps = [ + ":platform", + ":stream_executor_headers", + "//xla/stream_executor/platform", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/container:node_hash_map", + "@com_google_absl//absl/log", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/status", + "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/synchronization", + "@local_tsl//tsl/platform:statusor", + ], +) + cc_library( name = "kernel_spec", srcs = ["kernel_spec.cc"], @@ -379,30 +399,6 @@ cc_library( ], ) -cc_library( - name = "executor_cache", - srcs = ["executor_cache.cc"], - hdrs = ["executor_cache.h"], - visibility = ["//visibility:public"], - deps = [ - ":platform", - ":stream_executor_headers", - "//xla/stream_executor/platform", - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/container:node_hash_map", - "@com_google_absl//absl/functional:any_invocable", - "@com_google_absl//absl/log", - "@com_google_absl//absl/log:check", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/status", - "@com_google_absl//absl/strings:str_format", - "@com_google_absl//absl/synchronization", - "@com_google_absl//absl/types:optional", - "@com_google_absl//absl/types:span", 
- "@local_tsl//tsl/platform:statusor", - ], -) - cc_library( name = "multi_platform_manager", srcs = ["multi_platform_manager.cc"], @@ -747,6 +743,7 @@ cc_library( ":dnn", ":dnn_proto_cc", ":event", + ":executor_cache", ":kernel", ":kernel_spec", ":multi_platform_manager", diff --git a/third_party/xla/xla/stream_executor/cuda/BUILD b/third_party/xla/xla/stream_executor/cuda/BUILD index 3b5e64c27d93ea..01b9bb3c68a497 100644 --- a/third_party/xla/xla/stream_executor/cuda/BUILD +++ b/third_party/xla/xla/stream_executor/cuda/BUILD @@ -59,7 +59,6 @@ cc_library( ":cuda_platform_id", ":cuda_activation", "//xla/stream_executor", # buildcleaner: keep - "//xla/stream_executor:executor_cache", "//xla/stream_executor:multi_platform_manager", "//xla/stream_executor:stream_executor_headers", "//xla/stream_executor/platform", diff --git a/third_party/xla/xla/stream_executor/executor_cache.cc b/third_party/xla/xla/stream_executor/executor_cache.cc index 10b3627f7aee3a..994a3e44179355 100644 --- a/third_party/xla/xla/stream_executor/executor_cache.cc +++ b/third_party/xla/xla/stream_executor/executor_cache.cc @@ -23,7 +23,7 @@ limitations under the License. 
#include "absl/strings/str_format.h" #include "absl/synchronization/mutex.h" #include "xla/stream_executor/platform.h" -#include "xla/stream_executor/stream_executor_pimpl.h" +#include "xla/stream_executor/stream_executor.h" #include "tsl/platform/statusor.h" namespace stream_executor { diff --git a/third_party/xla/xla/stream_executor/host/BUILD b/third_party/xla/xla/stream_executor/host/BUILD index d6b2d697af45d5..30bfc2bacdb8ee 100644 --- a/third_party/xla/xla/stream_executor/host/BUILD +++ b/third_party/xla/xla/stream_executor/host/BUILD @@ -42,9 +42,8 @@ cc_library( deps = [ ":host_gpu_executor", ":host_platform_id", - "//xla/stream_executor:executor_cache", + "//xla/stream_executor", "//xla/stream_executor:multi_platform_manager", - "//xla/stream_executor:stream_executor_headers", "//xla/stream_executor/platform", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings:str_format", @@ -63,7 +62,8 @@ cc_library( ], visibility = ["//visibility:public"], deps = [ - "//xla/stream_executor:kernel", + "//xla/stream_executor", + "//xla/stream_executor:stream_executor_internal", "@com_google_absl//absl/functional:any_invocable", "@com_google_absl//absl/synchronization", "@local_tsl//tsl/platform:denormal", @@ -86,7 +86,6 @@ cc_library( ":host_platform_id", ":host_stream", "//xla/stream_executor", - "//xla/stream_executor:kernel", "//xla/stream_executor:stream_executor_internal", "@com_google_absl//absl/functional:any_invocable", "@com_google_absl//absl/strings", diff --git a/third_party/xla/xla/stream_executor/rocm/BUILD b/third_party/xla/xla/stream_executor/rocm/BUILD index 57826e043149ca..74f710420f6e40 100644 --- a/third_party/xla/xla/stream_executor/rocm/BUILD +++ b/third_party/xla/xla/stream_executor/rocm/BUILD @@ -159,7 +159,6 @@ cc_library( "@com_google_absl//absl/base", "@com_google_absl//absl/memory", "//xla/stream_executor", # buildcleaner: keep - "//xla/stream_executor:executor_cache", "//xla/stream_executor:multi_platform_manager", 
"//xla/stream_executor:stream_executor_headers", "//xla/stream_executor/platform", From 2f94ec0c2ed3d16d846f0244c6f8ddcb1ae56e86 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 26 Sep 2023 02:03:33 -0700 Subject: [PATCH 256/567] compat: Update forward compatibility horizon to 2023-09-26 PiperOrigin-RevId: 568466907 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index dbb8b5b32bce92..bc3240a9edd402 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -29,7 +29,7 @@ # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2023, 9, 25) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2023, 9, 26) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From a6815d063b081eab127143de6316681847fecfb0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 26 Sep 2023 02:03:33 -0700 Subject: [PATCH 257/567] Update GraphDef version to 1631. PiperOrigin-RevId: 568466908 --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 3f5ae3ed744546..06e48524defcb2 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 1630 // Updated: 2023/9/25 +#define TF_GRAPH_DEF_VERSION 1631 // Updated: 2023/9/26 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). 
// From c2ae731c249b9ad452304a505929a7de8faf12a8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 26 Sep 2023 02:06:31 -0700 Subject: [PATCH 258/567] Make SimplifyFPConversions check creation pass ids This is adding an additional mode to `SimplifyFPConversions` which will only simplify chains of converts if they were generated by a previous optimization pass. The pass is checking whether these convert ops were created by an optimization pass or part of the input HLO by checking the `logical_creation_pass_id` metadata field. A pass id of -1 means they were part of the input. Note that this change is only preparing the pass and adding changes, but it is not yet changing any behaviour in XLA. PiperOrigin-RevId: 568467656 --- third_party/xla/xla/service/BUILD | 6 +- .../xla/xla/service/gpu/gpu_compiler.cc | 3 +- .../xla/service/simplify_fp_conversions.cc | 37 +++++++- .../xla/xla/service/simplify_fp_conversions.h | 21 ++++- .../service/simplify_fp_conversions_test.cc | 88 ++++++++++++++++++- 5 files changed, 144 insertions(+), 11 deletions(-) diff --git a/third_party/xla/xla/service/BUILD b/third_party/xla/xla/service/BUILD index b29f6d1a832b07..f20a5fcf8e77b8 100644 --- a/third_party/xla/xla/service/BUILD +++ b/third_party/xla/xla/service/BUILD @@ -2525,8 +2525,12 @@ cc_library( deps = [ ":hlo_pass", "//xla:comparison_util", + "//xla:statusor", + "//xla:util", "//xla/hlo/ir:hlo", - "@local_tsl//tsl/platform:statusor", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/strings:str_format", + "@local_tsl//tsl/platform:errors", ], ) diff --git a/third_party/xla/xla/service/gpu/gpu_compiler.cc b/third_party/xla/xla/service/gpu/gpu_compiler.cc index cdf2a46ba69ed8..db3c2cec28c571 100644 --- a/third_party/xla/xla/service/gpu/gpu_compiler.cc +++ b/third_party/xla/xla/service/gpu/gpu_compiler.cc @@ -921,7 +921,8 @@ Status GpuCompiler::OptimizeHloPostLayoutAssignment( sub_pipeline.AddPass(&f8e4m3fnuz_support); // Remove `f32 -> 
bf16 -> f32` casts inserted by bf16 normalization. if (debug_options.xla_gpu_simplify_all_fp_conversions()) { - sub_pipeline.AddPass(); + sub_pipeline.AddPass( + SimplifyFPConversions::Scope::kSimplifyAllConversions); } }; diff --git a/third_party/xla/xla/service/simplify_fp_conversions.cc b/third_party/xla/xla/service/simplify_fp_conversions.cc index 8651a5550c5ba3..22fbf3961506c9 100644 --- a/third_party/xla/xla/service/simplify_fp_conversions.cc +++ b/third_party/xla/xla/service/simplify_fp_conversions.cc @@ -15,23 +15,35 @@ limitations under the License. #include "xla/service/simplify_fp_conversions.h" +#include +#include + +#include "absl/container/flat_hash_set.h" +#include "absl/strings/str_format.h" #include "xla/hlo/ir/hlo_computation.h" #include "xla/hlo/ir/hlo_instruction.h" #include "xla/hlo/ir/hlo_opcode.h" #include "xla/primitive_util.h" -#include "tsl/platform/statusor.h" +#include "xla/statusor.h" +#include "xla/util.h" +#include "tsl/platform/errors.h" namespace xla { namespace { // Simplifies floating-point conversions `A -> B -> C -> D` as `A -> D`. -StatusOr RunOnComputation(HloComputation& computation) { +StatusOr RunOnComputation(HloComputation& computation, + SimplifyFPConversions::Scope scope) { + const int minimum_logical_creation_pass_id = + (scope == SimplifyFPConversions::Scope::kSimplifyAllConversions) ? 
-1 : 0; bool changed = false; for (HloInstruction* instruction : computation.MakeInstructionPostOrder()) { HloInstruction* input = instruction; size_t convert_chain_length = 0; while ((input->opcode() == HloOpcode::kConvert) && + (input->metadata().logical_creation_pass_id() >= + minimum_logical_creation_pass_id) && primitive_util::IsFloatingPointType(input->shape().element_type())) { input = input->mutable_operand(0); ++convert_chain_length; @@ -52,17 +64,36 @@ StatusOr RunOnComputation(HloComputation& computation) { return changed; } +std::string ToString(SimplifyFPConversions::Scope scope) { + using Scope = SimplifyFPConversions::Scope; + switch (scope) { + case Scope::kSimplifyAllConversions: + return "SimplifyAllConversions"; + case Scope::kOnlySimplifyCompilerGeneratedConversions: + return "OnlySimplifyCompilerGeneratedConversions"; + } +} + } // namespace StatusOr SimplifyFPConversions::Run( HloModule* module, const absl::flat_hash_set& execution_threads) { + XLA_VLOG_LINES( + 2, + absl::StrFormat("SimplifyFPConversions::Run() with scope=%s, before:\n%s", + ToString(scope_), module->ToString())); bool changed = false; for (HloComputation* computation : module->MakeComputationPostOrder(execution_threads)) { - TF_ASSIGN_OR_RETURN(bool comp_changed, RunOnComputation(*computation)); + TF_ASSIGN_OR_RETURN(bool comp_changed, + RunOnComputation(*computation, scope_)); changed |= comp_changed; } + XLA_VLOG_LINES( + 2, + absl::StrFormat("SimplifyFPConversions::Run() with scope=%s, after:\n%s", + ToString(scope_), module->ToString())); return changed; } diff --git a/third_party/xla/xla/service/simplify_fp_conversions.h b/third_party/xla/xla/service/simplify_fp_conversions.h index 5951148a54272e..3904fa37505a52 100644 --- a/third_party/xla/xla/service/simplify_fp_conversions.h +++ b/third_party/xla/xla/service/simplify_fp_conversions.h @@ -16,9 +16,10 @@ limitations under the License. 
#ifndef XLA_SERVICE_SIMPLIFY_FP_CONVERSIONS_H_ #define XLA_SERVICE_SIMPLIFY_FP_CONVERSIONS_H_ +#include "absl/container/flat_hash_set.h" #include "xla/hlo/ir/hlo_module.h" #include "xla/service/hlo_pass_interface.h" -#include "tsl/platform/statusor.h" +#include "xla/statusor.h" namespace xla { @@ -26,16 +27,30 @@ namespace xla { // // The algebraic simplifier will remove convert pairs of the form `X -> Y -> X`, // only when they are a no-op (e.g. `bf16 -> f32 -> bf16`). This passes does -// similar, but will simplify any chain of float conversions, possibly improving -// accuracy (e.g. `f32 -> bf16 -> f32` is removed). +// similar, but has two scopes: +// - kSimplifyAllConversions: Simplify any chain of float conversions, possibly +// improving accuracy (e.g. `f32 -> bf16 -> f32` is removed). +// - kOnlySimplifyCompilerGeneratedConversions: Only simplify chains of float +// conversions generated by the compiler in one of the previous optimization +// passes. class SimplifyFPConversions : public HloModulePass { public: + enum class Scope { + kOnlySimplifyCompilerGeneratedConversions, + kSimplifyAllConversions + }; + + explicit SimplifyFPConversions(Scope scope) : scope_(scope) {} + absl::string_view name() const override { return "simplify-fp-conversions"; } using HloPassInterface::Run; StatusOr Run( HloModule* module, const absl::flat_hash_set& execution_threads) override; + + private: + Scope scope_; }; } // namespace xla diff --git a/third_party/xla/xla/service/simplify_fp_conversions_test.cc b/third_party/xla/xla/service/simplify_fp_conversions_test.cc index 9d60d68e395160..fae975963adb46 100644 --- a/third_party/xla/xla/service/simplify_fp_conversions_test.cc +++ b/third_party/xla/xla/service/simplify_fp_conversions_test.cc @@ -15,8 +15,13 @@ limitations under the License. 
#include "xla/service/simplify_fp_conversions.h" +#include + #include "absl/strings/string_view.h" +#include "xla/hlo/ir/hlo_computation.h" +#include "xla/hlo/ir/hlo_instruction.h" #include "xla/hlo/ir/hlo_module.h" +#include "xla/hlo/ir/hlo_opcode.h" #include "xla/hlo/utils/hlo_matchers.h" #include "xla/tests/hlo_test_base.h" #include "tsl/platform/status_matchers.h" @@ -31,6 +36,32 @@ using ::tsl::testing::IsOkAndHolds; using SimplifyFPConversionsTest = HloTestBase; +// This marks all ops in `module` as user-provided, meaning the +// simplifier won't remove any of the converts +static void InitializeCreationPassIds(HloModule* module) { + constexpr int kUserSuppliedOpCreationPassId = -1; + for (HloComputation* computation : module->computations()) { + for (HloInstruction* instruction : computation->instructions()) { + instruction->set_creation_pass_id(kUserSuppliedOpCreationPassId); + instruction->set_logical_creation_pass_id(kUserSuppliedOpCreationPassId); + } + } +} + +// This marks all converts ops in `module` as being created by the +// optimization pass `creation_pass_id`. 
+static void SetCreationPassIdInAllConvertOps(HloModule* module, + int creation_pass_id) { + for (HloComputation* computation : module->computations()) { + for (HloInstruction* instruction : computation->instructions()) { + if (instruction->opcode() == HloOpcode::kConvert) { + instruction->set_creation_pass_id(creation_pass_id); + instruction->set_logical_creation_pass_id(creation_pass_id); + } + } + } +} + TEST_F(SimplifyFPConversionsTest, DoesNotChangeSingleConvert) { const absl::string_view kModuleStr = R"( HloModule test @@ -43,8 +74,10 @@ TEST_F(SimplifyFPConversionsTest, DoesNotChangeSingleConvert) { )"; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, ParseAndReturnVerifiedModule(kModuleStr)); + InitializeCreationPassIds(module.get()); - SimplifyFPConversions simplifier; + SimplifyFPConversions simplifier{ + SimplifyFPConversions::Scope::kSimplifyAllConversions}; EXPECT_THAT(simplifier.Run(module.get()), IsOkAndHolds(false)); } @@ -61,13 +94,60 @@ TEST_F(SimplifyFPConversionsTest, SimplifiesF32ToBF16ToF32) { )"; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, ParseAndReturnVerifiedModule(kModuleStr)); + InitializeCreationPassIds(module.get()); + + SimplifyFPConversions simplifier{ + SimplifyFPConversions::Scope::kSimplifyAllConversions}; + EXPECT_THAT(simplifier.Run(module.get()), IsOkAndHolds(true)); + EXPECT_THAT(module->entry_computation()->root_instruction(), + op::Tuple(op::Parameter(0))); +} + +TEST_F(SimplifyFPConversionsTest, SimplifiesCompilerGeneratedF32ToBF16ToF32) { + const absl::string_view kModuleStr = R"( + HloModule test + + ENTRY entry { + p0 = f32[2,3] parameter(0) + c0 = bf16[2,3] convert(p0) + c1 = f32[2,3] convert(c0) + ROOT ret = (f32[2,3]) tuple(c1) + } + )"; + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + ParseAndReturnVerifiedModule(kModuleStr)); + InitializeCreationPassIds(module.get()); + + constexpr int kRandomCreationPassId = 42; + SetCreationPassIdInAllConvertOps(module.get(), kRandomCreationPassId); - SimplifyFPConversions 
simplifier; + SimplifyFPConversions simplifier{ + SimplifyFPConversions::Scope::kOnlySimplifyCompilerGeneratedConversions}; EXPECT_THAT(simplifier.Run(module.get()), IsOkAndHolds(true)); EXPECT_THAT(module->entry_computation()->root_instruction(), op::Tuple(op::Parameter(0))); } +TEST_F(SimplifyFPConversionsTest, DoesNotChangeUserInsertedConverts) { + const absl::string_view kModuleStr = R"( + HloModule test + + ENTRY entry { + p0 = f32[2,3] parameter(0) + c0 = bf16[2,3] convert(p0) + c1 = f32[2,3] convert(c0) + ROOT ret = (f32[2,3]) tuple(c1) + } + )"; + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + ParseAndReturnVerifiedModule(kModuleStr)); + InitializeCreationPassIds(module.get()); + + SimplifyFPConversions simplifier{ + SimplifyFPConversions::Scope::kOnlySimplifyCompilerGeneratedConversions}; + EXPECT_THAT(simplifier.Run(module.get()), IsOkAndHolds(false)); +} + TEST_F(SimplifyFPConversionsTest, SimplifiesF64ToF16ToF32ToBF16) { const absl::string_view kModuleStr = R"( HloModule test @@ -82,8 +162,10 @@ TEST_F(SimplifyFPConversionsTest, SimplifiesF64ToF16ToF32ToBF16) { )"; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, ParseAndReturnVerifiedModule(kModuleStr)); + InitializeCreationPassIds(module.get()); - SimplifyFPConversions simplifier; + SimplifyFPConversions simplifier{ + SimplifyFPConversions::Scope::kSimplifyAllConversions}; EXPECT_THAT(simplifier.Run(module.get()), IsOkAndHolds(true)); EXPECT_THAT( module->entry_computation()->root_instruction(), From a1fa36df2097fed9b3f912ca02b737dde5ba81e2 Mon Sep 17 00:00:00 2001 From: Ilia Sergachev Date: Tue, 26 Sep 2023 02:20:28 -0700 Subject: [PATCH 259/567] [XLA:GPU] Fix detection of unsupported transposes in Triton GEMM fusions. 
PiperOrigin-RevId: 568470689 --- .../xla/service/gpu/gemm_rewriter_triton.cc | 37 ++++++++----------- .../service/gpu/gemm_rewriter_triton_test.cc | 14 +++++++ 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/third_party/xla/xla/service/gpu/gemm_rewriter_triton.cc b/third_party/xla/xla/service/gpu/gemm_rewriter_triton.cc index 8f1e0399ddb13a..59cecc4dba6e77 100644 --- a/third_party/xla/xla/service/gpu/gemm_rewriter_triton.cc +++ b/third_party/xla/xla/service/gpu/gemm_rewriter_triton.cc @@ -554,54 +554,48 @@ FusionDecision FusionContext::RequireSupportedDimOrder( const DimensionOrder& order, int64_t& split_dim_major_part) const { VLOG(8) << order.ToString(); const Fragments& tensor_dim_fragments = order.TensorFragmentsOrder(); - auto non_major_fragment_is_sliced = [&](int fragment, int distance_to_end) { - return distance_to_end > 1 && tensor_dim_fragments[fragment].is_sliced(); - }; for (const auto& [dim_index, dim_fragments] : order.DimFragmentsOrders()) { - int split_counter = -1; + CHECK(!dim_fragments.empty()); + for (int i = 0; i < dim_fragments.size() - 1; ++i) { + if (tensor_dim_fragments[dim_fragments[i]].is_sliced()) { + return "Sliced non-major-most fragment."; + } + } + int group_counter = 0; + int last_seen_group_last_fragment_index = -1; auto fragment_it = dim_fragments.cbegin(); - // TODO(b/300892934): simplify the logic. while (true) { if (fragment_it == dim_fragments.cend()) { break; } - if (non_major_fragment_is_sliced(*fragment_it, - dim_fragments.cend() - fragment_it)) { - return "Sliced non-major-most fragment."; - } int64_t grouped_size = tensor_dim_fragments[*fragment_it].full_size(); - // Gather contiguous fragments. + // Gather contiguous fragments: they have consecutive indices. 
while ((fragment_it + 1) != dim_fragments.cend() && *(fragment_it + 1) == *fragment_it + 1) { ++fragment_it; - if (non_major_fragment_is_sliced(*fragment_it, - dim_fragments.cend() - fragment_it)) { - return "Sliced non-major-most fragment."; - } grouped_size *= tensor_dim_fragments[*fragment_it].full_size(); } - + // Ignore 1-sized groups of fragments. if (grouped_size == 1) { ++fragment_it; continue; } - if (fragment_it != dim_fragments.cbegin() && - *fragment_it < *(fragment_it - 1)) { + if (last_seen_group_last_fragment_index > *fragment_it) { return "Transpose within a dimension."; } - ++split_counter; - if (split_counter > 0) { + ++group_counter; + if (group_counter > 1) { if (dim_index == SplittableDimensionIndex() && IsSupportedSplittableDimensionMajorPartSize(grouped_size)) { - if (split_counter == 1) { + if (group_counter == 2) { if (split_dim_major_part != 0 && split_dim_major_part != grouped_size) { return "Conflicting splits of splittable dimension"; } split_dim_major_part = grouped_size; - } else if (split_counter > 1) { + } else if (group_counter > 2) { return "2nd split of a splittable dimension."; } } else { @@ -609,6 +603,7 @@ FusionDecision FusionContext::RequireSupportedDimOrder( } } + last_seen_group_last_fragment_index = *fragment_it; ++fragment_it; } } diff --git a/third_party/xla/xla/service/gpu/gemm_rewriter_triton_test.cc b/third_party/xla/xla/service/gpu/gemm_rewriter_triton_test.cc index b91abc7f617ffe..c72884cf3e912e 100644 --- a/third_party/xla/xla/service/gpu/gemm_rewriter_triton_test.cc +++ b/third_party/xla/xla/service/gpu/gemm_rewriter_triton_test.cc @@ -100,6 +100,20 @@ ENTRY e { GmockMatch(m::Fusion(m::Parameter(), m::Parameter()))); } +TEST_F(GemmRewriterTritonTest, UnsupportedTransposeIsNotFused) { + auto module = ParseAndReturnVerifiedModule(R"( +ENTRY e { + p0 = f16[1,512,8,1024]{3,1,0,2} parameter(0) + c = f16[1,512,8,1024]{3,2,1,0} copy(p0) + b = f16[4096,1024]{1,0} bitcast(c) + p1 = f16[128,1024]{1,0} parameter(1) + ROOT d 
= f16[4096,128]{1,0} dot(b, p1), + lhs_contracting_dims={1}, rhs_contracting_dims={1} +})") + .value(); + EXPECT_FALSE(GemmRewriterTriton(gpu_version_).Run(module.get()).value()); +} + TEST_F(GemmRewriterTritonTest, BitcastChain) { // This HLO is artificial because unnecessary reshapes get optimized // out during compilation. It tests the ability of GemmRewriterTriton From 6c4d42bc1f1de15486ef50a54b99543a6ae9c679 Mon Sep 17 00:00:00 2001 From: TJ Xu Date: Tue, 26 Sep 2023 03:08:47 -0700 Subject: [PATCH 260/567] PR #5473: [NVIDIA XLA GPU] Added a pass to unroll loops by a factor of 2 to achieve double buffering Imported from GitHub PR https://github.com/openxla/xla/pull/5473 This only deals with loops with known trip count. For loops with even trip count, we unroll directly. For loops with odd trip count, we take out 1 iteration into the main computation outside of the main loop and unroll the rest by a factor of 2. Copybara import of the project: -- ff05af9fb793a1b14ffc8de6ba327c20ef339d38 by TJ : Add a new experimental pass called test_double_buffer It's placed before async creator for now to unroll while loops to achieve double buffering indirectly.
However, control dependencies still need to be injected between two loop iterations to ensure execution order -- 293481cd42102f2fce8bddf3febffa9a0b6aec50 by TJ : add support for loops with odd trip count -- b304ec3f6f9a8ef314da0f302c7e7d527e147e4c by TJ : added a separate file to host loop double buffer transformer tests refactored code in loop double buffer transformer added a new test in copy insertion test to verify double buffer -- 49af20bee3d43208a3c517acae0f488d58fa6b4c by TJ : simplify copy loop for unrolling and peeling even further -- 0858b06a3128e64129ad91c22f33baf698d743d4 by TJ : added more tests to cover channel id and control deps Merging this change closes #5473 PiperOrigin-RevId: 568481154 --- third_party/xla/xla/debug_options_flags.cc | 8 + third_party/xla/xla/service/gpu/BUILD | 49 +++ .../xla/xla/service/gpu/gpu_compiler.cc | 7 + .../service/gpu/gpu_copy_insertion_test.cc | 65 ++++ .../gpu/loop_double_buffer_transformer.cc | 267 +++++++++++++ .../gpu/loop_double_buffer_transformer.h | 55 +++ .../loop_double_buffer_transformer_test.cc | 361 ++++++++++++++++++ third_party/xla/xla/xla.proto | 5 +- 8 files changed, 816 insertions(+), 1 deletion(-) create mode 100644 third_party/xla/xla/service/gpu/loop_double_buffer_transformer.cc create mode 100644 third_party/xla/xla/service/gpu/loop_double_buffer_transformer.h create mode 100644 third_party/xla/xla/service/gpu/loop_double_buffer_transformer_test.cc diff --git a/third_party/xla/xla/debug_options_flags.cc b/third_party/xla/xla/debug_options_flags.cc index 549b9e8846142e..97d2c5dfab2c74 100644 --- a/third_party/xla/xla/debug_options_flags.cc +++ b/third_party/xla/xla/debug_options_flags.cc @@ -194,6 +194,8 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() { opts.set_xla_gpu_enable_reduction_epilogue_fusion(true); opts.set_xla_gpu_enable_nccl_clique_optimization(false); opts.set_xla_gpu_cublas_fallback(true); + opts.set_xla_gpu_enable_while_loop_double_buffering(false); + return opts; } @@ -1273,6
+1275,12 @@ void MakeDebugOptionsFlags(std::vector* flag_list, debug_options->xla_gpu_cublas_fallback(), "Allow Triton GEMM autotuning to fall back to cuBLAS when that " "is faster.")); + flag_list->push_back(tsl::Flag( + "xla_gpu_enable_while_loop_double_buffering", + bool_setter_for( + &DebugOptions::set_xla_gpu_enable_while_loop_double_buffering), + debug_options->xla_gpu_enable_while_loop_double_buffering(), + "Enable double buffering for while loop")); } // NOLINT(readability/fn_size) // Allocates flag_values and flag_objects; this function must not be called more diff --git a/third_party/xla/xla/service/gpu/BUILD b/third_party/xla/xla/service/gpu/BUILD index eb65a040ba5708..1c76f62f78b7b1 100644 --- a/third_party/xla/xla/service/gpu/BUILD +++ b/third_party/xla/xla/service/gpu/BUILD @@ -2622,6 +2622,7 @@ cc_library( ":instruction_fusion", ":ir_emission_utils", ":ir_emitter", + ":loop_double_buffer_transformer", ":matmul_utils", ":metrics", ":move_copy_to_users", @@ -4605,3 +4606,51 @@ xla_cc_test( "@local_tsl//tsl/lib/core:status_test_util", ], ) + +cc_library( + name = "loop_double_buffer_transformer", + srcs = ["loop_double_buffer_transformer.cc"], + hdrs = ["loop_double_buffer_transformer.h"], + visibility = ["//visibility:public"], + deps = [ + ":gpu_types", + "//xla:status", + "//xla:statusor", + "//xla:util", + "//xla:xla_data_proto_cc", + "//xla/hlo/ir:hlo", + "//xla/hlo/utils:hlo_query", + "//xla/service:collective_ops_utils", + "//xla/service:hlo_pass", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/log", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/strings", + "@local_tsl//tsl/platform:errors", + "@local_tsl//tsl/platform:statusor", + ], +) + +xla_cc_test( + name = "loop_double_buffer_transformer_test", + srcs = if_gpu_is_configured(["loop_double_buffer_transformer_test.cc"]), + tags = 
tf_cuda_tests_tags(), + deps = if_gpu_is_configured([ + ":gpu_compiler", + ":loop_double_buffer_transformer", + "//xla:test", + "//xla/hlo/ir:hlo", + "//xla/service:tuple_simplifier", + "//xla/tests:hlo_test_base", + "//xla/tests:xla_internal_test_main", + ]) + [ + "//xla:test_helpers", + "//xla:xla_data_proto_cc", + "//xla:xla_proto_cc", + "//xla/service:hlo_dce", + "@com_google_absl//absl/container:flat_hash_set", + "@local_tsl//tsl/platform:statusor", + ], +) diff --git a/third_party/xla/xla/service/gpu/gpu_compiler.cc b/third_party/xla/xla/service/gpu/gpu_compiler.cc index db3c2cec28c571..c6b64525105223 100644 --- a/third_party/xla/xla/service/gpu/gpu_compiler.cc +++ b/third_party/xla/xla/service/gpu/gpu_compiler.cc @@ -116,6 +116,7 @@ limitations under the License. #include "xla/service/gpu/hlo_fusion_stats.h" #include "xla/service/gpu/horizontal_loop_fusion.h" #include "xla/service/gpu/ir_emission_utils.h" +#include "xla/service/gpu/loop_double_buffer_transformer.h" #include "xla/service/gpu/matmul_utils.h" #include "xla/service/gpu/metrics.h" #include "xla/service/gpu/move_copy_to_users.h" @@ -783,6 +784,12 @@ Status GpuCompiler::OptimizeHloModule(HloModule* hlo_module, pipeline.AddPass(blueconnect_num_devices_per_host); } + if (debug_options.xla_gpu_enable_while_loop_double_buffering()) { + pipeline.AddPass(); + pipeline.AddPass(); + pipeline.AddPass(); + } + { // Convert all collectives to their async form, and then annotate the ones // that actually need to run asynchronously with a GPU specific backend diff --git a/third_party/xla/xla/service/gpu/gpu_copy_insertion_test.cc b/third_party/xla/xla/service/gpu/gpu_copy_insertion_test.cc index 8fe7547b870d29..3f3bf3c3cf1d74 100644 --- a/third_party/xla/xla/service/gpu/gpu_copy_insertion_test.cc +++ b/third_party/xla/xla/service/gpu/gpu_copy_insertion_test.cc @@ -504,6 +504,71 @@ ENTRY main { ExpectOptionalFalse(FusionCanShareBufferHint(fusion, fusion->operand(0), {})); } +// For loops unrolled with double 
buffering, +// copyInsertion should not insert any copy. +TEST_F(GpuCopyInsertionTest, UnrolledLoopShouldNotHaveCopy) { + const char* const kModuleString = R"( +HloModule all_gather_overlapping, entry_computation_layout={(f32[1,128]{1,0}, f32[2,128]{1,0})->(f32[1,128]{1,0}, f32[1,128]{1,0}, f32[2,128]{1,0}, s32[])} + +body { + input_tuple_while = (f32[1,128]{1,0}, f32[1,128]{1,0}, f32[2,128]{1,0}, s32[]) parameter(0) + param_1 = f32[2,128]{1,0} get-tuple-element(input_tuple_while), index=2 + c1_s32 = s32[] constant(1) + c0_s32 = s32[] constant(0) + dynamic-slice = f32[1,128]{1,0} dynamic-slice(param_1, c1_s32, c0_s32), dynamic_slice_sizes={1,128} + param_0 = f32[1,128]{1,0} get-tuple-element(input_tuple_while), index=0 + cond.1 = s32[] get-tuple-element(input_tuple_while), index=3 + cond_plus_1 = s32[] add(cond.1, c1_s32) + c0 = f32[] constant(0) + splat_c0 = f32[1,128]{1,0} broadcast(c0), dimensions={} + add = f32[1,128]{1,0} add(splat_c0, param_0) + all-gather-start = (f32[1,128]{1,0}, f32[2,128]{1,0}) all-gather-start(add), channel_id=1337, replica_groups={{0,1}}, dimensions={0}, use_global_device_ids=true + all-gather-done = f32[2,128]{1,0} all-gather-done(all-gather-start) + dynamic-slice.double_buffer_clone = f32[1,128]{1,0} dynamic-slice(all-gather-done, c1_s32, c0_s32), dynamic_slice_sizes={1,128} + splat_c0_unrolled = f32[1,128]{1,0} broadcast(c0), dimensions={} + add.double_buffer_clone = f32[1,128]{1,0} add(splat_c0_unrolled, param_0) + all-gather-start-unrolled = (f32[1,128]{1,0}, f32[2,128]{1,0}) all-gather-start(add.double_buffer_clone), channel_id=1339, replica_groups={{0,1}}, dimensions={0}, use_global_device_ids=true + all-gather-done-unrolled = f32[2,128]{1,0} all-gather-done(all-gather-start-unrolled) + one.2 = s32[] constant(1) + cond_plus_1.double_buffer_clone = s32[] add(cond_plus_1, one.2) + ROOT output_tuple = (f32[1,128]{1,0}, f32[1,128]{1,0}, f32[2,128]{1,0}, s32[]) tuple(param_0, dynamic-slice.double_buffer_clone, 
all-gather-done-unrolled, cond_plus_1.double_buffer_clone) +} + +condition { + input_tuple = (f32[1,128]{1,0}, f32[1,128]{1,0}, f32[2,128]{1,0}, s32[]) parameter(0) + cond = s32[] get-tuple-element(input_tuple), index=3 + trip_count = s32[] constant(10) + ROOT done = pred[] compare(cond, trip_count), direction=LT +} + +ENTRY main { + input_param_0 = f32[1,128]{1,0} parameter(0) + input_param_1 = f32[2,128]{1,0} parameter(1) + constant_1 = s32[] constant(1) + constant_0 = s32[] constant(0) + dynamic-slice-main = f32[1,128]{1,0} dynamic-slice(input_param_1, constant_1, constant_0), dynamic_slice_sizes={1,128} + float0 = f32[] constant(0) + splat_float0 = f32[1,128]{1,0} broadcast(float0), dimensions={} + add.peeled_double_buffer = f32[1,128]{1,0} add(splat_float0, input_param_0) + all-gather-start-main = (f32[1,128]{1,0}, f32[2,128]{1,0}) all-gather-start(add.peeled_double_buffer), channel_id=1338, replica_groups={{0,1}}, dimensions={0}, use_global_device_ids=true + all-gather-done-main = f32[2,128]{1,0} all-gather-done(all-gather-start-main) + param_2 = s32[] constant(0) + cond_plus_1.peeled_double_buffer = s32[] add(param_2, constant_1) + tuple = (f32[1,128]{1,0}, f32[1,128]{1,0}, f32[2,128]{1,0}, s32[]) tuple(input_param_0, dynamic-slice-main, all-gather-done-main, cond_plus_1.peeled_double_buffer) + ROOT while = (f32[1,128]{1,0}, f32[1,128]{1,0}, f32[2,128]{1,0}, s32[]) while(tuple), condition=condition, body=body +} +)"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + ParseAndReturnVerifiedModule(kModuleString)); + + CopyInsertion copy_insertion(FusionCanShareBufferHint, + /*use_region_based_live_range_analysis=*/0); + ASSERT_IS_OK(copy_insertion.Run(module.get(), {"foobar"}).status()); + VLOG(2) << module->ToString(); + EXPECT_EQ(CountCopies(*module), 0); +} + } // namespace } // namespace gpu } // namespace xla diff --git a/third_party/xla/xla/service/gpu/loop_double_buffer_transformer.cc 
b/third_party/xla/xla/service/gpu/loop_double_buffer_transformer.cc new file mode 100644 index 00000000000000..113970552597a4 --- /dev/null +++ b/third_party/xla/xla/service/gpu/loop_double_buffer_transformer.cc @@ -0,0 +1,267 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "xla/service/gpu/loop_double_buffer_transformer.h" + +#include +#include +#include +#include + +#include "absl/algorithm/container.h" +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" +#include "absl/log/check.h" +#include "absl/log/log.h" +#include "absl/strings/string_view.h" +#include "xla/hlo/ir/hlo_casting_utils.h" +#include "xla/hlo/ir/hlo_clone_context.h" +#include "xla/hlo/ir/hlo_instruction.h" +#include "xla/hlo/ir/hlo_instructions.h" +#include "xla/hlo/ir/hlo_module.h" +#include "xla/hlo/ir/hlo_opcode.h" +#include "xla/hlo/utils/hlo_query.h" +#include "xla/service/collective_ops_utils.h" +#include "xla/status.h" +#include "xla/statusor.h" +#include "xla/util.h" +#include "xla/xla_data.pb.h" +#include "tsl/platform/errors.h" +#include "tsl/platform/statusor.h" + +namespace xla { +namespace gpu { + +namespace { + +void SetChannelIdForNewCollective(HloInstruction* new_instr, + const HloModule* module) { + // This is to track mappings of old->new channel id for async collectives + // wrapped in the form of 
HloAsyncInstruction, the start and done need to + // have the same unique channel id. + absl::flat_hash_map old_to_new_channel_id_map; + absl::flat_hash_map channel_id_comp_map; + if (HloAsyncInstruction::ClassOf(new_instr) && + hlo_query::IsCollectiveCommunicationOp( + DynCast(new_instr) + ->async_wrapped_instruction() + ->opcode())) { + HloInstruction* wrapped_instr = + DynCast(new_instr)->async_wrapped_instruction(); + int64_t old_channel_id = *wrapped_instr->channel_id(); + int64_t new_channel_id = old_to_new_channel_id_map[old_channel_id]; + if (old_to_new_channel_id_map.find(old_channel_id) == + old_to_new_channel_id_map.end()) { + new_channel_id = hlo_query::NextChannelId(*module); + VLOG(2) << "Generated new channel id " << new_channel_id; + old_to_new_channel_id_map[old_channel_id] = new_channel_id; + } + + VLOG(2) << "Setting channel id to " << new_channel_id; + + wrapped_instr->set_channel_id(new_channel_id); + if (channel_id_comp_map.find(new_channel_id) == channel_id_comp_map.end()) { + channel_id_comp_map[new_channel_id] = new_instr->called_computations()[0]; + } else { + channel_id_comp_map[new_channel_id]->AddAsyncInstruction(*new_instr); + } + } else if (hlo_query::IsCollectiveCommunicationOp(new_instr->opcode()) || + hlo_query::IsAsyncCollectiveStartOp(new_instr->opcode())) { + new_instr->set_channel_id(hlo_query::NextChannelId(*module)); + } +} + +Status PeelInstructionsForOddTripCount(HloModule* module, + HloInstruction* while_instr) { + HloCloneContext context(module, "peeled_double_buffer"); + + absl::flat_hash_map old_to_new_map; + HloComputation* while_body = while_instr->while_body(); + HloInstruction* input_parameter = while_body->parameter_instruction(0); + HloInstruction* input_tuple = while_instr->mutable_operand(0); + CHECK(input_tuple->opcode() == HloOpcode::kTuple); + + auto old_loop_roots = while_body->root_instruction()->mutable_operands(); + HloComputation* parent_comp = while_instr->parent(); + old_to_new_map[input_parameter] = 
input_tuple; + + for (HloInstruction* old_instr : while_body->MakeInstructionPostOrder()) { + if (old_to_new_map.find(old_instr) != old_to_new_map.end()) { + continue; + } + VLOG(2) << "Peeling instruction " << old_instr->ToString(); + std::vector new_operands(old_instr->operand_count()); + for (int64_t i = 0; i < old_instr->operand_count(); i++) { + new_operands[i] = old_to_new_map[old_instr->mutable_operand(i)]; + } + HloInstruction* new_instr = + parent_comp->AddInstruction(old_instr->CloneWithNewOperands( + old_instr->shape(), new_operands, &context)); + + SetChannelIdForNewCollective(new_instr, module); + old_to_new_map[old_instr] = new_instr; + VLOG(2) << "Added instruction " << new_instr->ToString() + << " to parent computation."; + } + + std::vector new_roots; + for (HloInstruction* instr : old_loop_roots) { + new_roots.push_back(old_to_new_map[instr]); + } + TF_RETURN_IF_ERROR(while_instr->ReplaceOperandWith( + 0, old_to_new_map[while_body->root_instruction()])); + VLOG(2) << "Replaced with new input tuple " + << while_instr->operand(0)->ToString(); + + // Handle existing control dependencies. 
+ for (HloInstruction* old_instr : while_body->MakeInstructionPostOrder()) { + if (old_to_new_map.find(old_instr) != old_to_new_map.end()) { + HloInstruction* new_instr = old_to_new_map[old_instr]; + VLOG(2) << "Processing control predecessors for peeled instruction " + << new_instr->ToString(); + std::vector new_control_pred( + old_instr->control_predecessors().size()); + for (HloInstruction* pred : old_instr->control_predecessors()) { + new_control_pred.push_back(old_to_new_map[pred]); + } + + TF_RETURN_IF_ERROR(new_instr->DropAllControlDeps()); + for (HloInstruction* new_pred : new_control_pred) { + TF_RETURN_IF_ERROR(new_pred->AddControlDependencyTo(new_instr)); + VLOG(2) << "Adding " << new_pred->ToString() + << " to control dependency of peeled instruction: " + << new_instr->ToString(); + } + } + } + return OkStatus(); +} +} // namespace + +StatusOr LoopDoubleBufferTransformer::Run( + HloModule* module, + const absl::flat_hash_set& execution_threads) { + bool changed = false; + std::vector while_instrs; + absl::c_copy_if(module->entry_computation()->instructions(), + std::back_inserter(while_instrs), + HloPredicateIsOp); + VLOG(2) << "Processing " << while_instrs.size() << " while loops."; + + for (HloInstruction* while_instr : while_instrs) { + TF_ASSIGN_OR_RETURN(WhileLoopBackendConfig config, + while_instr->backend_config()); + if (!config.has_known_trip_count()) { + VLOG(2) << while_instr->ToString() + << " doesn't have exact trip count, skipping double buffering " + "for now"; + continue; + } + int64_t exact_trip_count = config.known_trip_count().n(); + VLOG(2) << "Processing while loop " << while_instr->ToString() + << " with trip count: " << exact_trip_count; + + HloComputation* while_body = while_instr->while_body(); + + CHECK(while_body->root_instruction()->opcode() == HloOpcode::kTuple); + VLOG(2) << "Processing root " << while_body->root_instruction()->ToString(); + + auto old_loop_roots = while_body->root_instruction()->mutable_operands(); + 
HloInstruction* input_parameter = while_body->parameter_instruction(0); + VLOG(2) << "Processing input parameter " << input_parameter->ToString(); + absl::flat_hash_map old_to_new_map; + absl::flat_hash_set skip_control_dep_injection; + + if (exact_trip_count % 2) { + VLOG(2) << "Found loops with odd trip count, 1 iteration will be peeled " + "outside of the main body."; + TF_RETURN_IF_ERROR(PeelInstructionsForOddTripCount(module, while_instr)); + exact_trip_count -= 1; + } + HloCloneContext context(module, "double_buffer_clone"); + old_to_new_map[input_parameter] = while_body->root_instruction(); + for (HloInstruction* old_instr : while_body->MakeInstructionPostOrder()) { + if (old_to_new_map.find(old_instr) != old_to_new_map.end()) { + continue; + } + VLOG(2) << "Cloning instruction " << old_instr->ToString(); + std::vector new_operands; + for (HloInstruction* old_operand : old_instr->mutable_operands()) { + new_operands.push_back(old_to_new_map[old_operand]); + } + HloInstruction* new_instr = + while_body->AddInstruction(old_instr->CloneWithNewOperands( + old_instr->shape(), new_operands, &context)); + + // If an elementwise instruction with constant operand is present, we + // won't inject control dependency at the end to allow more constant + // folding opportunities. + if (old_instr->IsElementwiseBinary() && old_instr->HasConstantOperand()) { + skip_control_dep_injection.insert(old_instr); + } + SetChannelIdForNewCollective(new_instr, module); + old_to_new_map[old_instr] = new_instr; + VLOG(2) << "Added instruction " << new_instr->ToString(); + } + + while_body->set_root_instruction( + old_to_new_map[while_body->root_instruction()]); + VLOG(2) << "Replaced with new root " + << while_body->root_instruction()->ToString(); + + // Handle existing control dependencies. 
+ for (HloInstruction* old_instr : while_body->MakeInstructionPostOrder()) { + if (old_to_new_map.find(old_instr) != old_to_new_map.end()) { + HloInstruction* new_instr = old_to_new_map[old_instr]; + VLOG(2) << "Processing control predecessors for " + << new_instr->ToString(); + std::vector new_control_pred( + old_instr->control_predecessors().size()); + for (HloInstruction* pred : old_instr->control_predecessors()) { + new_control_pred.push_back(old_to_new_map[pred]); + } + + TF_RETURN_IF_ERROR(new_instr->DropAllControlDeps()); + for (HloInstruction* new_pred : new_control_pred) { + TF_RETURN_IF_ERROR(new_pred->AddControlDependencyTo(new_instr)); + VLOG(2) << "Adding " << new_pred->ToString() + << " to control dependency of " << new_instr->ToString(); + } + } + } + for (HloInstruction* input_consumer : input_parameter->users()) { + for (HloInstruction* old_input : input_consumer->users()) { + HloInstruction* new_input = old_to_new_map[old_input]; + if (skip_control_dep_injection.find(old_input) == + skip_control_dep_injection.end() && + !IsCollective(old_input)) { + for (HloInstruction* old_root : old_loop_roots) { + TF_RETURN_IF_ERROR(old_root->AddControlDependencyTo(new_input)); + } + } + } + } + WhileLoopBackendConfig new_config; + new_config.mutable_known_trip_count()->set_n((exact_trip_count / 2)); + TF_RETURN_IF_ERROR(while_instr->set_backend_config(new_config)); + changed = true; + } + + VLOG(2) << "LoopDoubleBufferTransformer output: " << module->ToString(); + + return changed; +} + +} // end namespace gpu +} // end namespace xla diff --git a/third_party/xla/xla/service/gpu/loop_double_buffer_transformer.h b/third_party/xla/xla/service/gpu/loop_double_buffer_transformer.h new file mode 100644 index 00000000000000..3e10b445afbc49 --- /dev/null +++ b/third_party/xla/xla/service/gpu/loop_double_buffer_transformer.h @@ -0,0 +1,55 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef XLA_SERVICE_GPU_LOOP_DOUBLE_BUFFER_TRANSFORMER_H_ +#define XLA_SERVICE_GPU_LOOP_DOUBLE_BUFFER_TRANSFORMER_H_ + +#include + +#include "absl/strings/string_view.h" +#include "xla/hlo/ir/hlo_module.h" +#include "xla/service/gpu/gpu_types.h" +#include "xla/service/hlo_pass_interface.h" +#include "xla/statusor.h" + +namespace xla { +namespace gpu { + +// This pass performs the unrolling-by-2 loop transformation +// to effectively achieve double buffering between inputs and outputs +// of previously rolled iterations. +// This pass only runs on loops with known trip counts. +// For even number of iterations, unrolling-by-2 will be done directly. +// For odd number of iterations, the first iteration of the loop will be +// peeled outside of the while loop to make the trip count an even number, +// then proceed to unroll by 2. +// It also updates the trip count property of the loop to the correct one (n/2). 
+class LoopDoubleBufferTransformer : public HloModulePass { + public: + LoopDoubleBufferTransformer() = default; + ~LoopDoubleBufferTransformer() override = default; + absl::string_view name() const override { + return "loop-double-buffer-transformer"; + } + + using HloPassInterface::Run; + StatusOr Run( + HloModule* module, + const absl::flat_hash_set& execution_threads) override; +}; + +} // end namespace gpu +} // end namespace xla + +#endif // XLA_SERVICE_GPU_LOOP_DOUBLE_BUFFER_TRANSFORMER_H_ diff --git a/third_party/xla/xla/service/gpu/loop_double_buffer_transformer_test.cc b/third_party/xla/xla/service/gpu/loop_double_buffer_transformer_test.cc new file mode 100644 index 00000000000000..46ec880c60e077 --- /dev/null +++ b/third_party/xla/xla/service/gpu/loop_double_buffer_transformer_test.cc @@ -0,0 +1,361 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "xla/service/gpu/loop_double_buffer_transformer.h" + +#include +#include +#include + +#include "absl/container/flat_hash_set.h" +#include "xla/hlo/ir/hlo_computation.h" +#include "xla/hlo/ir/hlo_instruction.h" +#include "xla/hlo/ir/hlo_module.h" +#include "xla/hlo/ir/hlo_opcode.h" +#include "xla/service/hlo_dce.h" +#include "xla/service/tuple_simplifier.h" +#include "xla/test.h" +#include "xla/test_helpers.h" +#include "xla/tests/hlo_test_base.h" +#include "xla/xla.pb.h" +#include "xla/xla_data.pb.h" +#include "tsl/platform/statusor.h" + +namespace xla { +namespace gpu { +namespace { + +int64_t CountInstructions(const HloComputation& computation, HloOpcode opcode) { + int64_t count = 0; + for (const auto& instruction : computation.instructions()) { + if (instruction->opcode() == opcode) { + count++; + } + } + return count; +} + +int64_t CountInstructions(const HloModule& module, HloOpcode opcode) { + int64_t count = 0; + for (const auto& computation : module.computations()) { + count += CountInstructions((*computation), opcode); + } + return count; +} + +class GpuLoopDoubleBufferTransformerTest : public HloTestBase { + DebugOptions GetDebugOptionsForTest() override { + DebugOptions debug_options = HloTestBase::GetDebugOptionsForTest(); + debug_options.set_xla_gpu_enable_while_loop_double_buffering(true); + return debug_options; + } +}; + +TEST_F(GpuLoopDoubleBufferTransformerTest, UnrolledLoopEvenTripCount) { + const char* const kModuleString = R"( +HloModule all_gather_overlapping +condition { + input_tuple = (f32[1,128], f32[1,128], f32[2,128], s32[]) parameter(0) + cond = s32[] get-tuple-element(input_tuple), index=3 + trip_count = s32[] constant(10) + ROOT done = pred[] compare(cond, trip_count), direction=LT +} + +body { + input_tuple = (f32[1,128], f32[1,128], f32[2,128], s32[]) parameter(0) + param_0 = f32[1,128] get-tuple-element(input_tuple), index=0 + param_1 = 
f32[2,128] get-tuple-element(input_tuple), index=2 + cond = s32[] get-tuple-element(input_tuple), index=3 + c0 = f32[] constant(0) + splat_c0 = f32[1,128] broadcast(c0), dimensions={} + add = f32[1,128] add(splat_c0, param_0) + // Start all-gather communication + all-gather-start = (f32[1,128], f32[2,128]) all-gather-start(add), channel_id=1337, replica_groups={{0,1}}, dimensions={0}, use_global_device_ids=true + // Intertwined with the all-gather communication, an operation happens which + // depends on param_1, but crucially has a different output shape (which + // excludes reusing param_1's buffer for its output). + c1_s32 = s32[] constant(1) + c0_s32 = s32[] constant(0) + one = s32[] constant(1) + cond_plus_1 = s32[] add(cond, one) + dynamic-slice = f32[1,128] dynamic-slice(param_1, c1_s32, c0_s32), dynamic_slice_sizes={1,128} + // The all-gather communication finishes + all-gather-done = f32[2,128] all-gather-done(all-gather-start) + ROOT output_tuple = (f32[1,128], f32[1,128], f32[2,128], s32[]) tuple(param_0, dynamic-slice, all-gather-done, cond_plus_1) +} + +ENTRY main { + param_0 = f32[1,128] parameter(0) + param_1 = f32[2,128] parameter(1) + param_2 = s32[] constant(0) + tuple = (f32[1,128], f32[1,128], f32[2,128], s32[]) tuple(param_0, param_0, param_1, param_2) + ROOT while = (f32[1,128], f32[1,128], f32[2,128], s32[]) while(tuple), condition=condition, body=body, backend_config={"known_trip_count":{"n":"10"}} +})"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + ParseAndReturnVerifiedModule(kModuleString)); + LoopDoubleBufferTransformer double_buffer; + HloDCE dce; + TupleSimplifier tuple_simp; + ASSERT_IS_OK(double_buffer.Run(module.get()).status()); + ASSERT_IS_OK(tuple_simp.Run(module.get()).status()); + ASSERT_IS_OK(dce.Run(module.get()).status()); + + HloInstruction* while_instruction; + for (auto instr : module->entry_computation()->instructions()) { + if (instr->opcode() == HloOpcode::kWhile) { + while_instruction = instr; + } + } + 
TF_ASSERT_OK_AND_ASSIGN( + WhileLoopBackendConfig config, + while_instruction->backend_config()); + int64_t exact_trip_count = config.known_trip_count().n(); + // We expect that after unrolling, the total trip count is half of original + // count. + EXPECT_EQ(exact_trip_count, 5); + // We expect that after unrolling, there should be 2 allgather starts, + // both in while body. + EXPECT_EQ(CountInstructions((*while_instruction->while_body()), + HloOpcode::kAllGatherStart), + 2); + EXPECT_EQ(CountInstructions((*module), HloOpcode::kAllGatherStart), 2); +} + +TEST_F(GpuLoopDoubleBufferTransformerTest, UnrolledLoopOddTripCount) { + const char* const kModuleString = R"( +HloModule all_gather_overlapping +condition { + input_tuple = (f32[1,128], f32[1,128], f32[2,128], s32[]) parameter(0) + cond = s32[] get-tuple-element(input_tuple), index=3 + trip_count = s32[] constant(10) + ROOT done = pred[] compare(cond, trip_count), direction=LT +} + +body { + input_tuple = (f32[1,128], f32[1,128], f32[2,128], s32[]) parameter(0) + param_0 = f32[1,128] get-tuple-element(input_tuple), index=0 + param_1 = f32[2,128] get-tuple-element(input_tuple), index=2 + cond = s32[] get-tuple-element(input_tuple), index=3 + c0 = f32[] constant(0) + splat_c0 = f32[1,128] broadcast(c0), dimensions={} + add = f32[1,128] add(splat_c0, param_0) + // Start all-gather communication + all-gather-start = (f32[1,128], f32[2,128]) all-gather-start(add), channel_id=1337, replica_groups={{0,1}}, dimensions={0}, use_global_device_ids=true + // Intertwined with the all-gather communication, an operation happens which + // depends on param_1, but crucially has a different output shape (which + // excludes reusing param_1's buffer for its output). 
+ c1_s32 = s32[] constant(1) + c0_s32 = s32[] constant(0) + one = s32[] constant(1) + cond_plus_1 = s32[] add(cond, one) + dynamic-slice = f32[1,128] dynamic-slice(param_1, c1_s32, c0_s32), dynamic_slice_sizes={1,128} + // The all-gather communication finishes + all-gather-done = f32[2,128] all-gather-done(all-gather-start) + ROOT output_tuple = (f32[1,128], f32[1,128], f32[2,128], s32[]) tuple(param_0, dynamic-slice, all-gather-done, cond_plus_1) +} + +ENTRY main { + param_0 = f32[1,128] parameter(0) + param_1 = f32[2,128] parameter(1) + param_2 = s32[] constant(0) + tuple = (f32[1,128], f32[1,128], f32[2,128], s32[]) tuple(param_0, param_0, param_1, param_2) + ROOT while = (f32[1,128], f32[1,128], f32[2,128], s32[]) while(tuple), condition=condition, body=body, backend_config={"known_trip_count":{"n":"11"}} +})"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + ParseAndReturnVerifiedModule(kModuleString)); + LoopDoubleBufferTransformer double_buffer; + HloDCE dce; + TupleSimplifier tuple_simp; + ASSERT_IS_OK(double_buffer.Run(module.get()).status()); + ASSERT_IS_OK(tuple_simp.Run(module.get()).status()); + ASSERT_IS_OK(dce.Run(module.get()).status()); + + // We expect that for the while loop, no further copy needs to be added to the + // module. + HloInstruction* while_instruction; + for (auto instr : module->entry_computation()->instructions()) { + if (instr->opcode() == HloOpcode::kWhile) { + while_instruction = instr; + } + } + TF_ASSERT_OK_AND_ASSIGN( + WhileLoopBackendConfig config, + while_instruction->backend_config()); + int64_t exact_trip_count = config.known_trip_count().n(); + // We expect that after unrolling, the total trip count is half of original + // count. + EXPECT_EQ(exact_trip_count, 5); + + // We expect that after unrolling, there should be 3 allgather starts, + // 1 in parent computation, 2 in while body. 
+ EXPECT_EQ(CountInstructions((*while_instruction->while_body()), + HloOpcode::kAllGatherStart), + 2); + EXPECT_EQ(CountInstructions((*module), HloOpcode::kAllGatherStart), 3); + + // We expect that after unrolling, the third operand of the input tuple should + // be the peeled allgather done. + EXPECT_EQ(while_instruction->operand(0)->operand(2)->opcode(), + HloOpcode::kAllGatherDone); +} + +TEST_F(GpuLoopDoubleBufferTransformerTest, + UnrolledLoopNoControlDepsForConstantAdd) { + const char* const kModuleString = R"( +HloModule loop_unrolling_no_deps +condition { + input_tuple = (f32[], s32[]) parameter(0) + cond = s32[] get-tuple-element(input_tuple), index=1 + trip_count = s32[] constant(10) + ROOT done = pred[] compare(cond, trip_count), direction=LT +} + +body { + input_tuple = (f32[], s32[]) parameter(0) + param_0 = f32[] get-tuple-element(input_tuple), index=0 + cond = s32[] get-tuple-element(input_tuple), index=1 + c2 = f32[] constant(2) + add = f32[] add(c2, param_0) + one = s32[] constant(1) + cond_plus_1 = s32[] add(cond, one) + ROOT output_tuple = (f32[], s32[]) tuple(add, cond_plus_1) +} + +ENTRY main { + param_0 = f32[] parameter(0) + param_2 = s32[] constant(0) + tuple = (f32[], s32[]) tuple(param_0, param_2) + ROOT while = (f32[], s32[]) while(tuple), condition=condition, body=body, backend_config={"known_trip_count":{"n":"11"}} +})"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + ParseAndReturnVerifiedModule(kModuleString)); + LoopDoubleBufferTransformer double_buffer; + HloDCE dce; + TupleSimplifier tuple_simp; + ASSERT_IS_OK(double_buffer.Run(module.get()).status()); + ASSERT_IS_OK(tuple_simp.Run(module.get()).status()); + ASSERT_IS_OK(dce.Run(module.get()).status()); + + HloInstruction* while_instruction; + for (auto instr : module->entry_computation()->instructions()) { + if (instr->opcode() == HloOpcode::kWhile) { + while_instruction = instr; + } + } + TF_ASSERT_OK_AND_ASSIGN( + WhileLoopBackendConfig config, + 
while_instruction->backend_config()); + int64_t exact_trip_count = config.known_trip_count().n(); + // We expect that after unrolling, the total trip count is half of original + // count. + EXPECT_EQ(exact_trip_count, 5); + + // We expect that after unrolling, there should be 4 adds + EXPECT_EQ( + CountInstructions((*while_instruction->while_body()), HloOpcode::kAdd), + 4); + + // We expect that after unrolling, the first operand of the output tuple + // should not have any control dependency since it's a elementwise add with a + // constant operand. + EXPECT_EQ(while_instruction->while_body() + ->root_instruction() + ->operand(0) + ->control_predecessors() + .size(), + 0); +} + +TEST_F(GpuLoopDoubleBufferTransformerTest, + UnrolledLoopNoControlDepsForCollective) { + const char* const kModuleString = R"( +HloModule loop_unrolling_no_deps +condition { + input_tuple = (f32[], s32[]) parameter(0) + cond = s32[] get-tuple-element(input_tuple), index=1 + trip_count = s32[] constant(10) + ROOT done = pred[] compare(cond, trip_count), direction=LT +} + +ar_add { + Arg_1 = f32[] parameter(1) + Arg_0 = f32[] parameter(0) + ROOT add_ar = f32[] add(Arg_1, Arg_0) +} + +body { + input_tuple = (f32[], s32[]) parameter(0) + param_0 = f32[] get-tuple-element(input_tuple), index=0 + cond = s32[] get-tuple-element(input_tuple), index=1 + all-reduce-start = f32[] all-reduce-start(param_0), channel_id=8, replica_groups={{0}}, to_apply=ar_add, backend_config="{\"is_sync\":false}" + one = s32[] constant(1) + all-reduce-done = f32[] all-reduce-done(all-reduce-start) + cond_plus_1 = s32[] add(cond, one) + ROOT output_tuple = (f32[], s32[]) tuple(all-reduce-done, cond_plus_1) +} + +ENTRY main { + param_0 = f32[] parameter(0) + param_2 = s32[] constant(0) + tuple = (f32[], s32[]) tuple(param_0, param_2) + ROOT while = (f32[], s32[]) while(tuple), condition=condition, body=body, backend_config={"known_trip_count":{"n":"10"}} +})"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + 
ParseAndReturnVerifiedModule(kModuleString)); + LoopDoubleBufferTransformer double_buffer; + HloDCE dce; + TupleSimplifier tuple_simp; + ASSERT_IS_OK(double_buffer.Run(module.get()).status()); + ASSERT_IS_OK(tuple_simp.Run(module.get()).status()); + ASSERT_IS_OK(dce.Run(module.get()).status()); + + HloInstruction* while_instruction; + for (auto instr : module->entry_computation()->instructions()) { + if (instr->opcode() == HloOpcode::kWhile) { + while_instruction = instr; + } + } + TF_ASSERT_OK_AND_ASSIGN( + WhileLoopBackendConfig config, + while_instruction->backend_config()); + int64_t exact_trip_count = config.known_trip_count().n(); + // We expect that after unrolling, the total trip count is half of original + // count. + EXPECT_EQ(exact_trip_count, 5); + + // We expect that after unrolling, there should be 2 all-reduce-starts + EXPECT_EQ(CountInstructions((*while_instruction->while_body()), + HloOpcode::kAllReduceStart), + 2); + absl::flat_hash_set channel_ids; + for (HloInstruction* ar : while_instruction->while_body()->instructions()) { + if (ar->opcode() == HloOpcode::kAllReduceStart) { + // We expect that after unrolling, allreduces should not have any control + // deps. + EXPECT_EQ(ar->control_predecessors().size(), 0); + channel_ids.insert(*(ar->channel_id())); + } + } + // we expect that all 2 allreduces will have different channel ids. + EXPECT_EQ(channel_ids.size(), 2); +} + +} // namespace +} // namespace gpu +} // namespace xla diff --git a/third_party/xla/xla/xla.proto b/third_party/xla/xla/xla.proto index b4c617d5db56e6..19a9fd73b5814c 100644 --- a/third_party/xla/xla/xla.proto +++ b/third_party/xla/xla/xla.proto @@ -620,7 +620,10 @@ message DebugOptions { // faster. bool xla_gpu_cublas_fallback = 247; - // Next id: 248 + // Enable double buffering for loops. + bool xla_gpu_enable_while_loop_double_buffering = 248; + + // Next id: 249 // Extra options to pass to the compilation backend (e.g. 
LLVM); specific // interpretation of these values is left to the backend. From 87ce74262ac34a46d29113850c643d722c0655a3 Mon Sep 17 00:00:00 2001 From: Chao Date: Tue, 26 Sep 2023 03:30:52 -0700 Subject: [PATCH 261/567] PR #5867: [ROCm] fixed rocm kernel link Imported from GitHub PR https://github.com/openxla/xla/pull/5867 Fixed a recent build error due to the missing link at rocm gpu executor and some updates on rocm_driver @akuegel @ddunl Thanks in advance! Copybara import of the project: -- 63801cf1b46aee77ccc272e0eb65624367553fe2 by Chao Chen : fixed rocm kernel link Merging this change closes #5867 PiperOrigin-RevId: 568485642 --- .../xla/xla/stream_executor/gpu/gpu_types.h | 2 +- .../xla/xla/stream_executor/rocm/BUILD | 7 +++--- .../xla/stream_executor/rocm/rocm_driver.cc | 10 ++++----- .../rocm/rocm_driver_wrapper.h | 2 +- .../stream_executor/rocm/rocm_gpu_executor.cc | 22 ++++++++++++------- 5 files changed, 25 insertions(+), 18 deletions(-) diff --git a/third_party/xla/xla/stream_executor/gpu/gpu_types.h b/third_party/xla/xla/stream_executor/gpu/gpu_types.h index 4a0d643a4f9819..dea81d66a1d59d 100644 --- a/third_party/xla/xla/stream_executor/gpu/gpu_types.h +++ b/third_party/xla/xla/stream_executor/gpu/gpu_types.h @@ -42,7 +42,7 @@ using GpuContextHandle = hipCtx_t; using GpuStreamHandle = hipStream_t; using GpuEventHandle = hipEvent_t; using GpuFunctionHandle = hipFunction_t; -using GpuFunctionAttribute = hipFuncAttribute; +using GpuFunctionAttribute = hipFunction_attribute; using GpuDeviceHandle = hipDevice_t; using GpuDevicePtr = hipDeviceptr_t; using GpuDeviceAttribute = hipDeviceAttribute_t; diff --git a/third_party/xla/xla/stream_executor/rocm/BUILD b/third_party/xla/xla/stream_executor/rocm/BUILD index 74f710420f6e40..b9e5f20ef22af0 100644 --- a/third_party/xla/xla/stream_executor/rocm/BUILD +++ b/third_party/xla/xla/stream_executor/rocm/BUILD @@ -108,10 +108,11 @@ cc_library( ":rocm_platform_id", "@com_google_absl//absl/functional:any_invocable", 
"@com_google_absl//absl/strings", + "//xla/stream_executor:kernel", "//xla/stream_executor:event", "//xla/stream_executor:plugin_registry", "//xla/stream_executor:stream_executor_internal", - "//xla/stream_executor:stream_executor_headers", + "//xla/stream_executor:stream_executor_pimpl_header", "//xla/stream_executor/gpu:gpu_activation_header", "//xla/stream_executor/gpu:gpu_event", "//xla/stream_executor/gpu:gpu_kernel_header", @@ -160,7 +161,7 @@ cc_library( "@com_google_absl//absl/memory", "//xla/stream_executor", # buildcleaner: keep "//xla/stream_executor:multi_platform_manager", - "//xla/stream_executor:stream_executor_headers", + "//xla/stream_executor:stream_executor_pimpl_header", "//xla/stream_executor/platform", ]), alwayslink = True, # Registers itself with the MultiPlatformManager. @@ -317,7 +318,7 @@ cc_library( "//xla/stream_executor:event", "//xla/stream_executor:plugin_registry", "//xla/stream_executor:scratch_allocator", - "//xla/stream_executor:stream_executor_headers", + "//xla/stream_executor:stream_executor_pimpl", "//xla/stream_executor:temporary_device_memory", "//xla/stream_executor/gpu:gpu_activation_header", "//xla/stream_executor/gpu:gpu_stream_header", diff --git a/third_party/xla/xla/stream_executor/rocm/rocm_driver.cc b/third_party/xla/xla/stream_executor/rocm/rocm_driver.cc index bde91ca30582c7..d48ed7e3818d92 100644 --- a/third_party/xla/xla/stream_executor/rocm/rocm_driver.cc +++ b/third_party/xla/xla/stream_executor/rocm/rocm_driver.cc @@ -425,11 +425,11 @@ bool DeviceOptionsToContextFlags(const DeviceOptions& device_options, return context->context(); } -/* static */ tsl::Status GpuDriver::FuncGetAttribute(hipFuncAttribute attribute, - hipFunction_t func, - int* attribute_value) { - RETURN_IF_ROCM_ERROR(hipFuncSetAttribute(func, attribute, *attribute_value), - "Failed to query kernel attribute: ", attribute); +/* static */ tsl::Status GpuDriver::FuncGetAttribute( + hipFunction_attribute attribute, hipFunction_t func, int* 
attribute_value) { + RETURN_IF_ROCM_ERROR( + wrap::hipFuncGetAttribute(attribute_value, attribute, func), + "Failed to query kernel attribute: ", attribute); return tsl::OkStatus(); } diff --git a/third_party/xla/xla/stream_executor/rocm/rocm_driver_wrapper.h b/third_party/xla/xla/stream_executor/rocm/rocm_driver_wrapper.h index 3aa2cf191b4d6f..5808f4c266ce85 100644 --- a/third_party/xla/xla/stream_executor/rocm/rocm_driver_wrapper.h +++ b/third_party/xla/xla/stream_executor/rocm/rocm_driver_wrapper.h @@ -93,7 +93,7 @@ namespace wrap { __macro(hipEventSynchronize) \ __macro(hipFree) \ __macro(hipFuncSetCacheConfig) \ - __macro(hipFuncSetAttribute) \ + __macro(hipFuncGetAttribute) \ __macro(hipGetDevice) \ __macro(hipGetDeviceCount) \ __macro(hipGetDeviceProperties) \ diff --git a/third_party/xla/xla/stream_executor/rocm/rocm_gpu_executor.cc b/third_party/xla/xla/stream_executor/rocm/rocm_gpu_executor.cc index 618514de604493..1443714e623719 100644 --- a/third_party/xla/xla/stream_executor/rocm/rocm_gpu_executor.cc +++ b/third_party/xla/xla/stream_executor/rocm/rocm_gpu_executor.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include "xla/stream_executor/rocm/rocm_gpu_executor.h" #include @@ -24,6 +25,7 @@ limitations under the License. 
#include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" #include "absl/strings/str_join.h" +#include "absl/strings/string_view.h" #include "xla/stream_executor/gpu/gpu_command_buffer.h" #include "xla/stream_executor/gpu/gpu_driver.h" #include "xla/stream_executor/gpu/gpu_event.h" @@ -203,10 +205,10 @@ tsl::Status GpuExecutor::GetKernel(const MultiKernelLoaderSpec& spec, const string* kernel_name; const OnDiskKernelLoaderSpec* on_disk_spec = nullptr; - bool has_cubin = spec.has_cuda_cubin_on_disk(); - if (has_cubin) { - on_disk_spec = &spec.cuda_cubin_on_disk(); - } + + VLOG(3) << "GetKernel on kernel " << kernel << " : " << kernel->name(); + + if (spec.has_cuda_cubin_on_disk()) on_disk_spec = &spec.cuda_cubin_on_disk(); if (on_disk_spec != nullptr) { return tsl::errors::Internal( @@ -244,19 +246,23 @@ tsl::Status GpuExecutor::GetKernel(const MultiKernelLoaderSpec& spec, tsl::Status GpuExecutor::GetKernelMetadata(GpuKernel* rocm_kernel, KernelMetadata* kernel_metadata) { int value = 0; - // TODO(ROCm) implement this feature in HIP + TF_RETURN_IF_ERROR(GpuDriver::FuncGetAttribute( + HIP_FUNC_ATTRIBUTE_NUM_REGS, *rocm_kernel->gpu_function_ptr(), &value)); kernel_metadata->set_registers_per_thread(value); - // TODO(ROCm) implement this feature in HIP + TF_RETURN_IF_ERROR( + GpuDriver::FuncGetAttribute(HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, + *rocm_kernel->gpu_function_ptr(), &value)); kernel_metadata->set_shared_memory_bytes(value); - return tsl::OkStatus(); + return ::tsl::OkStatus(); } tsl::Status GpuExecutor::Launch(Stream* stream, const ThreadDim& thread_dims, const BlockDim& block_dims, const KernelBase& kernel, const KernelArgsArrayBase& args) { - CHECK_EQ(kernel.Arity(), args.number_of_arguments()); + CHECK_EQ(kernel.Arity() + (args.number_of_shared_bytes() > 0), + args.number_of_arguments()); GpuStreamHandle hipstream = AsGpuStreamValue(stream); const GpuKernel* rocm_kernel = AsGpuKernel(&kernel); hipFunction_t hipfunc = 
rocm_kernel->AsGpuFunctionHandle(); From ae01629d90f6f8ef3a04de41bb459b1f5e475df6 Mon Sep 17 00:00:00 2001 From: Ilia Sergachev Date: Tue, 26 Sep 2023 03:53:52 -0700 Subject: [PATCH 262/567] [XLA:GPU] Trigger Triton GEMM fusions also on kCopy input operations. PiperOrigin-RevId: 568489918 --- .../xla/service/gpu/gemm_rewriter_triton.cc | 30 ++++++++++++------- .../xla/service/gpu/ir_emitter_triton_test.cc | 22 ++++++++++++++ 2 files changed, 41 insertions(+), 11 deletions(-) diff --git a/third_party/xla/xla/service/gpu/gemm_rewriter_triton.cc b/third_party/xla/xla/service/gpu/gemm_rewriter_triton.cc index 59cecc4dba6e77..88be780bf25308 100644 --- a/third_party/xla/xla/service/gpu/gemm_rewriter_triton.cc +++ b/third_party/xla/xla/service/gpu/gemm_rewriter_triton.cc @@ -73,6 +73,16 @@ limitations under the License. namespace xla { namespace gpu { +int GetFusionLevel(const HloInstruction& hlo, const GpuVersion gpu_version) { + int level = + hlo.GetModule()->config().debug_options().xla_gpu_triton_fusion_level(); + if (!std::get(gpu_version) + .IsAtLeast(se::CudaComputeCapability::AMPERE)) { + level = std::min(level, 1); + } + return level; +} + bool HasDivisibleSuffixAllowingSplit(const absl::Span span, const int64_t divisor) { CHECK_GE(divisor, 1); @@ -1077,12 +1087,6 @@ DimOrderUpdatesOrError FusionContext::AnalyzeForFusion( absl::flat_hash_map& old_to_new_mapping, const GpuVersion gpu_version) const { - int fusion_level = - hlo.GetModule()->config().debug_options().xla_gpu_triton_fusion_level(); - if (!std::get(gpu_version) - .IsAtLeast(se::CudaComputeCapability::AMPERE)) { - fusion_level = std::min(fusion_level, 1); - } if (hlo.opcode() == HloOpcode::kTuple || hlo.opcode() == HloOpcode::kGetTupleElement) { return "Unsupported instruction."; @@ -1103,7 +1107,7 @@ DimOrderUpdatesOrError FusionContext::AnalyzeForFusion( return "Unsupported output data type."; } if (as_input) { - if (fusion_level < 2) { + if (GetFusionLevel(hlo, gpu_version) < 2) { if (hlo.opcode() == 
HloOpcode::kConvert) { if (FusionDecision decision = RequireTritonFusibleConvert(&hlo, gpu_version); @@ -1119,7 +1123,7 @@ DimOrderUpdatesOrError FusionContext::AnalyzeForFusion( } } } else { - if (fusion_level < 2) { + if (GetFusionLevel(hlo, gpu_version) < 2) { return "Skipping fusing outputs at low fusion levels."; } for (const HloInstruction* operand : hlo.operands()) { @@ -1369,10 +1373,14 @@ StatusOr FuseDot(HloInstruction& dot, if (dot.GetModule()->config().debug_options().xla_gpu_triton_gemm_any()) { return FusionDecision{}; } + + absl::flat_hash_set triggers{ + HloOpcode::kConvert, HloOpcode::kSlice, HloOpcode::kTranspose}; + if (GetFusionLevel(dot, gpu_version) >= 2) { + triggers.insert(HloOpcode::kCopy); + } for (const auto& iter : old_to_new_mapping) { - if (iter.second->opcode() == HloOpcode::kConvert || - iter.second->opcode() == HloOpcode::kSlice || - iter.second->opcode() == HloOpcode::kTranspose) { + if (triggers.contains(iter.second->opcode())) { return FusionDecision{}; } } diff --git a/third_party/xla/xla/service/gpu/ir_emitter_triton_test.cc b/third_party/xla/xla/service/gpu/ir_emitter_triton_test.cc index 014b006b6ab2a7..0e20fc806662dc 100644 --- a/third_party/xla/xla/service/gpu/ir_emitter_triton_test.cc +++ b/third_party/xla/xla/service/gpu/ir_emitter_triton_test.cc @@ -1091,6 +1091,28 @@ ENTRY e { EXPECT_TRUE(RunAndCompare(kHloText, ErrorSpec{/*aabs=*/2e-3, /*arel=*/2e-3})); } +TEST_F(TritonGemmLevel2Test, FuseTransposeWithoutMixedTypes) { + const std::string kHloText = R"( +ENTRY e { + p1 = f16[150,32,60]{2,1,0} parameter(1) + p0 = f16[75,2,26,60]{3,2,1,0} parameter(0) + t = f16[75,2,60,26]{3,2,1,0} transpose(p0), dimensions={0,1,3,2} + r = f16[150,60,26]{2,1,0} reshape(t) + ROOT tmp_4 = f16[150,32,26]{2,1,0} dot(p1, r), + lhs_batch_dims={0}, lhs_contracting_dims={2}, + rhs_batch_dims={0}, rhs_contracting_dims={1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + GetOptimizedModule(kHloText)); + EXPECT_THAT( + 
module->entry_computation()->root_instruction(), + GmockMatch(m::Fusion(m::Parameter(), m::Parameter()) + .WithFusionKind(HloInstruction::FusionKind::kCustom))); + + EXPECT_TRUE(RunAndCompare(kHloText, ErrorSpec{/*aabs=*/1e-3, /*arel=*/1e-3})); +} + TEST_F(TritonGemmTest, SineOutputIsNotFused) { const std::string kHloText = R"( HloModule m From 079ab56a5b0c5aefcdffa4f20f79c7bb137c6bd0 Mon Sep 17 00:00:00 2001 From: Johannes Reifferscheid Date: Tue, 26 Sep 2023 04:28:32 -0700 Subject: [PATCH 263/567] Retire MLIR interpreter and related tooling. This interpreter no longer makes sense inside XLA/isn't actively used. Retire here. Ongoing discussion with upstream to potentially move there post some refactoring, but can be recovered from commit history if and when needed rather than maintain until then. PiperOrigin-RevId: 568496965 --- .../xla/xla/mlir/tools/mlir_bisect/BUILD | 69 -- .../xla/xla/mlir/tools/mlir_bisect/README.md | 85 -- .../xla/mlir/tools/mlir_bisect/bisect_lib.cc | 81 -- .../xla/mlir/tools/mlir_bisect/bisect_lib.h | 96 -- .../xla/mlir/tools/mlir_bisect/mlir_bisect.cc | 363 ------- .../xla/mlir/tools/mlir_bisect/rewrites/BUILD | 26 - .../mlir/tools/mlir_bisect/rewrites/func.cc | 80 -- .../tools/mlir_bisect/rewrites/general.cc | 186 ---- .../mlir/tools/mlir_bisect/rewrites/scf.cc | 139 --- .../tools/mlir_bisect/rewrites/tests/BUILD | 24 - .../tests/erase-op-without-results.mlir | 12 - .../rewrites/tests/inline-scf-while.mlir | 40 - .../tests/reduce-scf-forall-bounds.mlir | 16 - .../tests/replace-op-with-constant.mlir | 26 - .../rewrites/tests/replace-op-with-value.mlir | 16 - .../tests/replace-operand-with-constant.mlir | 28 - ...eturn-operands-of-terminator-operands.mlir | 15 - .../rewrites/tests/truncate-function.mlir | 31 - .../xla/mlir/tools/mlir_bisect/test_passes.cc | 48 - .../xla/mlir/tools/mlir_bisect/test_passes.h | 29 - .../xla/mlir/tools/mlir_bisect/tests/BUILD | 26 - .../mlir/tools/mlir_bisect/tests/bisect.mlir | 46 - 
.../mlir/tools/mlir_bisect/tests/no-bug.mlir | 10 - .../tools/mlir_bisect/tests/snapshot.mlir | 12 - .../tools/mlir_bisect/tests/snapshot.mlir.pb | Bin 68 -> 0 bytes .../xla/xla/mlir/tools/mlir_replay/BUILD | 71 -- .../xla/xla/mlir/tools/mlir_replay/README.md | 48 - .../xla/mlir/tools/mlir_replay/mlir_replay.cc | 248 ----- .../mlir/tools/mlir_replay/mlir_replay_lib.cc | 263 ------ .../mlir/tools/mlir_replay/mlir_replay_lib.h | 40 - .../xla/mlir/tools/mlir_replay/public/BUILD | 42 - .../mlir/tools/mlir_replay/public/README.md | 11 +- .../mlir_replay/public/execution_trace.proto | 72 -- .../public/execution_trace_utils.cc | 447 --------- .../public/execution_trace_utils.h | 76 -- .../public/execution_trace_utils_test.cc | 138 --- third_party/xla/xla/mlir_hlo/BUILD | 117 --- .../tools/mlir_interpreter/dialects/affine.cc | 51 - .../tools/mlir_interpreter/dialects/arith.cc | 305 ------ .../dialects/bufferization.cc | 69 -- .../mlir_interpreter/dialects/builtin.cc | 54 -- .../mlir_interpreter/dialects/comparators.h | 104 -- .../mlir_interpreter/dialects/complex.cc | 63 -- .../mlir_interpreter/dialects/cwise_math.h | 239 ----- .../mlir_interpreter/dialects/deallocation.cc | 88 -- .../tools/mlir_interpreter/dialects/func.cc | 116 --- .../tools/mlir_interpreter/dialects/gml_st.cc | 45 - .../tools/mlir_interpreter/dialects/linalg.cc | 310 ------ .../tools/mlir_interpreter/dialects/math.cc | 64 -- .../tools/mlir_interpreter/dialects/memref.cc | 247 ----- .../tools/mlir_interpreter/dialects/mhlo.cc | 886 ------------------ .../dialects/mhlo_binary_cwise.cc | 45 - .../dialects/mhlo_unary_cwise.cc | 84 -- .../tools/mlir_interpreter/dialects/scf.cc | 216 ----- .../tools/mlir_interpreter/dialects/tensor.cc | 256 ----- .../mlir_interpreter/dialects/tests/BUILD | 24 - .../dialects/tests/affine/apply.mlir | 63 -- .../dialects/tests/affine/minmax.mlir | 36 - .../dialects/tests/arith/bitcast.mlir | 21 - .../dialects/tests/arith/cmpf.mlir | 129 --- .../dialects/tests/arith/cmpi.mlir | 
147 --- .../dialects/tests/arith/constant.mlir | 37 - .../dialects/tests/arith/extf.mlir | 11 - .../dialects/tests/arith/fptosi.mlir | 21 - .../dialects/tests/arith/index_cast.mlir | 28 - .../dialects/tests/arith/int_math.mlir | 111 --- .../dialects/tests/arith/minmax.mlir | 25 - .../dialects/tests/arith/negf.mlir | 21 - .../dialects/tests/arith/remf.mlir | 12 - .../dialects/tests/arith/select.mlir | 52 - .../dialects/tests/arith/sitofp.mlir | 31 - .../dialects/tests/arith/uitofp.mlir | 31 - .../dialects/tests/arith/vector_math.mlir | 12 - .../tests/bufferization/alloc_tensor.mlir | 30 - .../dialects/tests/bufferization/clone.mlir | 14 - .../tests/bufferization/to_memref.mlir | 10 - .../tests/bufferization/to_tensor.mlir | 11 - .../builtin/unrealized_conversion_cast.mlir | 21 - .../dialects/tests/complex/complex.mlir | 186 ---- .../tests/deallocation/deallocation.mlir | 51 - .../dialects/tests/func/call.mlir | 48 - .../dialects/tests/gml_st/fusion.mlir | 35 - .../dialects/tests/linalg/broadcast.mlir | 30 - .../dialects/tests/linalg/dot.mlir | 14 - .../dialects/tests/linalg/fill.mlir | 24 - .../dialects/tests/linalg/generic.mlir | 113 --- .../dialects/tests/linalg/map.mlir | 74 -- .../dialects/tests/linalg/matmul.mlir | 41 - .../dialects/tests/linalg/reduce.mlir | 57 -- .../dialects/tests/linalg/transpose.mlir | 27 - .../dialects/tests/linalg/vecmat.mlir | 14 - .../dialects/tests/math/math.mlir | 252 ----- .../dialects/tests/memref/alloc.mlir | 57 -- .../dialects/tests/memref/collapse_shape.mlir | 33 - .../dialects/tests/memref/copy.mlir | 39 - .../dialects/tests/memref/dim.mlir | 12 - .../dialects/tests/memref/expand_shape.mlir | 52 - .../dialects/tests/memref/get_global.mlir | 12 - .../dialects/tests/memref/invalid.mlir | 77 -- .../dialects/tests/memref/load.mlir | 12 - .../dialects/tests/memref/subview.mlir | 131 --- .../dialects/tests/mhlo/bitcast_convert.mlir | 11 - .../dialects/tests/mhlo/broadcast_in_dim.mlir | 20 - .../dialects/tests/mhlo/case.mlir | 17 - 
.../dialects/tests/mhlo/clamp.mlir | 27 - .../dialects/tests/mhlo/compare.mlir | 143 --- .../dialects/tests/mhlo/complex_math.mlir | 100 -- .../tests/mhlo/compute_reshape_shape.mlir | 26 - .../dialects/tests/mhlo/concatenate.mlir | 37 - .../dialects/tests/mhlo/constant.mlir | 25 - .../dialects/tests/mhlo/convert.mlir | 21 - .../dialects/tests/mhlo/dot.mlir | 37 - .../dialects/tests/mhlo/dot_general.mlir | 73 -- .../dialects/tests/mhlo/dynamic_slice.mlir | 32 - .../tests/mhlo/dynamic_update_slice.mlir | 34 - .../dialects/tests/mhlo/float_math.mlir | 199 ---- .../dialects/tests/mhlo/gather.mlir | 78 -- .../dialects/tests/mhlo/int_math.mlir | 358 ------- .../dialects/tests/mhlo/iota.mlir | 30 - .../dialects/tests/mhlo/pad.mlir | 56 -- .../dialects/tests/mhlo/reduce.mlir | 17 - .../dialects/tests/mhlo/reshape.mlir | 34 - .../dialects/tests/mhlo/scatter.mlir | 55 -- .../dialects/tests/mhlo/select.mlir | 14 - .../dialects/tests/mhlo/slice.mlir | 16 - .../dialects/tests/mhlo/sort.mlir | 25 - .../dialects/tests/mhlo/subtract.mlir | 10 - .../dialects/tests/mhlo/transpose.mlir | 28 - .../dialects/tests/mhlo/tuple.mlir | 30 - .../dialects/tests/mhlo/while.mlir | 25 - .../dialects/tests/scf/for.mlir | 82 -- .../dialects/tests/scf/forall.mlir | 62 -- .../dialects/tests/scf/if.mlir | 69 -- .../dialects/tests/scf/parallel.mlir | 44 - .../dialects/tests/scf/while.mlir | 45 - .../dialects/tests/tensor/collapse_shape.mlir | 42 - .../dialects/tests/tensor/dim.mlir | 12 - .../dialects/tests/tensor/empty.mlir | 21 - .../dialects/tests/tensor/expand_shape.mlir | 55 -- .../dialects/tests/tensor/extract.mlir | 13 - .../dialects/tests/tensor/extract_slice.mlir | 62 -- .../dialects/tests/tensor/from_elements.mlir | 25 - .../dialects/tests/tensor/generate.mlir | 29 - .../dialects/tests/tensor/insert.mlir | 14 - .../dialects/tests/tensor/insert_slice.mlir | 25 - .../dialects/tests/tensor/pad.mlir | 38 - .../dialects/tests/thlo/concatenate.mlir | 16 - .../dialects/tests/thlo/reverse.mlir | 14 
- .../dialects/tests/thlo/scatter.mlir | 49 - .../dialects/tests/vector/bitcast.mlir | 32 - .../dialects/tests/vector/broadcast.mlir | 51 - .../dialects/tests/vector/compressstore.mlir | 16 - .../dialects/tests/vector/constant_mask.mlir | 14 - .../dialects/tests/vector/contract.mlir | 141 --- .../dialects/tests/vector/create_mask.mlir | 16 - .../dialects/tests/vector/expandload.mlir | 19 - .../dialects/tests/vector/extract.mlir | 52 - .../tests/vector/extract_strided_slice.mlir | 18 - .../dialects/tests/vector/extractelement.mlir | 22 - .../dialects/tests/vector/flat_transpose.mlir | 23 - .../dialects/tests/vector/fma.mlir | 13 - .../dialects/tests/vector/gather.mlir | 50 - .../dialects/tests/vector/insert.mlir | 57 -- .../tests/vector/insert_strided_slice.mlir | 17 - .../dialects/tests/vector/insertelement.mlir | 24 - .../dialects/tests/vector/invalid.mlir | 27 - .../dialects/tests/vector/load.mlir | 27 - .../dialects/tests/vector/maskedload.mlir | 19 - .../dialects/tests/vector/maskedstore.mlir | 18 - .../tests/vector/multi_reduction.mlir | 46 - .../dialects/tests/vector/outerproduct.mlir | 155 --- .../dialects/tests/vector/reduction.mlir | 235 ----- .../dialects/tests/vector/shape_cast.mlir | 23 - .../dialects/tests/vector/shuffle.mlir | 34 - .../dialects/tests/vector/splat.mlir | 11 - .../dialects/tests/vector/store.mlir | 39 - .../dialects/tests/vector/transfer_read.mlir | 118 --- .../dialects/tests/vector/transfer_write.mlir | 91 -- .../dialects/tests/vector/transpose.mlir | 28 - .../dialects/tests/vector/type_cast.mlir | 11 - .../dialects/tests/vector/vscale.mlir | 12 - .../tests/xla_cpu/memref_element_cast.mlir | 11 - .../tools/mlir_interpreter/dialects/thlo.cc | 121 --- .../tools/mlir_interpreter/dialects/util.cc | 169 ---- .../tools/mlir_interpreter/dialects/util.h | 81 -- .../tools/mlir_interpreter/dialects/vector.cc | 857 ----------------- .../tools/mlir_interpreter/dialects/xla.cc | 34 - .../mlir_interpreter/framework/interpreter.cc | 142 --- 
.../mlir_interpreter/framework/interpreter.h | 195 ---- .../framework/interpreter_value.cc | 377 -------- .../framework/interpreter_value.h | 226 ----- .../framework/interpreter_value_util.h | 184 ---- .../framework/registration.cc | 120 --- .../mlir_interpreter/framework/registration.h | 225 ----- .../framework/tensor_or_memref.cc | 157 ---- .../framework/tensor_or_memref.h | 363 ------- .../mlir_interpreter/framework/tests/BUILD | 24 - .../framework/tests/interpreter_value_test.cc | 235 ----- .../framework/tests/tensor_or_memref_test.cc | 104 -- .../mlir-interpreter-runner.cc | 142 --- 200 files changed, 2 insertions(+), 16127 deletions(-) delete mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/BUILD delete mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/README.md delete mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/bisect_lib.cc delete mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/bisect_lib.h delete mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/mlir_bisect.cc delete mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/BUILD delete mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/func.cc delete mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/general.cc delete mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/scf.cc delete mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/BUILD delete mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/erase-op-without-results.mlir delete mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/inline-scf-while.mlir delete mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/reduce-scf-forall-bounds.mlir delete mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/replace-op-with-constant.mlir delete mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/replace-op-with-value.mlir delete mode 100644 
third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/replace-operand-with-constant.mlir delete mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/return-operands-of-terminator-operands.mlir delete mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/truncate-function.mlir delete mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/test_passes.cc delete mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/test_passes.h delete mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/tests/BUILD delete mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/tests/bisect.mlir delete mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/tests/no-bug.mlir delete mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/tests/snapshot.mlir delete mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/tests/snapshot.mlir.pb delete mode 100644 third_party/xla/xla/mlir/tools/mlir_replay/BUILD delete mode 100644 third_party/xla/xla/mlir/tools/mlir_replay/README.md delete mode 100644 third_party/xla/xla/mlir/tools/mlir_replay/mlir_replay.cc delete mode 100644 third_party/xla/xla/mlir/tools/mlir_replay/mlir_replay_lib.cc delete mode 100644 third_party/xla/xla/mlir/tools/mlir_replay/mlir_replay_lib.h delete mode 100644 third_party/xla/xla/mlir/tools/mlir_replay/public/execution_trace.proto delete mode 100644 third_party/xla/xla/mlir/tools/mlir_replay/public/execution_trace_utils.cc delete mode 100644 third_party/xla/xla/mlir/tools/mlir_replay/public/execution_trace_utils.h delete mode 100644 third_party/xla/xla/mlir/tools/mlir_replay/public/execution_trace_utils_test.cc delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/affine.cc delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/arith.cc delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/bufferization.cc delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/builtin.cc delete 
mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/comparators.h delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/complex.cc delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/cwise_math.h delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/deallocation.cc delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/func.cc delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/gml_st.cc delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/linalg.cc delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/math.cc delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/memref.cc delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/mhlo.cc delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/mhlo_binary_cwise.cc delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/mhlo_unary_cwise.cc delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/scf.cc delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tensor.cc delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/BUILD delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/affine/apply.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/affine/minmax.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/arith/bitcast.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/arith/cmpf.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/arith/cmpi.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/arith/constant.mlir 
delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/arith/extf.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/arith/fptosi.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/arith/index_cast.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/arith/int_math.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/arith/minmax.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/arith/negf.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/arith/remf.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/arith/select.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/arith/sitofp.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/arith/uitofp.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/arith/vector_math.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/bufferization/alloc_tensor.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/bufferization/clone.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/bufferization/to_memref.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/bufferization/to_tensor.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/builtin/unrealized_conversion_cast.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/complex/complex.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/deallocation/deallocation.mlir delete mode 100644 
third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/func/call.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/gml_st/fusion.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/linalg/broadcast.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/linalg/dot.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/linalg/fill.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/linalg/generic.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/linalg/map.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/linalg/matmul.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/linalg/reduce.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/linalg/transpose.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/linalg/vecmat.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/math/math.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/memref/alloc.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/memref/collapse_shape.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/memref/copy.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/memref/dim.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/memref/expand_shape.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/memref/get_global.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/memref/invalid.mlir delete mode 100644 
third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/memref/load.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/memref/subview.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/mhlo/bitcast_convert.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/mhlo/broadcast_in_dim.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/mhlo/case.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/mhlo/clamp.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/mhlo/compare.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/mhlo/complex_math.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/mhlo/compute_reshape_shape.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/mhlo/concatenate.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/mhlo/constant.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/mhlo/convert.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/mhlo/dot.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/mhlo/dot_general.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/mhlo/dynamic_slice.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/mhlo/dynamic_update_slice.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/mhlo/float_math.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/mhlo/gather.mlir delete mode 100644 
third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/mhlo/int_math.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/mhlo/iota.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/mhlo/pad.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/mhlo/reduce.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/mhlo/reshape.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/mhlo/scatter.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/mhlo/select.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/mhlo/slice.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/mhlo/sort.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/mhlo/subtract.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/mhlo/transpose.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/mhlo/tuple.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/mhlo/while.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/scf/for.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/scf/forall.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/scf/if.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/scf/parallel.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/scf/while.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/tensor/collapse_shape.mlir delete mode 100644 
third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/tensor/dim.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/tensor/empty.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/tensor/expand_shape.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/tensor/extract.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/tensor/extract_slice.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/tensor/from_elements.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/tensor/generate.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/tensor/insert.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/tensor/insert_slice.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/tensor/pad.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/thlo/concatenate.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/thlo/reverse.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/thlo/scatter.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/vector/bitcast.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/vector/broadcast.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/vector/compressstore.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/vector/constant_mask.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/vector/contract.mlir delete mode 100644 
third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/vector/create_mask.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/vector/expandload.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/vector/extract.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/vector/extract_strided_slice.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/vector/extractelement.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/vector/flat_transpose.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/vector/fma.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/vector/gather.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/vector/insert.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/vector/insert_strided_slice.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/vector/insertelement.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/vector/invalid.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/vector/load.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/vector/maskedload.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/vector/maskedstore.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/vector/multi_reduction.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/vector/outerproduct.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/vector/reduction.mlir delete mode 100644 
third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/vector/shape_cast.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/vector/shuffle.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/vector/splat.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/vector/store.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/vector/transfer_read.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/vector/transfer_write.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/vector/transpose.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/vector/type_cast.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/vector/vscale.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/tests/xla_cpu/memref_element_cast.mlir delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/thlo.cc delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/util.cc delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/util.h delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/vector.cc delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/xla.cc delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/framework/interpreter.cc delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/framework/interpreter.h delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/framework/interpreter_value.cc delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/framework/interpreter_value.h delete mode 100644 
third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/framework/interpreter_value_util.h delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/framework/registration.cc delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/framework/registration.h delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/framework/tensor_or_memref.cc delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/framework/tensor_or_memref.h delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/framework/tests/BUILD delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/framework/tests/interpreter_value_test.cc delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/framework/tests/tensor_or_memref_test.cc delete mode 100644 third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/mlir-interpreter-runner.cc diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/BUILD b/third_party/xla/xla/mlir/tools/mlir_bisect/BUILD deleted file mode 100644 index d5c2f3747f84b0..00000000000000 --- a/third_party/xla/xla/mlir/tools/mlir_bisect/BUILD +++ /dev/null @@ -1,69 +0,0 @@ -load("//xla:xla.bzl", "xla_cc_binary") -load("@local_tsl//tsl/platform:rules_cc.bzl", "cc_library") -load("@bazel_skylib//rules:build_test.bzl", "build_test") - -# copybara:uncomment package(default_applicable_licenses = ["//tensorflow:license"]) - -build_test( - name = "mlir-bisect_build_test", - targets = [ - ":mlir-bisect", - ], -) - -xla_cc_binary( - name = "mlir-bisect", - srcs = ["mlir_bisect.cc"], - visibility = ["//visibility:public"], - deps = [ - ":bisect_lib", - "//xla/mlir/runtime/ir:rt", - "//xla/mlir/tools/mlir_bisect/rewrites", - "//xla/mlir/tools/mlir_replay/public:execution_trace_utils", - "//xla/mlir_hlo:deallocation", - "//xla/mlir_hlo:deallocation_passes", - "//xla/mlir_hlo:gml_st", - "//xla/mlir_hlo:gml_st_passes", - "//xla/mlir_hlo:gml_st_test_passes", - "//xla/mlir_hlo:hlo_dialect_registration", - 
"//xla/mlir_hlo:lhlo", - "//xla/mlir_hlo:lmhlo_passes", - "//xla/mlir_hlo:mhlo_passes", - "//xla/mlir_hlo:mlir_interpreter_dialects", - "//xla/mlir_hlo:mlir_interpreter_framework", - "//xla/mlir_hlo:thlo", - "//xla/mlir_hlo:thlo_passes", - "//xla/service:hlo_proto_cc", - "@llvm-project//llvm:Support", - "@llvm-project//mlir:AllPassesAndDialects", - "@llvm-project//mlir:ArithDialect", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:MlirReduceLib", - "@llvm-project//mlir:Pass", - "@llvm-project//mlir:Support", - "@local_tsl//tsl/platform:env", - "@local_tsl//tsl/platform:platform_port", - ], -) - -cc_library( - name = "bisect_lib", - srcs = [ - "bisect_lib.cc", - "test_passes.cc", - ], - hdrs = [ - "bisect_lib.h", - "test_passes.h", - ], - visibility = ["//visibility:public"], - deps = [ - "//xla/mlir/tools/mlir_replay/public:execution_trace_proto_cc", - "//xla/mlir/tools/mlir_replay/public:execution_trace_proto_cc_impl", - "//xla/mlir/tools/mlir_replay/public:execution_trace_utils", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:LinalgDialect", - "@llvm-project//mlir:Pass", - "@llvm-project//mlir:Support", - ], -) diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/README.md b/third_party/xla/xla/mlir/tools/mlir_bisect/README.md deleted file mode 100644 index 570e92b6e53802..00000000000000 --- a/third_party/xla/xla/mlir/tools/mlir_bisect/README.md +++ /dev/null @@ -1,85 +0,0 @@ -# MLIR HLO mlir_bisect - -This is a test case reduction tool, similar in purpose to `mlir-reduce`, but -specific to the `mlir-interpreter` infrastructure. In particular, reductions can -depend on concrete values encountered during execution, and reductions can (and -usually do) generate multiple candidates. - -For example, the `ReplaceOpWithConstant` reduction will attempt to replace each -op with each of its results. If the op is in a loop, each execution will be a -candidate for replacement. - -## Using this tool - -1. 
Run a JAX test with snapshots enabled: - - ``` - bazel test some-jax-test - --test_env=XLA_FLAGS="--xla_cpu_use_xla_runtime --xla_dump_to=/tmp/dump - --xla_dump_hlo_snapshots" --test_filter=SomeSpecific.Test - --test_sharding_strategy=disabled --test_strategy=local - ``` - -1. Figure out the culprit module and pass (sorry, no automation yet): - - ``` - bazel run tensorflow/compiler/xla/mlir/tools/mlir_replay:mlir_replay -- \ - --mlir-compilation-trace=/tmp/dump/module_0000.jit__something.mlir-trace.pb \ - --hlo-snapshot=/tmp/dump/module_0000.jit__something.snapshot.0.pb \ - --print-changes-only \ - --execution-trace-dir=/tmp/execution - ``` - - You should see a pass after which results change. You'll want to use the - .mlir file in `/tmp/execution` corresponding to the pass *before* that with - the bisect tool. - - Note: If the failing pass is bufferization, you may have to use an earlier - snapshot, e.g. before EmptyTensorToAllocTensor. -1. Run bisect: - - ``` - bazel run tensorflow/compiler/xla/mlir/tools/mlir_bisect:mlir-bisect -- \ - --hlo-snapshot=/tmp/dump/module_0000.jit_something.snapshot.0.pb \ - --pass-pipeline="builtin.module(empty-tensor-to-alloc-tensor,one-shot-bufferize{allow-return-allocs bufferize-function-boundaries create-deallocs=0})" \ - /tmp/execution/0052.ScalarizationPass.mlir - ``` - -## Adding a reduction - -To add a reduction, create a function that generates the candidates and register -it: - -``` -SmallVector> -FrobulateAndDefenestrate(BisectState&, dialect::SomeOp some_op) { - auto [cloned_module_1, cloned_op_1] = CloneModuleFor(some_op); - Frobulate(cloned_op_1); - - auto [cloned_module_2, cloned_op_2] = CloneModuleFor(some_op); - Defenestrate(cloned_op_2); - - return {cloned_module_1, cloned_module_2}; -} - -REGISTER_MLIR_REDUCE_STRATEGY(FrobulateAndDefenestrate); -``` - -Then, add a test for the strategy. Make sure your strategy is linked into -mlir-bisect and has `alwayslink` set. 
- -``` -// RUN: mlir-bisect %s --debug-strategy=FrobulateAndDefenestrate | FileCheck %s - -func.func @main() { - dialect.some_op() -} - -// CHECK: func @main() -// CHECK-NEXT: frobulated - -// CHECK: func @main() -// CHECK-NEXT: defenestrated -``` - -`--debug-strategy` will print all candidates generated by the given strategy. diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/bisect_lib.cc b/third_party/xla/xla/mlir/tools/mlir_bisect/bisect_lib.cc deleted file mode 100644 index 6bf380926fe40b..00000000000000 --- a/third_party/xla/xla/mlir/tools/mlir_bisect/bisect_lib.cc +++ /dev/null @@ -1,81 +0,0 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "xla/mlir/tools/mlir_bisect/bisect_lib.h" - -#include -#include -#include -#include - -#include "mlir/Support/LLVM.h" // from @llvm-project - -namespace mlir { -namespace bisect { - -Operation* FindInClone(Operation* op, ModuleOp clone) { - if (llvm::isa(op)) { - return clone; - } - - auto* parent_clone = FindInClone(op->getParentOp(), clone); - auto cloned_ops = - parent_clone->getRegions()[op->getParentRegion()->getRegionNumber()] - .getOps(); - for (auto [original_op, cloned_op] : - llvm::zip(op->getParentRegion()->getOps(), cloned_ops)) { - if (&original_op == op) { - return &cloned_op; - } - } - - llvm_unreachable("Op not found in clone."); -} - -std::pair, Operation*> CloneModuleFor(Operation* op) { - auto module = op->getParentOfType().clone(); - return {OwningOpRef{module}, FindInClone(op, module)}; -} - -namespace detail { - -DenseMap>& -GetStrategies() { - static auto* strategies = - new DenseMap>(); - return *strategies; -} - -void RegisterReduceStrategy( - StringRef name, - std::function fn) { - GetStrategies()[name] = fn; -} - -CandidateVector GetCandidates( - const std::function& strategy, - BisectState& state, ModuleOp op) { - assert(strategy && "GetCandidates was passed a null strategy"); - CandidateVector result; - op.lookupSymbol("main")->walk([&](Operation* subOp) { - llvm::move(strategy(state, subOp), std::back_inserter(result)); - }); - return result; -} - -} // namespace detail -} // namespace bisect -} // namespace mlir diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/bisect_lib.h b/third_party/xla/xla/mlir/tools/mlir_bisect/bisect_lib.h deleted file mode 100644 index d3d25471e52820..00000000000000 --- a/third_party/xla/xla/mlir/tools/mlir_bisect/bisect_lib.h +++ /dev/null @@ -1,96 +0,0 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef XLA_MLIR_TOOLS_MLIR_BISECT_BISECT_LIB_H_ -#define XLA_MLIR_TOOLS_MLIR_BISECT_BISECT_LIB_H_ - -#include -#include -#include - -#include "mlir/IR/BuiltinOps.h" // from @llvm-project -#include "mlir/Support/LLVM.h" // from @llvm-project -#include "xla/mlir/tools/mlir_replay/public/execution_trace.pb.h" -#include "xla/mlir/tools/mlir_replay/public/execution_trace_utils.h" - -#define REGISTER_MLIR_REDUCE_STRATEGY(name) \ - static int name##_init = []() { \ - ::mlir::bisect::detail::RegisterReduceStrategy(#name, name); \ - return 1; \ - }(); - -namespace mlir { -namespace bisect { - -class BisectState { - public: - void SetTrace(mlir::interpreter::ExecutionTrace trace) { - trace_ = std::move(trace); - } - - // Returns all executions of the given op. 
- llvm::SmallVector GetExecutions( - mlir::Operation* op) const { - return interpreter::FindOpExecutionsInTrace(trace_, op); - } - - private: - mlir::interpreter::ExecutionTrace trace_; -}; - -std::pair, Operation*> CloneModuleFor(Operation* op); -Operation* FindInClone(Operation* op, ModuleOp clone); - -template -std::pair, Op> CloneModuleFor(Op op) { - auto [module, op_clone] = CloneModuleFor(op.getOperation()); - return {std::move(module), llvm::cast(op_clone)}; -} - -namespace detail { - -using CandidateVector = SmallVector()>>; - -CandidateVector GetCandidates( - const std::function& strategy, - BisectState& state, ModuleOp op); - -DenseMap>& -GetStrategies(); - -// Registers a strategy that applies to all ops. -void RegisterReduceStrategy( - StringRef name, - std::function fn); - -// Registers a strategy that applies to specific ops. -template -void RegisterReduceStrategy(StringRef name, - CandidateVector (*fn)(BisectState&, Op)) { - RegisterReduceStrategy( - name, [fn](BisectState& state, Operation* op) -> CandidateVector { - if (auto cast = llvm::dyn_cast(op)) { - return fn(state, cast); - } - return {}; - }); -} - -} // namespace detail - -} // namespace bisect -} // namespace mlir - -#endif // XLA_MLIR_TOOLS_MLIR_BISECT_BISECT_LIB_H_ diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/mlir_bisect.cc b/third_party/xla/xla/mlir/tools/mlir_bisect/mlir_bisect.cc deleted file mode 100644 index b5fbb3ec930fd7..00000000000000 --- a/third_party/xla/xla/mlir/tools/mlir_bisect/mlir_bisect.cc +++ /dev/null @@ -1,363 +0,0 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include -#include -#include -#include -#include -#include - -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/raw_ostream.h" -#include "mlir/Dialect/Arith/IR/Arith.h" // from @llvm-project -#include "mlir/IR/BuiltinOps.h" // from @llvm-project -#include "mlir/IR/DialectRegistry.h" // from @llvm-project -#include "mlir/IR/MLIRContext.h" // from @llvm-project -#include "mlir/IR/OwningOpRef.h" // from @llvm-project -#include "mlir/IR/Verifier.h" // from @llvm-project -#include "mlir/InitAllDialects.h" // from @llvm-project -#include "mlir/InitAllPasses.h" // from @llvm-project -#include "mlir/Pass/PassManager.h" // from @llvm-project -#include "mlir/Pass/PassRegistry.h" // from @llvm-project -#include "mlir/Support/FileUtilities.h" // from @llvm-project -#include "mlir/Tools/ParseUtilities.h" // from @llvm-project -#include "xla/mlir/runtime/ir/rt_dialect.h" -#include "xla/mlir/tools/mlir_bisect/bisect_lib.h" -#include "xla/mlir/tools/mlir_bisect/test_passes.h" -#include "xla/mlir/tools/mlir_replay/public/execution_trace_utils.h" -#include "xla/mlir_hlo/deallocation/IR/deallocation_ops.h" -#include "xla/mlir_hlo/deallocation/transforms/passes.h" -#include "xla/mlir_hlo/gml_st/IR/gml_st_ops.h" -#include "xla/mlir_hlo/gml_st/transforms/passes.h" -#include "xla/mlir_hlo/gml_st/transforms/test_passes.h" -#include "xla/mlir_hlo/lhlo/IR/lhlo_ops.h" -#include "xla/mlir_hlo/lhlo/transforms/passes.h" -#include "xla/mlir_hlo/mhlo/IR/register.h" 
-#include "xla/mlir_hlo/mhlo/transforms/passes.h" -#include "xla/mlir_hlo/thlo/IR/thlo_ops.h" -#include "xla/mlir_hlo/thlo/transforms/passes.h" -#include "xla/mlir_hlo/tools/mlir_interpreter/framework/interpreter.h" -#include "xla/service/hlo.pb.h" -#include "tsl/platform/env.h" -#include "tsl/platform/init_main.h" - -struct Options { - llvm::cl::opt input_filename{llvm::cl::Positional, - llvm::cl::desc(""), - llvm::cl::init("-")}; - llvm::cl::opt hlo_snapshot{ - "hlo-snapshot", - llvm::cl::desc( - "If set, get argument values from the given snapshot. If not set, " - "the input function must not have any arguments."), - llvm::cl::init("")}; - llvm::cl::opt debug_strategy{ - "debug-strategy", - llvm::cl::desc("If set, print all reductions for the given strategy and " - "exit. For testing."), - llvm::cl::init("")}; - llvm::cl::opt expected_error{ - "expected-error", - llvm::cl::desc("If set, expect the given error message after applying " - "the pass instead of a successful execution."), - llvm::cl::init("")}; - llvm::cl::opt max_steps_per_run{ - "max-steps-per-run", - llvm::cl::desc("Maximum number of steps to execute for each attempt."), - llvm::cl::init(100000)}; - mlir::PassPipelineCLParser pass_pipeline{"", "Passes to run"}; - llvm::cl::opt canonicalize{ - "enable-canonicalization", - llvm::cl::desc("If set, canonicalize candidates before trying them. 
Set " - "to false if you're bisecting --canonicalize."), - llvm::cl::init(true)}; -}; - -namespace mlir { -namespace bisect { -namespace { - -OwningOpRef ParseMlirInput(llvm::StringRef inputFilename, - MLIRContext* context) { - std::string error_message; - auto file = mlir::openInputFile(inputFilename, &error_message); - if (!file) { - llvm::errs() << error_message << "\n"; - return {}; - } - - auto source_mgr = std::make_shared(); - source_mgr->AddNewSourceBuffer(std::move(file), SMLoc()); - return parseSourceFile(source_mgr, context); -} - -LogicalResult RunPipeline(ModuleOp module, const Options& options) { - if (!options.pass_pipeline.hasAnyOccurrences()) { - return mlir::success(); - } - - auto error_handler = [&](const Twine& msg) { - llvm::errs() << msg << "\n"; - return failure(); - }; - PassManager pm(module.getContext()); - if (failed(options.pass_pipeline.addToPipeline(pm, error_handler)) || - failed(pm.run(module))) { - llvm::errs() << "pipeline failed\n"; - return failure(); - } - return success(); -} - -LogicalResult Run(mlir::Operation* module, interpreter::ExecutionTrace* trace, - const Options& options) { - SymbolTable symbol_table{module}; - interpreter::ExecutionTraceListener tracer(trace); - interpreter::InterpreterOptions interpreter_options; - interpreter_options.listener = &tracer; - interpreter_options.maxSteps = options.max_steps_per_run; - auto results_before_pass = interpreter::runInterpreter( - symbol_table, llvm::cast(symbol_table.lookup("main")), {}, - interpreter_options); - - if (!succeeded(results_before_pass)) { - llvm::errs() << "Interpreter failed\n"; - return failure(); - } - - if (!options.debug_strategy.empty()) { - return success(); - } - - OwningOpRef clone(llvm::cast(module).clone()); - if (!succeeded(RunPipeline(*clone, options))) { - return failure(); - } - - SymbolTable symbol_table_after{*clone}; - interpreter_options.listener = nullptr; - bool found_expected_error = false; - if (!options.expected_error.empty()) { - 
auto original_handler = interpreter_options.errorHandler; - interpreter_options.errorHandler = [&](llvm::StringRef failure) { - found_expected_error |= - failure.find(options.expected_error) != std::string::npos; - original_handler(failure); - }; - } - - auto results_after_pass = interpreter::runInterpreter( - symbol_table_after, - llvm::cast(symbol_table_after.lookup("main")), {}, - interpreter_options); - - if (!succeeded(results_after_pass)) { - if (found_expected_error) { - return success(); - } - llvm::errs() << "Interpreter failed\n"; - return failure(); - } else if (!options.expected_error.empty()) { - llvm::errs() << "Expected error not seen\n"; - return failure(); - } - - // If the results are the same, the bug is no longer present. - if (*results_before_pass == *results_after_pass) { - return failure(); - } - - llvm::errs() << "results before:\n"; - for (auto& result : *results_before_pass) { - llvm::errs() << " " << result.toString() << "\n"; - } - llvm::errs() << "\nresults after:\n"; - for (auto& result : *results_after_pass) { - llvm::errs() << " " << result.toString() << "\n"; - } - - return success(); -} - -LogicalResult Canonicalize(ModuleOp module) { - PassManager pm(module.getContext()); - pm.addPass(createCanonicalizerPass()); - return pm.run(module.getOperation()); -} - -OwningOpRef ReduceModule(OwningOpRef module, - BisectState& state, const Options& options) { - auto strategies = llvm::to_vector(mlir::bisect::detail::GetStrategies()); - - auto apply_step = [&]() -> std::optional> { - for (auto it = strategies.begin(); it != strategies.end(); ++it) { - for (auto& candidate_fn : - detail::GetCandidates(it->second, state, *module)) { - auto candidate = candidate_fn(); - if (!candidate || !mlir::verify(*candidate).succeeded()) { - continue; - } - if (options.canonicalize && !Canonicalize(*candidate).succeeded()) { - continue; - } - - interpreter::ExecutionTrace trace; - // Verify that the candidate is still buggy. 
- if (!Run(*candidate, &trace, options).succeeded()) { - continue; - } - - // Print the new buggy module. - llvm::outs() << "module after " << it->first << ":\n" - << *candidate << "\n\n"; - - // Update the trace. - state.SetTrace(trace); - - // Move strategies to the end. - decltype(strategies) new_strategies; - std::copy(it + 1, strategies.end(), std::back_inserter(new_strategies)); - std::copy(strategies.begin(), it + 1, - std::back_inserter(new_strategies)); - strategies = new_strategies; - return {candidate.release()}; - } - } - return std::nullopt; - }; - - while (auto new_module = apply_step()) { - module = std::move(*new_module); - } - return module; -} - -void ReplaceArgsWithConstants(ModuleOp module, - const xla::HloSnapshot& snapshot) { - auto main = llvm::cast(module.lookupSymbol("main")); - OpBuilder b(main.getBody()); - for (auto [arg, bbarg] : - llvm::zip(snapshot.arguments(), main.getBody().getArguments())) { - auto attr = interpreter::ValueToAttribute( - *interpreter::LiteralToValue(*xla::Literal::CreateFromProto(arg)), - bbarg.getType()); - CHECK_EQ(attr.size(), 1) << "unsupported argument"; - - auto constant = b.create( - main.getLoc(), bbarg.getType(), llvm::cast(attr.front())); - bbarg.replaceAllUsesWith(constant); - } - - // The remaining ops are output args, so we replace them with allocs. 
- for (auto arg : - main.getBody().getArguments().drop_front(snapshot.arguments().size())) { - CHECK(llvm::isa(arg.getType())) << "unsupported argument"; - arg.replaceAllUsesWith(b.create( - module.getLoc(), llvm::cast(arg.getType()))); - } - while (main.getBody().getNumArguments() > 0) { - main.getBody().eraseArgument(0); - } - main.setFunctionType(FunctionType::get(main.getContext(), /*inputs=*/{}, - main.getFunctionType().getResults())); - main.setArgAttrsAttr(b.getArrayAttr({})); -} - -} // namespace -} // namespace bisect -} // namespace mlir - -int main(int argc, char* argv[]) { - llvm::errs().tie(&llvm::outs()); - llvm::outs().tie(&llvm::errs()); - int dummy_argc = 1; - tsl::port::InitMain("", &dummy_argc, &argv); - - Options options; - llvm::cl::ParseCommandLineOptions(argc, argv, "MLIR bisect tool\n"); - - mlir::DialectRegistry registry; - mlir::registerAllDialects(registry); - mlir::registerAllPasses(); - mlir::bisect::test::RegisterTestPasses(); - mlir::mhlo::registerAllMhloPasses(); - mlir::lmhlo::registerAllLmhloPasses(); - mlir::thlo::registerAllThloPasses(); - mlir::gml_st::registerGmlStPasses(); - mlir::gml_st::registerGmlStTestPasses(); - mlir::mhlo::registerAllMhloDialects(registry); - mlir::deallocation::registerDeallocationPasses(); - - registry.insert(); - - mlir::MLIRContext context(registry); - context.getOrLoadDialect(); - auto module = mlir::bisect::ParseMlirInput(options.input_filename, &context); - - if (!options.hlo_snapshot.empty()) { - xla::HloSnapshot snapshot; - TF_CHECK_OK(tsl::ReadBinaryProto(tsl::Env::Default(), options.hlo_snapshot, - &snapshot)); - mlir::bisect::ReplaceArgsWithConstants(*module, snapshot); - } - - if (options.debug_strategy.empty()) { - llvm::outs() << "initial module:\n" << *module << "\n"; - } - - mlir::interpreter::ExecutionTrace trace; - if (!mlir::bisect::Run(*module, &trace, options).succeeded()) { - llvm::outs() << "Did not find bug in initial module\n"; - if (options.pass_pipeline.hasAnyOccurrences() && 
- mlir::succeeded(mlir::bisect::RunPipeline(*module, options))) { - llvm::outs() << "Module after running pipeline:\n" << *module << "\n"; - } - return 1; - } - - mlir::bisect::BisectState state; - state.SetTrace(trace); - if (!options.debug_strategy.empty()) { - bool some_failed = false; - for (auto& candidate : mlir::bisect::detail::GetCandidates( - mlir::bisect::detail::GetStrategies()[options.debug_strategy], - state, *module)) { - auto new_module = candidate(); - if (!new_module) { - continue; - } - llvm::outs() << *new_module << "\n\n"; - if (!mlir::verify(*new_module).succeeded()) { - some_failed = true; - llvm::errs() << "verification failed\n"; - } - } - return some_failed ? 1 : 0; - } - - module = mlir::bisect::ReduceModule(std::move(module), state, options); - - llvm::outs() << "Final module:\n" << *module << "\n"; - if (options.pass_pipeline.hasAnyOccurrences() && - mlir::succeeded(mlir::bisect::RunPipeline(*module, options))) { - llvm::outs() << "Final module after running pipeline:\n" << *module << "\n"; - } - return 0; -} diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/BUILD b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/BUILD deleted file mode 100644 index ef89338a3671a7..00000000000000 --- a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/BUILD +++ /dev/null @@ -1,26 +0,0 @@ -load("@local_tsl//tsl/platform:rules_cc.bzl", "cc_library") - -# copybara:uncomment package(default_applicable_licenses = ["//tensorflow:license"]) - -cc_library( - name = "rewrites", - srcs = [ - "func.cc", - "general.cc", - "scf.cc", - ], - visibility = ["//visibility:public"], - deps = [ - "//xla/mlir/tools/mlir_bisect:bisect_lib", - "//xla/mlir/tools/mlir_replay/public:execution_trace_utils", - "//xla/mlir_hlo:gml_st", - "@llvm-project//llvm:Support", - "@llvm-project//mlir:ArithDialect", - "@llvm-project//mlir:DialectUtils", - "@llvm-project//mlir:FuncDialect", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:SCFDialect", - 
"@llvm-project//mlir:Support", - ], - alwayslink = 1, -) diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/func.cc b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/func.cc deleted file mode 100644 index d8e6257615d221..00000000000000 --- a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/func.cc +++ /dev/null @@ -1,80 +0,0 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include -#include -#include - -#include "mlir/Dialect/Func/IR/FuncOps.h" // from @llvm-project -#include "mlir/Support/LLVM.h" // from @llvm-project -#include "xla/mlir/tools/mlir_bisect/bisect_lib.h" - -namespace mlir { -namespace bisect { -namespace { - -void SetReturnValues(func::FuncOp func, ValueRange values) { - // We only operate on functions without arguments. 
- func.setFunctionType(mlir::FunctionType::get(func.getContext(), /*inputs=*/{}, - values.getTypes())); - func.getBody().getBlocks().front().getTerminator()->setOperands(values); -} - -SmallVector()>> TruncateFunction( - BisectState&, func::FuncOp func) { - SmallVector()>> result; - for (auto& ret : func.getBody().getBlocks().front().without_terminator()) { - if (func.getBody().getBlocks().front().getTerminator()->getOperands() == - ret.getResults()) { - continue; - } - auto fun = [r = &ret]() -> OwningOpRef { - auto [module, ret_clone] = CloneModuleFor(r); - SetReturnValues(ret_clone->getParentOfType(), - ret_clone->getResults()); - return std::move(module); - }; - result.push_back(fun); - } - return result; -} - -SmallVector()>> -ReturnOperandsOfTerminatorOperands(BisectState&, func::FuncOp func) { - SmallVector()>> result; - result.push_back([func]() -> OwningOpRef { - auto [module, func_clone] = CloneModuleFor(func); - auto* terminator = func_clone.getBody().getBlocks().front().getTerminator(); - SmallVector new_operands; - for (auto operand : terminator->getOperands()) { - if (operand.getDefiningOp()) { - llvm::copy(operand.getDefiningOp()->getOperands(), - std::back_inserter(new_operands)); - } else { - return nullptr; - } - } - SetReturnValues(func_clone, new_operands); - return std::move(module); - }); - return result; -} - -REGISTER_MLIR_REDUCE_STRATEGY(TruncateFunction); -REGISTER_MLIR_REDUCE_STRATEGY(ReturnOperandsOfTerminatorOperands); - -} // namespace -} // namespace bisect -} // namespace mlir diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/general.cc b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/general.cc deleted file mode 100644 index 7b624016c3e028..00000000000000 --- a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/general.cc +++ /dev/null @@ -1,186 +0,0 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include -#include - -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallVector.h" -#include "mlir/Dialect/Arith/IR/Arith.h" // from @llvm-project -#include "mlir/IR/Builders.h" // from @llvm-project -#include "mlir/IR/OpDefinition.h" // from @llvm-project -#include "mlir/Interfaces/SideEffectInterfaces.h" // from @llvm-project -#include "mlir/Support/LLVM.h" // from @llvm-project -#include "xla/mlir/tools/mlir_bisect/bisect_lib.h" -#include "xla/mlir/tools/mlir_replay/public/execution_trace_utils.h" - -namespace mlir { -namespace bisect { -namespace { - -bool IsTerminator(Operation* op) { - return op->hasTrait(); -} - -bool IsTopLevelOp(Operation* op) { - return !op->getBlock()->back().mightHaveTrait(); -} - -SmallVector()>> EraseOpWithoutResults( - BisectState& state, Operation* op) { - // Only erase ops with results if they're unused. - if (op->getNumResults() > 0 && !op->use_empty()) { - return {}; - } - - // Don't erase entire functions, constants, terminators. 
- if (IsTopLevelOp(op) || IsTerminator(op)) { - return {}; - } - - SmallVector()>> ret; - ret.push_back([op]() { - auto [module, cloned_op] = CloneModuleFor(op); - cloned_op->erase(); - return std::move(module); - }); - return ret; -} - -llvm::SmallVector()>> ReplaceOpWithConstant( - BisectState& state, Operation* op) { - llvm::SmallVector()>> result; - if (op->hasTrait() || IsTopLevelOp(op) || - IsTerminator(op) || op->use_empty() || op->getNumResults() == 0) { - return result; - } - - auto mii = llvm::dyn_cast(op); - if (mii && mii.hasEffect()) { - // Don't replace allocs with constants. - return result; - } - - // Ops that are never executed won't be replaced here, but we have other - // strategies that get rid of them (e.g. deleting the entire region). - for (auto* execution : state.GetExecutions(op)) { - assert(execution->results_size() == op->getNumResults() && - "unexpected number of results"); - - result.push_back([execution, op]() -> OwningOpRef { - auto [module_clone, op_clone] = CloneModuleFor(op); - SmallVector results; - OpBuilder b(op_clone); - for (int64_t i = 0; i < op->getNumResults(); ++i) { - auto type = op->getResultTypes()[i]; - auto value = *interpreter::TracedValueToValue( - execution->results(static_cast(i))); - auto attribute = interpreter::ValueToAttribute(value, type); - // We don't currently support tuples. 
- if (attribute.size() != 1) { - return nullptr; - } - op_clone->getResults()[i].replaceAllUsesWith( - b.create( - op_clone->getLoc(), type, - llvm::cast(attribute.front()))); - } - return std::move(module_clone); - }); - } - return result; -} - -llvm::SmallVector()>> -ReplaceOperandWithConstant(BisectState& state, Operation* op) { - llvm::SmallVector()>> result; - if (IsTopLevelOp(op) || op->getNumOperands() == 0) { - return result; - } - - for (auto* execution : state.GetExecutions(op)) { - for (int64_t i = 0; i < op->getNumOperands(); ++i) { - auto operand = op->getOperand(i); - if (operand.getDefiningOp() && - operand.getDefiningOp()->hasTrait()) { - continue; - } - result.push_back([execution, i, op]() -> OwningOpRef { - auto type = op->getOperandTypes()[i]; - auto value = *interpreter::TracedValueToValue( - execution->args(static_cast(i))); - auto attribute = interpreter::ValueToAttribute(value, type); - if (attribute.size() != 1) { - return nullptr; - } - auto [module_clone, op_clone] = CloneModuleFor(op); - OpBuilder b(op_clone); - op_clone->setOperand(i, b.create( - op_clone->getLoc(), type, - llvm::cast(attribute.front()))); - return std::move(module_clone); - }); - } - } - return result; -} - -// Replaces an op's result with some other value with the same type defined -// previously in the same region. -llvm::SmallVector()>> ReplaceOpWithValue( - BisectState&, Operation* op) { - llvm::SmallVector()>> ret; - if (op->hasTrait() || IsTopLevelOp(op) || - IsTerminator(op)) { - return ret; - } - - // TODO(jreiffers): Consider bbargs. 
- llvm::DenseMap>> - candidates_by_type; - for (auto* pred = op->getPrevNode(); pred != nullptr; - pred = pred->getPrevNode()) { - for (auto [index, result] : llvm::enumerate(pred->getResults())) { - candidates_by_type[result.getType()].emplace_back(pred, index); - } - } - - for (auto [index, result] : llvm::enumerate(op->getResults())) { - if (result.use_empty()) { - continue; - } - - for (auto [new_result_op, new_result_index] : - candidates_by_type[result.getType()]) { - ret.push_back( - [op, i = index, j = new_result_index, result_op = new_result_op]() { - auto [module_clone, op_clone] = CloneModuleFor(op); - op_clone->getResults()[i].replaceAllUsesWith( - FindInClone(result_op, module_clone.get())->getResults()[j]); - return std::move(module_clone); - }); - } - } - return ret; -} - -REGISTER_MLIR_REDUCE_STRATEGY(EraseOpWithoutResults); -REGISTER_MLIR_REDUCE_STRATEGY(ReplaceOpWithConstant); -REGISTER_MLIR_REDUCE_STRATEGY(ReplaceOpWithValue); -REGISTER_MLIR_REDUCE_STRATEGY(ReplaceOperandWithConstant); - -} // namespace -} // namespace bisect -} // namespace mlir diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/scf.cc b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/scf.cc deleted file mode 100644 index 72f90372b7d5f9..00000000000000 --- a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/scf.cc +++ /dev/null @@ -1,139 +0,0 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "mlir/Dialect/SCF/IR/SCF.h" // from @llvm-project - -#include // NOLINT -#include // NOLINT - -#include "mlir/Dialect/Utils/StaticValueUtils.h" // from @llvm-project -#include "mlir/IR/Builders.h" // from @llvm-project -#include "mlir/IR/OwningOpRef.h" // from @llvm-project -#include "mlir/IR/PatternMatch.h" // from @llvm-project -#include "mlir/Support/LLVM.h" // from @llvm-project -#include "xla/mlir/tools/mlir_bisect/bisect_lib.h" - -namespace mlir { -namespace bisect { -namespace { - -constexpr int64_t kMaxWhileIterations = 1; - -// Rewrites a while loop to execute its body a fixed number of times. The -// condition is executed, but its result is ignored. -// For ease of implementation, this generates scf.execute_region ops. These are -// subsequently canonicalized away. -llvm::SmallVector()>> InlineScfWhile( - BisectState&, scf::WhileOp while_op) { - llvm::SmallVector()>> result; - for (int64_t num_executions = 0; num_executions <= kMaxWhileIterations; - ++num_executions) { - using ::mlir::scf::ExecuteRegionOp; - - result.push_back([while_op, num_executions]() -> OwningOpRef { - auto [module, op] = CloneModuleFor(while_op); - OpBuilder b(op); - llvm::SmallVector regions; - - auto wrap_region_in_execute = [&, - loc = op.getLoc()](mlir::Region& region) { - regions - .emplace_back(b.create( - loc, - region.getBlocks().front().getTerminator()->getOperandTypes(), - mlir::ValueRange{})) - .getRegion() - .takeBody(region); - }; - - wrap_region_in_execute(op.getBefore()); - // Replace the condition terminator with a yield terminator. 
- { - auto& before_block = regions[0].getRegion().getBlocks().front(); - OpBuilder before_builder(before_block.getTerminator()); - IRRewriter before_rewriter(before_builder); - before_rewriter.replaceOpWithNewOp( - before_block.getTerminator(), - before_block.getTerminator()->getOperands()); - } - - // Clone the execute region ops the requested number of times. - if (num_executions > 0) { - wrap_region_in_execute(op.getAfter()); - for (int64_t i = 0; i < num_executions - 1; ++i) { - b.insert(regions.emplace_back(regions[0].clone())); - b.insert(regions.emplace_back(regions[1].clone())); - } - b.insert(regions.emplace_back(regions[0].clone())); - } - - // Rewire region arguments and erase them. - for (int64_t i = 0; i < regions.size(); ++i) { - auto args = i == 0 ? ValueRange{op.getOperands()} - : ValueRange{regions[i - 1].getResults()}; - bool is_after_region = (i & 1) == 1; - auto& region = regions[i].getRegion(); - for (int64_t arg = static_cast(region.getNumArguments()) - 1; - arg >= 0; --arg) { - region.getArgument(arg).replaceAllUsesWith( - args[is_after_region ? 
arg + 1 : arg]); - region.eraseArgument(arg); - } - } - op->replaceAllUsesWith(regions.back().getResults().drop_front(1)); - op->erase(); - return std::move(module); - }); - } - return result; -} - -SmallVector()>> ReduceScfForallBounds( - BisectState&, scf::ForallOp forall_op) { - SmallVector new_upper_bound{forall_op.getMixedUpperBound()}; - OpBuilder b(forall_op); - bool any_replaced = false; - for (auto& ub : new_upper_bound) { - auto constant_or = mlir::getConstantIntValue(ub); - if (!constant_or.has_value()) { - continue; - } - any_replaced = true; - ub = b.getIndexAttr(*constant_or - 1); - } - SmallVector()>> result; - if (!any_replaced) { - return result; - } - result.push_back([=]() -> OwningOpRef { - auto [module, op] = CloneModuleFor(forall_op); - OpBuilder b(op); - SmallVector dynamic_upper_bound; - SmallVector static_upper_bound; - dispatchIndexOpFoldResults(new_upper_bound, dynamic_upper_bound, - static_upper_bound); - op.getDynamicUpperBoundMutable().assign(dynamic_upper_bound); - op.setStaticUpperBound(static_upper_bound); - return std::move(module); - }); - return result; -} - -REGISTER_MLIR_REDUCE_STRATEGY(ReduceScfForallBounds); -REGISTER_MLIR_REDUCE_STRATEGY(InlineScfWhile); - -} // namespace -} // namespace bisect -} // namespace mlir diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/BUILD b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/BUILD deleted file mode 100644 index 860af275d4726e..00000000000000 --- a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/BUILD +++ /dev/null @@ -1,24 +0,0 @@ -load("//xla:glob_lit_test.bzl", "glob_lit_tests") -load("@local_tsl//tsl:tsl.default.bzl", "filegroup") - -# copybara:uncomment package(default_applicable_licenses = ["//tensorflow:license"]) - -glob_lit_tests( - name = "all_tests", - data = [":test_utilities"], - driver = "@llvm-project//mlir:run_lit.sh", - test_file_exts = [ - "mlir", - ], -) - -# Bundle together all of the test utilities that are used by 
tests. -filegroup( - name = "test_utilities", - testonly = True, - data = [ - "//xla/mlir/tools/mlir_bisect:mlir-bisect", - "@llvm-project//llvm:FileCheck", - ], - visibility = ["//visibility:public"], -) diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/erase-op-without-results.mlir b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/erase-op-without-results.mlir deleted file mode 100644 index e918e112fe46f3..00000000000000 --- a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/erase-op-without-results.mlir +++ /dev/null @@ -1,12 +0,0 @@ -// RUN: mlir-bisect %s --debug-strategy=EraseOpWithoutResults | FileCheck %s - -func.func @main() -> memref { - %a = arith.constant 1 : i32 - %b = memref.alloc() : memref - memref.store %a, %b[] : memref - func.return %b : memref -} - -// CHECK: func.func @main() -// CHECK: %[[ALLOC:.*]] = memref.alloc -// CHECK-NEXT: return %[[ALLOC]] diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/inline-scf-while.mlir b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/inline-scf-while.mlir deleted file mode 100644 index 6c9deddbc37cb5..00000000000000 --- a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/inline-scf-while.mlir +++ /dev/null @@ -1,40 +0,0 @@ -// RUN: mlir-bisect %s --debug-strategy=InlineScfWhile | FileCheck %s - -func.func @main() -> i64 { - %c0 = arith.constant 0 : i64 - %c1 = arith.constant 1 : i64 - %c4 = arith.constant 4 : i64 - %alloc = memref.alloc() : memref - memref.store %c0, %alloc[] : memref - %ret = scf.while(%arg0 = %c0): (i64) -> (i64) { - %cond = arith.cmpi slt, %arg0, %c4 : i64 - scf.condition(%cond) %arg0 : i64 - } do { - ^bb0(%arg1: i64): - %add = arith.addi %arg1, %c1 : i64 - scf.yield %add : i64 - } - return %ret : i64 -} - -// CHECK: func @main -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 -// CHECK-DAG: %[[C4:.*]] = arith.constant 4 -// CHECK: %[[RET:.*]]:2 = scf.execute_region -// CHECK: arith.cmpi slt, %[[C0]], %[[C4]] -// 
CHECK: yield {{.*}}, %[[C0]] -// CHECK: return %[[RET]]#1 - -// CHECK: func @main -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 -// CHECK-DAG: %[[C1:.*]] = arith.constant 1 -// CHECK: %[[BEFORE0:.*]]:2 = scf.execute_region -// CHECK: arith.cmpi -// CHECK: yield {{.*}}, %[[C0]] -// CHECK: %[[AFTER:.*]] = scf.execute_region -// CHECK: %[[ADD:.*]] = arith.addi %[[BEFORE0]]#1, %[[C1]] -// CHECK: yield %[[ADD]] -// CHECK: %[[BEFORE1:.*]]:2 = scf.execute_region -// CHECK: arith.cmpi -// CHECK: yield {{.*}}, %[[AFTER]] -// CHECK: return %[[BEFORE1]]#1 \ No newline at end of file diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/reduce-scf-forall-bounds.mlir b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/reduce-scf-forall-bounds.mlir deleted file mode 100644 index 61f289d3c5cd6c..00000000000000 --- a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/reduce-scf-forall-bounds.mlir +++ /dev/null @@ -1,16 +0,0 @@ -// RUN: mlir-bisect %s --debug-strategy=ReduceScfForallBounds | FileCheck %s - -func.func @main() -> tensor<8xindex> { - %init = tensor.empty() : tensor<8xindex> - %iota = scf.forall (%i) = (0) to (8) step (1) - shared_outs (%init_ = %init) -> (tensor<8xindex>) { - %tensor = tensor.from_elements %i : tensor<1xindex> - scf.forall.in_parallel { - tensor.parallel_insert_slice %tensor into %init_[%i] [1] [1] - : tensor<1xindex> into tensor<8xindex> - } - } - func.return %iota : tensor<8xindex> -} -// CHECK: func @main() -// CHECK: scf.forall ({{.*}}) in (7) diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/replace-op-with-constant.mlir b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/replace-op-with-constant.mlir deleted file mode 100644 index 171472ad733642..00000000000000 --- a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/replace-op-with-constant.mlir +++ /dev/null @@ -1,26 +0,0 @@ -// RUN: mlir-bisect %s --debug-strategy=ReplaceOpWithConstant | FileCheck %s - -func.func @main() -> 
tensor<2xi32> { - %a = arith.constant dense<3> : tensor<2xi32> - %b = arith.constant dense<2> : tensor<2xi32> - %c = mhlo.add %a, %b : tensor<2xi32> - %d = mhlo.multiply %b, %c : tensor<2xi32> - func.return %d : tensor<2xi32> -} - -// CHECK: func.func @main() -// CHECK-NEXT: arith.constant dense<3> -// CHECK-NEXT: arith.constant dense<2> -// CHECK-NEXT: arith.constant dense<5> -// CHECK-NEXT: %[[ADD:.*]] = mhlo.add -// CHECK-NOT: %[[ADD]] -// CHECK-NEXT: mhlo.multiply -// CHECK-NEXT: return - -// CHECK: func.func @main() -// CHECK-NEXT: arith.constant dense<3> -// CHECK-NEXT: arith.constant dense<2> -// CHECK-NEXT: mhlo.add -// CHECK-NEXT: %[[D:.*]] = arith.constant dense<10> -// CHECK-NEXT: mhlo.multiply -// CHECK-NEXT: return %[[D]] diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/replace-op-with-value.mlir b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/replace-op-with-value.mlir deleted file mode 100644 index f89f647f14ddc6..00000000000000 --- a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/replace-op-with-value.mlir +++ /dev/null @@ -1,16 +0,0 @@ -// RUN: mlir-bisect %s --debug-strategy=ReplaceOpWithValue | FileCheck %s - -func.func @main() -> (memref, memref) { - %a = memref.alloc() : memref - %b = memref.alloc() : memref - %c0 = arith.constant 0 : i32 - memref.store %c0, %b[] : memref - return %a, %b : memref, memref -} - -// CHECK: func @main() -// CHECK: %[[ALLOC:.*]] = memref.alloc() -// CHECK-NEXT: memref.alloc -// CHECK-NEXT: constant -// CHECK-NEXT: memref.store {{.*}}, %[[ALLOC]] -// CHECK-NEXT: return %[[ALLOC]], %[[ALLOC]] diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/replace-operand-with-constant.mlir b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/replace-operand-with-constant.mlir deleted file mode 100644 index 7619a8a500c5e4..00000000000000 --- a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/replace-operand-with-constant.mlir +++ /dev/null @@ -1,28 
+0,0 @@ -// RUN: mlir-bisect %s --debug-strategy=ReplaceOperandWithConstant | FileCheck %s - -func.func @main() -> (tensor<2xi32>, tensor<2xi32>) { - %a = arith.constant dense<3> : tensor<2xi32> - %b = arith.constant dense<2> : tensor<2xi32> - %c = mhlo.add %a, %b : tensor<2xi32> - %d = mhlo.multiply %b, %c : tensor<2xi32> - func.return %c, %d : tensor<2xi32>, tensor<2xi32> -} - -// CHECK: func @main() -// CHECK: %[[C2:.*]] = arith.constant dense<2> -// CHECK: %[[ADD:.*]] = mhlo.add -// CHECK: %[[C5:.*]] = arith.constant dense<5> -// CHECK: %[[MUL:.*]] = mhlo.multiply %[[C2]], %[[C5]] : tensor<2xi32> -// CHECK: return %[[ADD]], %[[MUL]] - -// CHECK: func @main() -// CHECK: mhlo.add -// CHECK: %[[MUL:.*]] = mhlo.multiply %cst_0, %0 : tensor<2xi32> -// CHECK: %[[C5:.*]] = arith.constant dense<5> -// CHECK: return %[[C5]], %[[MUL]] - -// CHECK: func @main() -// CHECK: %[[ADD:.*]] = mhlo.add -// CHECK: mhlo.multiply -// CHECK: %[[C10:.*]] = arith.constant dense<10> -// CHECK: return %[[ADD]], %[[C10]] diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/return-operands-of-terminator-operands.mlir b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/return-operands-of-terminator-operands.mlir deleted file mode 100644 index 8584e2a0008fa0..00000000000000 --- a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/return-operands-of-terminator-operands.mlir +++ /dev/null @@ -1,15 +0,0 @@ -// RUN: mlir-bisect %s --debug-strategy=ReturnOperandsOfTerminatorOperands | FileCheck %s - -func.func @main() -> tensor<2xi32> { - %a = arith.constant dense<3> : tensor<2xi32> - %b = arith.constant dense<2> : tensor<2xi32> - %c = mhlo.add %a, %b : tensor<2xi32> - %d = mhlo.multiply %b, %c : tensor<2xi32> - func.return %d : tensor<2xi32> -} - -// CHECK: @main -// CHECK: %[[C2:.*]] = arith.constant dense<2> -// CHECK: %[[ADD:.*]] = mhlo.add -// CHECK: mhlo.multiply -// CHECK: return %[[C2]], %[[ADD]] \ No newline at end of file diff --git 
a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/truncate-function.mlir b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/truncate-function.mlir deleted file mode 100644 index af06778bd47c54..00000000000000 --- a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/truncate-function.mlir +++ /dev/null @@ -1,31 +0,0 @@ -// RUN: mlir-bisect %s --debug-strategy=TruncateFunction | FileCheck %s - -// Function to prevent constant folding below. -func.func private @cst() -> tensor<2xi32> { - %cst = arith.constant dense<2> : tensor<2xi32> - return %cst : tensor<2xi32> -} - -func.func @main() -> tensor<2xi32> { - %a = arith.constant dense<1> : tensor<2xi32> - %b = func.call @cst() : () -> tensor<2xi32> - %c = mhlo.add %a, %b : tensor<2xi32> - %d = mhlo.multiply %b, %c : tensor<2xi32> - func.return %d : tensor<2xi32> -} - -// CHECK: func @main() -// CHECK: %[[A:.*]] = arith.constant dense<1> -// CHECK: return %[[A]] - -// CHECK: func @main() -// CHECK: %[[B:.*]] = call @cst() -// CHECK: return %[[B]] - -// CHECK: func @main() -// CHECK: %[[A:.*]] = arith.constant dense<1> -// CHECK: %[[B:.*]] = call @cst() -// CHECK: %[[ADD:.*]] = mhlo.add -// CHECK-DAG: %[[A]] -// CHECK-DAG: %[[B]] -// CHECK: return %[[ADD]] diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/test_passes.cc b/third_party/xla/xla/mlir/tools/mlir_bisect/test_passes.cc deleted file mode 100644 index bd253682f3a47b..00000000000000 --- a/third_party/xla/xla/mlir/tools/mlir_bisect/test_passes.cc +++ /dev/null @@ -1,48 +0,0 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "xla/mlir/tools/mlir_bisect/test_passes.h" - -#include "mlir/Dialect/Linalg/IR/Linalg.h" // from @llvm-project -#include "mlir/Pass/Pass.h" // from @llvm-project - -namespace mlir { -namespace bisect { -namespace test { -namespace { - -struct BreakLinalgTransposePass - : public PassWrapper> { - MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(BreakLinalgTransposePass) - - StringRef getArgument() const final { return "test-break-linalg-transpose"; } - StringRef getDescription() const final { return "breaks linalg transpose"; } - BreakLinalgTransposePass() = default; - - void runOnOperation() override { - getOperation().walk([](linalg::TransposeOp op) { - auto permutation = llvm::to_vector(op.getPermutation()); - std::swap(permutation[0], permutation[1]); - op.setPermutation(permutation); - }); - } -}; -} // namespace - -void RegisterTestPasses() { PassRegistration(); } - -} // namespace test -} // namespace bisect -} // namespace mlir diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/test_passes.h b/third_party/xla/xla/mlir/tools/mlir_bisect/test_passes.h deleted file mode 100644 index 2903d54c137139..00000000000000 --- a/third_party/xla/xla/mlir/tools/mlir_bisect/test_passes.h +++ /dev/null @@ -1,29 +0,0 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef XLA_MLIR_TOOLS_MLIR_BISECT_TEST_PASSES_H_ -#define XLA_MLIR_TOOLS_MLIR_BISECT_TEST_PASSES_H_ - -namespace mlir { -namespace bisect { -namespace test { - -void RegisterTestPasses(); - -} -} // namespace bisect -} // namespace mlir - -#endif // XLA_MLIR_TOOLS_MLIR_BISECT_TEST_PASSES_H_ diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/tests/BUILD b/third_party/xla/xla/mlir/tools/mlir_bisect/tests/BUILD deleted file mode 100644 index eed6c662a0f74f..00000000000000 --- a/third_party/xla/xla/mlir/tools/mlir_bisect/tests/BUILD +++ /dev/null @@ -1,26 +0,0 @@ -load("//xla:glob_lit_test.bzl", "glob_lit_tests") -load("@local_tsl//tsl:tsl.default.bzl", "filegroup") - -# copybara:uncomment package(default_applicable_licenses = ["//tensorflow:license"]) - -glob_lit_tests( - name = "all_tests", - data = [":test_utilities"], - driver = "@llvm-project//mlir:run_lit.sh", - test_file_exts = [ - "mlir", - ], -) - -# Bundle together all of the test utilities that are used by tests. 
-filegroup( - name = "test_utilities", - testonly = True, - data = [ - "snapshot.mlir.pb", - "//xla/mlir/tools/mlir_bisect:mlir-bisect", - "@llvm-project//llvm:FileCheck", - "@llvm-project//llvm:not", - ], - visibility = ["//visibility:public"], -) diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/tests/bisect.mlir b/third_party/xla/xla/mlir/tools/mlir_bisect/tests/bisect.mlir deleted file mode 100644 index ca839d982c416a..00000000000000 --- a/third_party/xla/xla/mlir/tools/mlir_bisect/tests/bisect.mlir +++ /dev/null @@ -1,46 +0,0 @@ -// RUN: mlir-bisect %s \ -// RUN: --pass-pipeline="builtin.module(test-break-linalg-transpose)" \ -// RUN: --max-steps-per-run=200 \ -// RUN: | FileCheck %s - -func.func @main() -> (memref<2x2xindex>, memref<2x2xindex>) { - %a = memref.alloc() : memref<2x2xindex> - %b = memref.alloc() : memref<2x2xindex> - %c = memref.alloc() : memref<2x2xindex> - %c0 = arith.constant 0 : index - %c1 = arith.constant 1 : index - %c2 = arith.constant 2 : index - %c3 = arith.constant 3 : index - scf.for %i = %c0 to %c2 step %c1 { - scf.for %j = %c0 to %c2 step %c1 { - memref.store %i, %a[%i, %j] : memref<2x2xindex> - memref.store %j, %b[%i, %j] : memref<2x2xindex> - } - } - - %i = scf.while: () -> (index) { - %value = memref.load %a[%c0, %c0] : memref<2x2xindex> - %cond = arith.cmpi slt, %value, %c3 : index - scf.condition(%cond) %value : index - } do { - ^bb0(%_: index): - %value = memref.load %a[%c0, %c0] : memref<2x2xindex> - %add = arith.addi %value, %c1 : index - memref.store %add, %a[%c0, %c0] : memref<2x2xindex> - linalg.transpose ins(%b : memref<2x2xindex>) outs(%c : memref<2x2xindex>) - permutation = [1, 0] - memref.copy %c, %b : memref<2x2xindex> to memref<2x2xindex> - scf.yield - } - - return %a, %b : memref<2x2xindex>, memref<2x2xindex> -} - -// CHECK: Final module -// CHECK: func @main() -> memref<2x2xindex> { -// CHECK-NOT: scf.while -// CHECK-NOT: scf.for -// CHECK: linalg.transpose {{.*}} permutation = [1, 0] - -// CHECK: Final 
module after running pipeline -// CHECK: linalg.transpose {{.*}} permutation = [0, 1] diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/tests/no-bug.mlir b/third_party/xla/xla/mlir/tools/mlir_bisect/tests/no-bug.mlir deleted file mode 100644 index df343f3bf8b09f..00000000000000 --- a/third_party/xla/xla/mlir/tools/mlir_bisect/tests/no-bug.mlir +++ /dev/null @@ -1,10 +0,0 @@ -// RUN: not mlir-bisect %s \ -// RUN: --pass-pipeline="builtin.module(test-break-linalg-transpose)" \ -// RUN: | FileCheck %s - -func.func @main() -> memref<2x2xindex> { - %a = memref.alloc() : memref<2x2xindex> - return %a : memref<2x2xindex> -} - -// CHECK: Did not find bug in initial module diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/tests/snapshot.mlir b/third_party/xla/xla/mlir/tools/mlir_bisect/tests/snapshot.mlir deleted file mode 100644 index 916ca47ab0fd8e..00000000000000 --- a/third_party/xla/xla/mlir/tools/mlir_bisect/tests/snapshot.mlir +++ /dev/null @@ -1,12 +0,0 @@ -// RUN: not mlir-bisect %s --hlo-snapshot=%s.pb \ -// RUN: --pass-pipeline="builtin.module(test-break-linalg-transpose)" \ -// RUN: | FileCheck %s - -func.func @main(%a: tensor<3x1xi32>, %b: tensor<3x1xi32>) -> tensor<3x1xi32> { - return %a : tensor<3x1xi32> -} - -// CHECK: initial module -// CHECK: func @main() -> tensor<3x1xi32> { -// CHECK{LITERAL}: arith.constant dense<[[2], [-4], [5]]> : tensor<3x1xi32> -// CHECK{LITERAL}: arith.constant dense<[[0], [7], [-5]]> : tensor<3x1xi32> diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/tests/snapshot.mlir.pb b/third_party/xla/xla/mlir/tools/mlir_bisect/tests/snapshot.mlir.pb deleted file mode 100644 index ee3c8f759494db153cd7114783124b1cb7fb5da0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 68 scmWeq;1UpEkz!(I)MDXcVq`F4Vqj3>VfynQ3K&_1u&Q8S{|#3H00LzcG5`Po diff --git a/third_party/xla/xla/mlir/tools/mlir_replay/BUILD b/third_party/xla/xla/mlir/tools/mlir_replay/BUILD deleted file mode 100644 index 
246676f6526f10..00000000000000 --- a/third_party/xla/xla/mlir/tools/mlir_replay/BUILD +++ /dev/null @@ -1,71 +0,0 @@ -load("//xla:xla.bzl", "xla_cc_binary") -load("@local_tsl//tsl/platform:rules_cc.bzl", "cc_library") -load("@bazel_skylib//rules:build_test.bzl", "build_test") - -# copybara:uncomment package(default_applicable_licenses = ["//tensorflow:license"]) - -build_test( - name = "mlir_replay_build_test", - targets = [ - ":mlir_replay", - ], -) - -xla_cc_binary( - name = "mlir_replay", - srcs = ["mlir_replay.cc"], - deps = [ - ":mlir_replay_lib", - "//xla:debug_options_flags", - "//xla/mlir/framework/ir:xla_framework", - "//xla/mlir/runtime/ir:rt", - "//xla/mlir/tools/mlir_replay/public:compiler_trace_proto_cc", - "//xla/mlir/tools/mlir_replay/public:compiler_trace_proto_cc_impl", - "//xla/mlir/tools/mlir_replay/public:execution_trace_proto_cc", - "//xla/mlir/tools/mlir_replay/public:execution_trace_utils", - "//xla/mlir/xla_cpu/ir:xla_cpu", - "//xla/mlir_hlo:deallocation", - "//xla/mlir_hlo:gml_st", - "//xla/mlir_hlo:hlo_dialect_registration", - "//xla/mlir_hlo:lhlo", - "//xla/mlir_hlo:lhlo_gpu", - "//xla/mlir_hlo:mlir_interpreter_dialects", - "//xla/mlir_hlo:mlir_interpreter_framework", - "//xla/mlir_hlo:thlo", - "//xla/service:hlo_proto_cc", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/strings:str_format", - "@llvm-project//mlir:AllPassesAndDialects", - "@llvm-project//mlir:IR", - "@local_tsl//tsl/platform:env", - "@local_tsl//tsl/platform:path", - "@local_tsl//tsl/platform:platform_port", - "@local_tsl//tsl/platform:status", - "@local_tsl//tsl/util:command_line_flags", - ], -) - -cc_library( - name = "mlir_replay_lib", - srcs = ["mlir_replay_lib.cc"], - hdrs = ["mlir_replay_lib.h"], - visibility = ["//visibility:public"], - deps = [ - "//xla:xla_data_proto_cc", - "//xla/mlir/framework/ir:xla_framework", - "//xla/mlir/tools/mlir_replay/public:execution_trace_proto_cc", - "//xla/mlir/tools/mlir_replay/public:execution_trace_utils", - 
"//xla/mlir_hlo:mlir_interpreter_framework", - "//xla/service:hlo_proto_cc", - "@com_google_absl//absl/container:flat_hash_map", - "@com_google_absl//absl/random", - "@com_google_absl//absl/random:bit_gen_ref", - "@llvm-project//llvm:Support", - "@llvm-project//mlir:FuncDialect", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:MlirReduceLib", - "@llvm-project//mlir:Support", - "@local_tsl//tsl/platform:errors", - "@local_tsl//tsl/platform:statusor", - ], -) diff --git a/third_party/xla/xla/mlir/tools/mlir_replay/README.md b/third_party/xla/xla/mlir/tools/mlir_replay/README.md deleted file mode 100644 index 6c2091d526e92f..00000000000000 --- a/third_party/xla/xla/mlir/tools/mlir_replay/README.md +++ /dev/null @@ -1,48 +0,0 @@ -# MLIR Replay tool - -This tool is mainly intended for helping debug miscompiles. It takes as inputs -an HLO snapshot proto with input tensors and a compiler trace proto with the -state of the IR after each pass. - -This tool is built on top of -[mlir-interpreter](https://github.com/tensorflow/mlir-hlo/tree/master/tools/mlir_interpreter/). 
- -Example usage: - -``` -# Run a JAX test with debug flags enabled: -$ bazel test :some_jax_test --compilation_mode=opt \ - --test_env=XLA_FLAGS="--xla_cpu_use_xla_runtime --xla_dump_to=/tmp/test-dump --xla_dump_hlo_snapshots" \ - --test_filter=SomeSpecific.TestCase \ - --test_sharding_strategy=disabled --test_strategy=local - -# JAX tends to compile many modules, so first check which one is broken: -./mlir_replay \ - --mlir-compilation-trace-dir=/tmp/test-dump - -Failures for /tmp/test-dump/module_1234.jit_something.mlir-trace.pb: - Result mismatch for /tmp/test-dump/module_1234.jit_something.snapshot.56.pb: TensorOrMemref<3xi32>: [1, 2, 3] != TensorOrMemref<3xi32>: [1, 1, 1] - run :mlir_replay -- --mlir-compilation-trace=/tmp/test-dump/module_1234.jit_something.mlir-trace.pb --hlo-snapshot=/tmp/test-dump/module_1234.jit_something.snapshot.56.pb --print-changes-only --stop-after-first-failure -``` - -There may be multiple failing modules. You can run the provided command to -replay a particular one: - -``` -# Run the IR after each pass. Note that JAX typically compiles many modules, so -# you may have check more than one. -# There is one .mlir-trace.pb file per module (containing the intermediate IR) -# and one .snapshot.pb file per execution (containing the inputs and outputs). 
-$ ./mlir_replay \ - --mlir-compilation-trace=/tmp/test-dump/module_1234.jit_something.mlir-trace.pb \ - --hlo-snapshot=/tmp/test-dump/module_1234.jit_something.snapshot.56.pb \ - --print-changes-only --stop-after-first-failure -Running IR after APass -Results: [1, 2, 3] - -Running IR after BPass -Running IR after CPass -Running IR after BrokenPass -Results: [1, 1, 1] -``` - diff --git a/third_party/xla/xla/mlir/tools/mlir_replay/mlir_replay.cc b/third_party/xla/xla/mlir/tools/mlir_replay/mlir_replay.cc deleted file mode 100644 index 5d075b6d0ac7f2..00000000000000 --- a/third_party/xla/xla/mlir/tools/mlir_replay/mlir_replay.cc +++ /dev/null @@ -1,248 +0,0 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include -#include - -#include "absl/strings/str_format.h" -#include "absl/strings/str_split.h" -#include "mlir/IR/DialectRegistry.h" // from @llvm-project -#include "mlir/IR/MLIRContext.h" // from @llvm-project -#include "mlir/InitAllDialects.h" // from @llvm-project -#include "xla/debug_options_flags.h" -#include "xla/mlir/framework/ir/xla_framework.h" -#include "xla/mlir/runtime/ir/rt_dialect.h" -#include "xla/mlir/tools/mlir_replay/mlir_replay_lib.h" -#include "xla/mlir/tools/mlir_replay/public/compiler_trace.pb.h" -#include "xla/mlir/tools/mlir_replay/public/execution_trace.pb.h" -#include "xla/mlir/tools/mlir_replay/public/execution_trace_utils.h" -#include "xla/mlir/xla_cpu/ir/xla_cpu.h" -#include "xla/mlir_hlo/deallocation/IR/deallocation_ops.h" -#include "xla/mlir_hlo/gml_st/IR/gml_st_ops.h" -#include "xla/mlir_hlo/lhlo/IR/lhlo_ops.h" -#include "xla/mlir_hlo/lhlo_gpu/IR/lhlo_gpu_ops.h" -#include "xla/mlir_hlo/mhlo/IR/register.h" -#include "xla/mlir_hlo/thlo/IR/thlo_ops.h" -#include "xla/mlir_hlo/tools/mlir_interpreter/framework/interpreter_value.h" -#include "xla/service/hlo.pb.h" -#include "tsl/platform/env.h" -#include "tsl/platform/init_main.h" -#include "tsl/platform/path.h" -#include "tsl/platform/status.h" -#include "tsl/util/command_line_flags.h" - -struct ReplayOptions { - std::string hlo_snapshot; - std::string mlir_compilation_trace; - std::string mlir_compilation_trace_dir = ""; - std::string execution_trace_dir = ""; - std::vector entry_points = {"main", "main_xla_framework"}; - bool print_changes_only = false; - bool stop_after_first_failure = false; - bool print_values = true; -}; - -bool ResultsMatch(const xla::HloSnapshot& snapshot, - const llvm::SmallVector& - first_pass_results, - std::vector& failures, - const ReplayOptions& opts) { - auto actual = mlir::interpreter::LiteralToValue(snapshot.result()); - TF_CHECK_OK(actual.status()); - - // We assume this 
is MHLO, so multiple results will be in a tuple. - if (first_pass_results.size() != 1) { - failures.push_back("expected one result"); - return false; - } - - if (!(*actual == first_pass_results[0])) { - if (opts.print_values) { - failures.push_back("result mismatch: " + actual->toString() + - " != " + first_pass_results[0].toString()); - } else { - failures.push_back("result mismatch"); - } - return false; - } - return true; -} - -void TestAll(mlir::MLIRContext& context, const ReplayOptions& opts) { - std::vector traces; - TF_CHECK_OK(tsl::Env::Default()->GetMatchingPaths( - opts.mlir_compilation_trace_dir + "/*.mlir-trace.pb", &traces)); - - for (const auto& trace_path : traces) { - mlir::interpreter::MlirCompilationTrace trace; - TF_CHECK_OK(tsl::ReadBinaryProto(tsl::Env::Default(), trace_path, &trace)) - << "Failed to load " << trace_path; - - std::vector snapshots; - std::string prefix = - trace_path.substr(0, trace_path.length() - strlen(".mlir-trace.pb")); - TF_CHECK_OK(tsl::Env::Default()->GetMatchingPaths(prefix + "*.snapshot.*", - &snapshots)); - CHECK_NE(snapshots.size(), 0) - << "No snapshots found for module " << trace_path << "."; - - std::vector failures; - for (const auto& snapshot_path : snapshots) { - xla::HloSnapshot snapshot; - TF_CHECK_OK( - tsl::ReadBinaryProto(tsl::Env::Default(), snapshot_path, &snapshot)); - - auto results = - mlir::interpreter::Run(context, trace.passes(0).mlir_module(), - snapshot, nullptr, opts.entry_points); - if (!results.status().ok()) { - failures.push_back("Failed to execute " + snapshot_path + ": " + - results.status().ToString()); - } else { - if (!ResultsMatch(snapshot, *results, failures, opts)) { - failures.push_back( - std::string("run :mlir_replay -- --mlir-compilation-trace=") + - trace_path + " --hlo-snapshot=" + snapshot_path + - " --print-changes-only --stop-after-first-failure"); - } - } - } - - if (!failures.empty()) { - llvm::errs() << "Failures for " << trace_path << ":\n " - << absl::StrJoin(failures, 
"\n ") << "\n"; - } - } -} - -int main(int argc, char* argv[]) { - // Flush llvm::outs before writing errors. - llvm::errs().tie(&llvm::outs()); - - std::string entry_points; - ReplayOptions opts; - std::vector flag_list = { - tsl::Flag("hlo-snapshot", &opts.hlo_snapshot, - "Filename of an HloSnapshot proto. Only used to read inputs."), - tsl::Flag("mlir-compilation-trace", &opts.mlir_compilation_trace, - "Filename of an MlirCompilerTrace proto."), - tsl::Flag("mlir-compilation-trace-dir", &opts.mlir_compilation_trace_dir, - "Directory from which to load MlirCompilerTrace and " - "HloSnapshot protos. The tool will run all snapshots and " - "report the ones with bugs."), - tsl::Flag("execution-trace-dir", &opts.execution_trace_dir, - "Directory where to store the execution traces (optional)."), - tsl::Flag("entry-point", &entry_points, - "Program entry function (optional, defaults to 'main')."), - tsl::Flag("print-changes-only", &opts.print_changes_only, - "If set, only print changed values"), - tsl::Flag("stop-after-first-failure", &opts.stop_after_first_failure, - "If set, stop after the first failed invocation."), - tsl::Flag("print-values", &opts.print_values, "If set, print values."), - }; - xla::AppendDebugOptionsFlags(&flag_list); - - // The usage string includes the message at the top of the file, the - // DebugOptions flags and the flags defined above. 
- std::string usage_string = tsl::Flags::Usage(argv[0], flag_list); - if (!tsl::Flags::Parse(&argc, argv, flag_list)) { - return 1; - } - - if (!entry_points.empty()) { - opts.entry_points = absl::StrSplit(entry_points, ','); - } - - tsl::port::InitMain(usage_string.c_str(), &argc, &argv); - - CHECK(opts.mlir_compilation_trace.empty() != - opts.mlir_compilation_trace_dir.empty()) - << "Exactly one of --mlir-compilation-trace and " - "--mlir-compilation-trace-dir must be specified."; - - CHECK(opts.mlir_compilation_trace_dir.empty() || opts.hlo_snapshot.empty()) - << "If --mlir-compilation-trace-dir is set, --hlo-snapshot must not be."; - - mlir::DialectRegistry registry; - mlir::registerAllDialects(registry); - mlir::mhlo::registerAllMhloDialects(registry); - registry.insert(); - - mlir::MLIRContext context(registry); - - if (!opts.mlir_compilation_trace_dir.empty()) { - TestAll(context, opts); - return 0; - } - - xla::HloSnapshot snapshot; - if (!opts.hlo_snapshot.empty()) { - TF_CHECK_OK(tsl::ReadBinaryProto(tsl::Env::Default(), opts.hlo_snapshot, - &snapshot)); - } - mlir::interpreter::MlirCompilationTrace trace; - TF_CHECK_OK(tsl::ReadBinaryProto(tsl::Env::Default(), - opts.mlir_compilation_trace, &trace)); - - llvm::SmallVector previous_results; - int pass_id = 0; - for (auto& state : trace.passes()) { - llvm::outs() << "Running IR after " << state.after_pass() << ".\n"; - mlir::interpreter::ExecutionTrace execution_trace; - auto results = mlir::interpreter::Run( - context, state.mlir_module(), snapshot, - opts.execution_trace_dir.empty() ? 
nullptr : &execution_trace, - opts.entry_points); - if (results.status().ok()) { - if (opts.print_values && - (!opts.print_changes_only || (*results != previous_results))) { - llvm::outs() << "Results:\n"; - for (const auto& result : *results) { - llvm::outs() << result.toString() << "\n"; - } - previous_results = *results; - llvm::outs() << "\n"; - } - } else { - llvm::errs() << results.status().ToString() << "\n"; - if (opts.stop_after_first_failure) { - return 1; - } - } - - if (!opts.execution_trace_dir.empty()) { - TF_CHECK_OK( - tsl::Env::Default()->RecursivelyCreateDir(opts.execution_trace_dir)); - std::string filename = tsl::io::JoinPath( - opts.execution_trace_dir, - absl::StrFormat("%.4d.%s.mlir", pass_id, state.after_pass())); - TF_CHECK_OK(tsl::WriteStringToFile(tsl::Env::Default(), filename, - execution_trace.ir())); - - filename = tsl::io::JoinPath( - opts.execution_trace_dir, - absl::StrFormat("%.4d.%s.trace.pb", pass_id, state.after_pass())); - TF_CHECK_OK(tsl::WriteBinaryProto(tsl::Env::Default(), filename, - execution_trace)); - } - ++pass_id; - } - - return 0; -} diff --git a/third_party/xla/xla/mlir/tools/mlir_replay/mlir_replay_lib.cc b/third_party/xla/xla/mlir/tools/mlir_replay/mlir_replay_lib.cc deleted file mode 100644 index d80b8fc7cb257f..00000000000000 --- a/third_party/xla/xla/mlir/tools/mlir_replay/mlir_replay_lib.cc +++ /dev/null @@ -1,263 +0,0 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "xla/mlir/tools/mlir_replay/mlir_replay_lib.h" - -#include -#include -#include -#include -#include -#include -#include - -#include "absl/container/flat_hash_map.h" -#include "absl/random/bit_gen_ref.h" -#include "absl/random/random.h" -#include "llvm/ADT/APInt.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/raw_ostream.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" // from @llvm-project -#include "mlir/IR/Operation.h" // from @llvm-project -#include "mlir/IR/OwningOpRef.h" // from @llvm-project -#include "mlir/Support/LLVM.h" // from @llvm-project -#include "mlir/Support/LogicalResult.h" // from @llvm-project -#include "mlir/Tools/ParseUtilities.h" // from @llvm-project -#include "xla/mlir/framework/ir/xla_framework.h" -#include "xla/mlir/tools/mlir_replay/public/execution_trace_utils.h" -#include "xla/mlir_hlo/tools/mlir_interpreter/framework/interpreter.h" -#include "xla/mlir_hlo/tools/mlir_interpreter/framework/interpreter_value.h" -#include "xla/service/hlo.pb.h" -#include "tsl/platform/errors.h" -#include "tsl/platform/statusor.h" - -namespace mlir { -namespace interpreter { -namespace { - -tsl::StatusOr> LoadArgs( - const xla::HloSnapshot& snapshot, TypeRange types) { - SmallVector result; - for (const auto& [arg, type] : llvm::zip(snapshot.arguments(), types)) { - TF_ASSIGN_OR_RETURN(auto converted, LiteralToValue(arg, type)); - result.push_back(std::move(converted)); - } - return result; -} - -namespace { -template class rng_t> -mlir::interpreter::InterpreterValue RandomTensor(absl::BitGenRef bitgen, - mlir::Type type) { - llvm::SmallVector shape; - auto shaped_ty = type.dyn_cast(); - if (shaped_ty) { - shape = llvm::to_vector(shaped_ty.getShape()); - } - - auto rng = rng_t{}; - auto result = mlir::interpreter::TensorOrMemref::empty(shape); - for (const auto& index : result.view.indices()) { - auto& elem = result.at(index) = rng(bitgen); - // Ints 
are typically indices, so scale them down to a more reasonable - // range. - if constexpr (std::is_same_v) { - elem >>= 60; - } - } - if (shaped_ty) { - return {result}; - } - return {result.at({})}; -} -} // namespace - -mlir::FailureOr MakeRandomInput( - absl::BitGenRef bitgen, mlir::Type type) { - auto elem_ty = - type.isa() ? type.cast().getElementType() : type; - if (elem_ty.isF32()) { - return RandomTensor(bitgen, type); - } - if (elem_ty.isF64()) { - return RandomTensor(bitgen, type); - } - if (elem_ty.isInteger(32)) { - return RandomTensor(bitgen, type); - } - if (elem_ty.isInteger(16)) { - return RandomTensor(bitgen, type); - } - if (elem_ty.isInteger(64)) { - return RandomTensor(bitgen, type); - } - if (elem_ty.isInteger(1)) { - return {{TensorOrMemref::empty(type.cast().getShape())}}; - } - - llvm::errs() << "Unsupported type: "; - type.print(llvm::errs()); - llvm::errs() << "\n"; - return failure(); -} - -// TODO(jreiffers): Add a flag to intentionally alias as many buffers as -// possible (in particular, all non-variable inputs). -// Extracts a mapping from function arguments to allocated buffers. -// The buffer assignment is only relevant once the program is bufferized and -// memref results were converted to arguments. -std::vector extractXlaBufferAssignment(func::FuncOp main) { - std::vector buffer_assignment(main.getNumArguments()); - auto result_mapping = - main->getAttrOfType("xla_framework.result_mapping"); - if (!result_mapping) { - // No attribute, fall back to unique buffers for each argument. 
- std::iota(buffer_assignment.begin(), buffer_assignment.end(), 0); - return buffer_assignment; - } - - std::vector result_to_buffer; - if (auto inner_mapping = main->getAttrOfType( - "xla_framework.result_inner_mapping")) { - llvm::copy(llvm::map_range(inner_mapping.getAsValueRange(), - [](const llvm::APInt& value) { - return value.getSExtValue(); - }), - std::back_inserter(result_to_buffer)); - } else { - result_to_buffer = {result_mapping.getInt()}; - } - - int64_t result_index = 0; - for (int64_t arg_index : llvm::seq(0, main.getNumArguments())) { - if (auto input_buffer_index = main.getArgAttrOfType( - arg_index, "xla_framework.input_mapping")) { - buffer_assignment[arg_index] = input_buffer_index.getInt(); - } else { - buffer_assignment[arg_index] = result_to_buffer[result_index++]; - } - } - - return buffer_assignment; -} - -} // namespace - -tsl::StatusOr> Run( - MLIRContext& context, const std::string& mlir_ir, - const xla::HloSnapshot& snapshot, ExecutionTrace* trace, - const std::vector& entry) { - auto sourceMgr = std::make_shared(); - sourceMgr->AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(mlir_ir), - mlir::SMLoc()); - mlir::OwningOpRef module = - mlir::parseSourceFileForTool(sourceMgr, &context, false); - if (!module) { - return tsl::errors::InvalidArgument("failed to parse MLIR"); - } - - SymbolTable symbols(*module); - func::FuncOp main; - for (const std::string& candidate : entry) { - main = llvm::dyn_cast_or_null(symbols.lookup(candidate)); - if (main && !main.getBody().empty()) { - break; - } - } - - if (!main) { - return tsl::errors::InvalidArgument("failed to find entry point"); - } - - if (trace) { - llvm::raw_string_ostream os(*trace->mutable_ir()); - (*module)->print(os, OpPrintingFlags().printGenericOpForm()); - } - - // After xla-rt-export-functions, we have an execution context as the first - // argument. The interpreter currently cannot deal with these things, so we - // fail in that case. 
- auto function_args = main.getBody().getBlocks().front().getArguments(); - auto buffer_type = xla_framework::BufferType::get(main.getContext()); - if (!llvm::all_of(function_args, [&](Value arg) { - return arg.getType().isa() || arg.getType() == buffer_type; - })) { - return tsl::errors::InvalidArgument( - "expected all function arguments to be shaped types"); - } - - auto args_to_buffers = extractXlaBufferAssignment(main); - TF_ASSIGN_OR_RETURN(auto args, - LoadArgs(snapshot, main.getBody().getArgumentTypes())); - auto out_args = - main.getBody().getBlocks().front().getArguments().drop_front(args.size()); - - absl::flat_hash_map buffer_to_value; - // None of the input arguments will be statically known to alias. - for (auto [index, value] : llvm::enumerate(args)) { - buffer_to_value[args_to_buffers[index]] = value; - } - - std::seed_seq my_seed_seq({0}); - absl::BitGen bitgen(my_seed_seq); - llvm::SmallVector out_buffers; - // Add random inputs for output arguments and unspecified inputs. - for (auto arg : out_args) { - auto ty = arg.getType(); - if (ty == buffer_type) { - // Buffers are used exactly once, in a buffer_to_mem op. - if (!arg.hasOneUse()) { - return tsl::errors::InvalidArgument( - "expected buffer argument to be used eactly once"); - } - ty = arg.getUsers().begin()->getResultTypes().front(); - } - - int64_t buffer_index = args_to_buffers[arg.getArgNumber()]; - // If we already have a buffer for this argument, use it. 
- if (buffer_to_value.contains(buffer_index)) { - auto& value = buffer_to_value[buffer_index]; - out_buffers.push_back(value); - args.push_back(value); - continue; - } - - auto arg_or = MakeRandomInput(bitgen, ty); - if (!succeeded(arg_or)) { - return tsl::errors::InvalidArgument("failed to create input"); - } - out_buffers.push_back(*arg_or); - args.push_back(*arg_or); - buffer_to_value[buffer_index] = *arg_or; - } - - InterpreterOptions options; - ExecutionTraceListener tracer(trace); - if (trace) { - options.listener = &tracer; - } - auto results_or = runInterpreter(symbols, main, args, options); - if (!succeeded(results_or)) { - return tsl::errors::Internal("interpreter failed"); - } - - if (results_or->empty()) { - return out_buffers; - } - return *results_or; -} - -} // namespace interpreter -} // namespace mlir diff --git a/third_party/xla/xla/mlir/tools/mlir_replay/mlir_replay_lib.h b/third_party/xla/xla/mlir/tools/mlir_replay/mlir_replay_lib.h deleted file mode 100644 index e87438b671a2b1..00000000000000 --- a/third_party/xla/xla/mlir/tools/mlir_replay/mlir_replay_lib.h +++ /dev/null @@ -1,40 +0,0 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#ifndef XLA_MLIR_TOOLS_MLIR_REPLAY_MLIR_REPLAY_LIB_H_ -#define XLA_MLIR_TOOLS_MLIR_REPLAY_MLIR_REPLAY_LIB_H_ - -#include -#include - -#include "mlir/Support/LLVM.h" // from @llvm-project -#include "xla/mlir/tools/mlir_replay/public/execution_trace.pb.h" -#include "xla/mlir_hlo/tools/mlir_interpreter/framework/interpreter_value.h" -#include "xla/service/hlo.pb.h" -#include "tsl/platform/statusor.h" - -namespace mlir { -namespace interpreter { - -// Runs the given IR on the inputs from `snapshot` and returns the result. -tsl::StatusOr> Run( - MLIRContext& context, const std::string& mlir_ir, - const xla::HloSnapshot& snapshot, ExecutionTrace* trace, - const std::vector& entry); - -} // namespace interpreter -} // namespace mlir - -#endif // XLA_MLIR_TOOLS_MLIR_REPLAY_MLIR_REPLAY_LIB_H_ diff --git a/third_party/xla/xla/mlir/tools/mlir_replay/public/BUILD b/third_party/xla/xla/mlir/tools/mlir_replay/public/BUILD index e459debb306979..efaa3a84e62d1c 100644 --- a/third_party/xla/xla/mlir/tools/mlir_replay/public/BUILD +++ b/third_party/xla/xla/mlir/tools/mlir_replay/public/BUILD @@ -1,6 +1,5 @@ load("@local_tsl//tsl/platform:build_config.bzl", "tf_proto_library") load("@local_tsl//tsl/platform:rules_cc.bzl", "cc_library") -load("//xla:xla.bzl", "xla_cc_test") package( default_visibility = ["//visibility:public"], @@ -26,47 +25,6 @@ cc_library( ], ) -cc_library( - name = "execution_trace_utils", - srcs = ["execution_trace_utils.cc"], - hdrs = ["execution_trace_utils.h"], - visibility = ["//visibility:public"], - deps = [ - ":execution_trace_proto_cc", - ":execution_trace_proto_cc_impl", - "//xla:literal", - "//xla:xla_data_proto_cc", - "//xla/mlir_hlo:mlir_interpreter_framework", - "@llvm-project//llvm:Support", - "@llvm-project//mlir:FuncDialect", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:Support", - "@local_tsl//tsl/platform:statusor", - ], -) - -xla_cc_test( - name = 
"execution_trace_utils_test", - srcs = ["execution_trace_utils_test.cc"], - deps = [ - ":execution_trace_utils", - "//xla:literal_util", - "//xla/mlir_hlo:mlir_interpreter_framework", - "@com_google_googletest//:gtest_main", - "@llvm-project//llvm:Support", - "@llvm-project//mlir:Support", - "@local_tsl//tsl/platform:statusor", - ], -) - -tf_proto_library( - name = "execution_trace_proto", - srcs = ["execution_trace.proto"], - cc_api_version = 2, - make_default_target_header_only = True, - visibility = ["//visibility:public"], -) - tf_proto_library( name = "compiler_trace_proto", srcs = ["compiler_trace.proto"], diff --git a/third_party/xla/xla/mlir/tools/mlir_replay/public/README.md b/third_party/xla/xla/mlir/tools/mlir_replay/public/README.md index c886abf5ffd1d6..553accf2d0543e 100644 --- a/third_party/xla/xla/mlir/tools/mlir_replay/public/README.md +++ b/third_party/xla/xla/mlir/tools/mlir_replay/public/README.md @@ -1,10 +1,3 @@ -# Public API of mlir_replay +# DEPRECATED: Public API of mlir_replay -This contains protocol buffers and utilities that can be reused for other -debugging tools: - -1. **The compiler trace proto**: A record of the state of the IR after each - compilation pass -1. A compiler instrumentation to create the above proto. -1. **The execution trace proto**: A record of SSA values as the IR is executed -1. Utilities for working with the above protos. +Do not use. This is in the process of being removed. \ No newline at end of file diff --git a/third_party/xla/xla/mlir/tools/mlir_replay/public/execution_trace.proto b/third_party/xla/xla/mlir/tools/mlir_replay/public/execution_trace.proto deleted file mode 100644 index 6be6407505c0bc..00000000000000 --- a/third_party/xla/xla/mlir/tools/mlir_replay/public/execution_trace.proto +++ /dev/null @@ -1,72 +0,0 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -syntax = "proto2"; - -package mlir.interpreter; - -message TracedValue { - // The shape - includes vector dimensions. - // TODO(jreiffers): Model vector dimensions separately. - repeated int64 shape = 1; - optional bool is_scalar = 2; - - enum ElementType { - UNKNOWN = 0; - INTEGRAL = 1; - UNSIGNED = 2; - FLOAT = 3; - COMPLEX = 4; - TUPLE = 5; - } - - optional int32 bit_width = 3; - optional ElementType element_type = 4; - - repeated float floats = 5 [packed = true]; - repeated double doubles = 6 [packed = true]; - repeated int64 ints = 7 [packed = true]; - repeated uint64 uints = 8 [packed = true]; - repeated TracedValue tuple_elements = 9; -} - -message InstructionTrace { - optional string name = 1; - repeated TracedValue args = 2; - repeated TracedValue results = 3; - // TODO(jreiffers): Model side effects (e.g. memref.store). - - repeated RegionTrace regions = 4; -} - -message RegionTrace { - // The number of the region that is being executed (within the parent op). - // For example: '1' for an scf.while's `after` region. - optional int32 region_number = 1; - // The arguments that were passed to the region. - repeated TracedValue bbargs = 2; - // One instruction per instruction in the region. - repeated InstructionTrace instructions = 3; - repeated TracedValue results = 4; -} - -message ExecutionTrace { - // The IR that was executed. 
Note: this should always be filled in the generic - // format. - optional string ir = 1; - - // The trace of the entry function execution. - optional RegionTrace trace = 2; -} \ No newline at end of file diff --git a/third_party/xla/xla/mlir/tools/mlir_replay/public/execution_trace_utils.cc b/third_party/xla/xla/mlir/tools/mlir_replay/public/execution_trace_utils.cc deleted file mode 100644 index 7659bdcf02975f..00000000000000 --- a/third_party/xla/xla/mlir/tools/mlir_replay/public/execution_trace_utils.cc +++ /dev/null @@ -1,447 +0,0 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "xla/mlir/tools/mlir_replay/public/execution_trace_utils.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallVector.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" // from @llvm-project -#include "mlir/IR/BuiltinAttributes.h" // from @llvm-project -#include "mlir/IR/BuiltinTypes.h" // from @llvm-project -#include "xla/mlir/tools/mlir_replay/public/execution_trace.pb.h" -#include "xla/mlir_hlo/tools/mlir_interpreter/framework/interpreter_value.h" -#include "xla/mlir_hlo/tools/mlir_interpreter/framework/tensor_or_memref.h" -#include "tsl/platform/statusor.h" - -namespace mlir { -namespace interpreter { -namespace { - -// Visitor for converting an InterpreterValue to a TracedValue. -struct TraceInterpreterValueVisitor { - TracedValue out; - - void Add(float v) { out.add_floats(v); } - void Add(double v) { out.add_doubles(v); } - void Add(std::complex v) { - out.add_floats(v.real()); - out.add_floats(v.imag()); - } - void Add(std::complex v) { - out.add_doubles(v.real()); - out.add_doubles(v.imag()); - } - void Add(int64_t v) { out.add_ints(v); } - void Add(int32_t v) { out.add_ints(v); } - void Add(int16_t v) { out.add_ints(v); } - void Add(int8_t v) { out.add_ints(v); } - void Add(uint64_t v) { out.add_uints(v); } - void Add(uint32_t v) { out.add_uints(v); } - void Add(uint16_t v) { out.add_uints(v); } - void Add(uint8_t v) { out.add_uints(v); } - void Add(bool v) { out.add_ints(static_cast(v)); } - - template - void operator()(T v) { - SetElementType(); - out.set_is_scalar(true); - Add(v); - } - - void operator()(const Tuple& t) { - out.set_element_type(TracedValue::TUPLE); - for (const auto& v : t.values) { - *out.add_tuple_elements() = ValueToTracedValue(*v); - } - } - - template - void operator()(const TensorOrMemref& v) { - for (int64_t size : v.view.sizes) { - 
out.add_shape(size); - } - SetElementType(); - for (const auto& index : v.view.indices()) { - Add(v.at(index)); - } - } - - template - void SetElementType() { - out.set_element_type(GetElementType(T{})); - if constexpr (std::is_same_v) { - out.set_bit_width(1); - } else { - out.set_bit_width(sizeof(T) * 8); - } - } - - template - static TracedValue::ElementType GetElementType(const T&) { - if constexpr (std::is_floating_point_v) { - return TracedValue::FLOAT; - } else if constexpr (std::is_integral_v) { - if constexpr (std::is_unsigned_v) { - return TracedValue::UNSIGNED; - } else { - return TracedValue::INTEGRAL; - } - } else { - T{"invalid type"} + 0; - return TracedValue::UNKNOWN; - } - } - - template - static TracedValue::ElementType GetElementType(const std::complex&) { - return TracedValue::COMPLEX; - } - - static TracedValue::ElementType GetElementType(const Tuple&) { - return TracedValue::UNKNOWN; - } -}; - -} // namespace - -void ExecutionTraceListener::beforeOp(ArrayRef args, - Operation* op) { - auto* inst = regions_.back()->add_instructions(); - inst->set_name(op->getName().getStringRef().str()); - for (const auto& arg : args) { - *inst->add_args() = ValueToTracedValue(arg); - } -} - -void ExecutionTraceListener::afterOp(ArrayRef results) { - auto* traced_results = - regions_.back()->mutable_instructions()->rbegin()->mutable_results(); - for (const auto& result : results) { - *traced_results->Add() = ValueToTracedValue(result); - } -} - -void ExecutionTraceListener::enterRegion(ArrayRef bbargs, - Region& region) { - if (regions_.empty()) { - regions_.push_back(trace_->mutable_trace()); - } else { - regions_.push_back( - regions_.back()->mutable_instructions()->rbegin()->add_regions()); - } - - auto& traced_region = *regions_.back(); - traced_region.set_region_number(region.getRegionNumber()); - for (const auto& bbarg : bbargs) { - *traced_region.add_bbargs() = ValueToTracedValue(bbarg); - } -} - -void ExecutionTraceListener::leaveRegion(ArrayRef 
yielded) { - for (const auto& result : yielded) { - *regions_.back()->add_results() = ValueToTracedValue(result); - } - regions_.pop_back(); -} - -llvm::SmallVector ValueToAttribute( - const InterpreterValue& value, mlir::Type type) { - if (std::holds_alternative(value.storage)) { - auto types = type.cast().getTypes(); - const auto& t = std::get(value.storage); - llvm::SmallVector attrs; - for (const auto& [v, ty] : llvm::zip(t.values, types)) { - auto attr = ValueToAttribute(*v, ty); - assert(attr.size() == 1 && "nested tuples not supported"); - attrs.push_back(attr.front()); - } - return attrs; - } - - if (!value.isTensor()) { - return {cast( - ValueToAttribute(value.asUnitTensor(), - mlir::RankedTensorType::get({}, type)) - .front()) - .getValues()[0]}; - } - - if (!type.isa()) { - return {}; - } - - auto shaped_ty = type.cast(); - return {dispatchScalarType(shaped_ty, [&](auto dummy) -> mlir::Attribute { - using T = decltype(dummy); - auto& t = std::get>(value.storage); - SmallVector vals; - for (const auto& index : t.view.indices()) { - vals.push_back(t.at(index)); - } - auto attr_ty = - shaped_ty.cloneWith(/*shape=*/t.view.sizes, shaped_ty.getElementType()); - if constexpr (std::is_same_v) { - return mlir::DenseElementsAttr::get(attr_ty, vals); - } else { - return mlir::DenseElementsAttr::get(attr_ty, vals); - } - })}; -} - -namespace { -template -TensorOrMemref ArrayLiteralToTensor(const xla::Literal& literal) { - SmallVector layout; - if (literal.shape().has_layout()) { - llvm::copy(literal.shape().layout().minor_to_major(), - std::back_inserter(layout)); - } - SmallVector shape{literal.shape().dimensions().begin(), - literal.shape().dimensions().end()}; - auto result = TensorOrMemref::empty(shape, layout); - assert(literal.size_bytes() == result.buffer->getByteSize() && - "expected buffer sizes to match"); - memcpy(result.buffer->at(0, 0), literal.untyped_data(), - result.buffer->getByteSize()); - return result; -} -} // namespace - -tsl::StatusOr 
LiteralToValue(const xla::Literal& literal) { - if (literal.shape().IsTuple()) { - auto elements = literal.Clone().DecomposeTuple(); - Tuple result; - for (auto& element : elements) { - TF_ASSIGN_OR_RETURN(auto converted, LiteralToValue(element)); - result.values.push_back( - std::make_shared(std::move(converted))); - } - return {{result}}; - } - - if (literal.shape().IsToken()) { - return tsl::errors::Unimplemented("token arguments are not implemented"); - } - - if (literal.shape().IsArray()) { - switch (literal.shape().element_type()) { - case xla::PRED: - return {{ArrayLiteralToTensor(literal)}}; - case xla::S8: - return {{ArrayLiteralToTensor(literal)}}; - case xla::S16: - return {{ArrayLiteralToTensor(literal)}}; - case xla::S32: - return {{ArrayLiteralToTensor(literal)}}; - case xla::S64: - return {{ArrayLiteralToTensor(literal)}}; - case xla::U8: - return {{ArrayLiteralToTensor(literal)}}; - case xla::U16: - return {{ArrayLiteralToTensor(literal)}}; - case xla::U32: - return {{ArrayLiteralToTensor(literal)}}; - case xla::U64: - return {{ArrayLiteralToTensor(literal)}}; - case xla::F16: - return tsl::errors::Unimplemented("F16 not implemented"); - case xla::F32: - return {{ArrayLiteralToTensor(literal)}}; - case xla::BF16: - return tsl::errors::Unimplemented("BF16 not implemented"); - case xla::F64: - return {{ArrayLiteralToTensor(literal)}}; - case xla::F8E5M2: - return tsl::errors::Unimplemented("F8E5M2 not implemented"); - case xla::F8E4M3FN: - return tsl::errors::Unimplemented("F8E4M3FN not implemented"); - case xla::F8E4M3B11FNUZ: - return tsl::errors::Unimplemented("F8E4M3B11FNUZ not implemented"); - case xla::F8E5M2FNUZ: - return tsl::errors::Unimplemented("F8E5M2FNUZ not implemented"); - case xla::F8E4M3FNUZ: - return tsl::errors::Unimplemented("F8E4M3FNUZ not implemented"); - case xla::C64: - return {{ArrayLiteralToTensor>(literal)}}; - case xla::C128: - return {{ArrayLiteralToTensor>(literal)}}; - default: - // Fallthrough intended. 
- break; - } - } - - return tsl::errors::InvalidArgument("unexpected literal type"); -} - -tsl::StatusOr LiteralToValue( - const xla::LiteralProto& literal) { - TF_ASSIGN_OR_RETURN(auto deserialized, - xla::Literal::CreateFromProto(literal)); - return LiteralToValue(deserialized); -} - -tsl::StatusOr LiteralToValue(const xla::LiteralProto& literal, - mlir::Type type) { - TF_ASSIGN_OR_RETURN(auto result, LiteralToValue(literal)); - return {dispatchScalarType(type, [&](auto dummy) -> InterpreterValue { - TensorOrMemref cast; - cast.view = result.view(); - cast.buffer = result.buffer(); - return {cast}; - })}; -} - -TracedValue ValueToTracedValue(const InterpreterValue& value) { - TraceInterpreterValueVisitor visitor; - std::visit(visitor, value.storage); - return visitor.out; -} - -tsl::StatusOr TracedValueToValue( - const TracedValue& traced_value) { - auto extract = [&](auto dummy, auto& elements) -> InterpreterValue { - using T = decltype(dummy); - if (traced_value.is_scalar()) { - return {static_cast(elements[0])}; - } - - auto result = - TensorOrMemref::empty(llvm::to_vector(traced_value.shape())); - for (auto [index, element] : llvm::zip(result.view.indices(), elements)) { - result.at(index) = element; - } - return {result}; - }; - auto extract_complex = [&](auto& elements) -> InterpreterValue { - using T = std::complex>; - if (traced_value.is_scalar()) { - return {T{elements[0], elements[1]}}; - } - - auto result = - TensorOrMemref::empty(llvm::to_vector(traced_value.shape())); - int64_t i = 0; - for (auto it = result.view.indices().begin(), - end = result.view.indices().end(); - it != end; ++it, i += 2) { - result.at(*it) = {elements[i], elements[i + 1]}; - } - return {result}; - }; - switch (traced_value.element_type()) { - case TracedValue::UNKNOWN: - break; - case TracedValue::FLOAT: - if (traced_value.bit_width() == 32) { - return extract(float{}, traced_value.floats()); - } - return extract(double{}, traced_value.doubles()); - case TracedValue::UNSIGNED: 
- switch (traced_value.bit_width()) { - case 1: - return extract(bool{}, traced_value.ints()); - case 8: - return extract(uint8_t{}, traced_value.uints()); - case 16: - return extract(uint16_t{}, traced_value.uints()); - case 32: - return extract(uint32_t{}, traced_value.uints()); - case 64: - return extract(uint64_t{}, traced_value.uints()); - } - break; - case TracedValue::INTEGRAL: - switch (traced_value.bit_width()) { - case 8: - return extract(int8_t{}, traced_value.ints()); - case 16: - return extract(int16_t{}, traced_value.ints()); - case 32: - return extract(int32_t{}, traced_value.ints()); - case 64: - return extract(int64_t{}, traced_value.ints()); - } - break; - case TracedValue::COMPLEX: - switch (traced_value.bit_width()) { - case 64: - return extract_complex(traced_value.floats()); - case 128: - return extract_complex(traced_value.doubles()); - } - break; - case TracedValue::TUPLE: - Tuple result; - for (const auto& elem : traced_value.tuple_elements()) { - TF_ASSIGN_OR_RETURN(auto converted, TracedValueToValue(elem)); - result.values.push_back( - std::make_shared(std::move(converted))); - } - return {{std::move(result)}}; - } - return tsl::errors::InvalidArgument("unexpected type: " + - traced_value.DebugString()); -} - -llvm::SmallVector FindOpExecutionsInTrace( - const ExecutionTrace& trace, mlir::Operation* op) { - llvm::SmallVector region_indices; - llvm::SmallVector op_indices; - - std::function get_op_path; - get_op_path = [&](mlir::Operation* op) { - auto* parent = op->getParentOp(); - if (!llvm::isa(parent)) { - get_op_path(parent); - region_indices.push_back(op->getParentRegion()->getRegionNumber()); - } - - int64_t index = 0; - while ((op = op->getPrevNode()) != nullptr) ++index; - op_indices.push_back(index); - }; - get_op_path(op); - - llvm::SmallVector result; - std::function step; - step = [&](const RegionTrace& trace, int index) { - auto& instruction_trace = trace.instructions(op_indices[index]); - if (region_indices.size() > index) { 
- for (const auto& region : instruction_trace.regions()) { - if (region.region_number() == region_indices[index]) { - step(region, index + 1); - } - } - } else { - result.push_back(&instruction_trace); - } - }; - step(trace.trace(), 0); - - return result; -} - -} // namespace interpreter -} // namespace mlir diff --git a/third_party/xla/xla/mlir/tools/mlir_replay/public/execution_trace_utils.h b/third_party/xla/xla/mlir/tools/mlir_replay/public/execution_trace_utils.h deleted file mode 100644 index a152e0e8f90652..00000000000000 --- a/third_party/xla/xla/mlir/tools/mlir_replay/public/execution_trace_utils.h +++ /dev/null @@ -1,76 +0,0 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#ifndef XLA_MLIR_TOOLS_MLIR_REPLAY_PUBLIC_EXECUTION_TRACE_UTILS_H_ -#define XLA_MLIR_TOOLS_MLIR_REPLAY_PUBLIC_EXECUTION_TRACE_UTILS_H_ - -#include "mlir/IR/Attributes.h" // from @llvm-project -#include "mlir/IR/Operation.h" // from @llvm-project -#include "mlir/IR/Region.h" // from @llvm-project -#include "mlir/Support/LLVM.h" // from @llvm-project -#include "xla/literal.h" -#include "xla/mlir/tools/mlir_replay/public/execution_trace.pb.h" -#include "xla/mlir_hlo/tools/mlir_interpreter/framework/interpreter.h" -#include "xla/mlir_hlo/tools/mlir_interpreter/framework/interpreter_value.h" -#include "xla/xla_data.pb.h" -#include "tsl/platform/statusor.h" - -namespace mlir { -namespace interpreter { - -// Interpreter listener that builds a trace of all executed ops and regions. -class ExecutionTraceListener : public InterpreterListener { - public: - explicit ExecutionTraceListener(ExecutionTrace* trace) : trace_(trace) {} - - void beforeOp(ArrayRef args, Operation* op) override; - void afterOp(ArrayRef results) override; - void enterRegion(ArrayRef bbargs, Region& region) override; - void leaveRegion(ArrayRef yielded) override; - - private: - ExecutionTrace* trace_; - SmallVector regions_; -}; - -// Returns an attribute with the given contents and type. -llvm::SmallVector ValueToAttribute( - const InterpreterValue& value, mlir::Type type); - -// Deserializes the given literal. -tsl::StatusOr LiteralToValue( - const xla::LiteralProto& literal); -// Deserializes the given literal and then casts it to the given type. -tsl::StatusOr LiteralToValue(const xla::LiteralProto& literal, - mlir::Type type); - -// Deserializes the given literal. -tsl::StatusOr LiteralToValue(const xla::Literal& literal); - -// Serializes the given interpreter value. -TracedValue ValueToTracedValue(const InterpreterValue& value); - -// Deserializes the given traced value. 
-tsl::StatusOr TracedValueToValue( - const TracedValue& traced_value); - -// Returns all executions of the given op in the given trace. -llvm::SmallVector FindOpExecutionsInTrace( - const ExecutionTrace& trace, mlir::Operation* op); - -} // namespace interpreter -} // namespace mlir - -#endif // XLA_MLIR_TOOLS_MLIR_REPLAY_PUBLIC_EXECUTION_TRACE_UTILS_H_ diff --git a/third_party/xla/xla/mlir/tools/mlir_replay/public/execution_trace_utils_test.cc b/third_party/xla/xla/mlir/tools/mlir_replay/public/execution_trace_utils_test.cc deleted file mode 100644 index c8cdfe6125412f..00000000000000 --- a/third_party/xla/xla/mlir/tools/mlir_replay/public/execution_trace_utils_test.cc +++ /dev/null @@ -1,138 +0,0 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "xla/mlir/tools/mlir_replay/public/execution_trace_utils.h" - -#include -#include -#include -#include -#include -#include - -#include -#include "llvm/ADT/STLExtras.h" -#include "mlir/Support/LLVM.h" // from @llvm-project -#include "xla/literal_util.h" -#include "xla/mlir_hlo/tools/mlir_interpreter/framework/interpreter_value.h" -#include "tsl/platform/statusor.h" - -namespace mlir { -namespace interpreter { -namespace { - -class TracedValueRoundTripTest - : public ::testing::TestWithParam {}; - -TEST_P(TracedValueRoundTripTest, Run) { - auto traced_value = ValueToTracedValue(GetParam()); - TF_ASSERT_OK_AND_ASSIGN(auto value, TracedValueToValue(traced_value)); - EXPECT_EQ(GetParam(), value) << GetParam().toString(); -} - -template -InterpreterValue MakeTensor(ArrayRef shape, ArrayRef values) { - auto result = TensorOrMemref::empty(shape); - for (auto [indices, value] : llvm::zip(result.view.indices(), values)) { - result.at(indices) = value; - } - return {result}; -} - -template -std::shared_ptr WrapShared(T value) { - return std::make_shared(std::move(value)); -} - -INSTANTIATE_TEST_SUITE_P( - RoundTrip, TracedValueRoundTripTest, - ::testing::ValuesIn(std::vector{ - {uint8_t{42}}, - {uint16_t{43}}, - {uint32_t{44}}, - {uint64_t{45}}, - {int8_t{-47}}, - {int16_t{-48}}, - {int32_t{-49}}, - {int64_t{-50}}, - {float{42.0}}, - {double{42.0}}, - {std::complex{1.0, 2.0}}, - {std::complex{3.0, 4.0}}, - {true}, - {false}, - {MakeTensor({1, 2}, {42, 43})}, - {MakeTensor({2, 2}, {1.0, -INFINITY, INFINITY, NAN})}, - {MakeTensor>({}, {{1.0, 2.0}})}, - {Tuple{SmallVector>{ - WrapShared(InterpreterValue{42}), - WrapShared(InterpreterValue{43.0}), - }}}})); - -class FromLiteralTest - : public ::testing::TestWithParam< - std::pair, InterpreterValue>> {}; - -TEST_P(FromLiteralTest, Run) { - TF_ASSERT_OK_AND_ASSIGN(auto value, LiteralToValue(*GetParam().first)); - EXPECT_EQ(value, 
GetParam().second) - << value.toString() << " vs " << GetParam().second.toString(); -} - -std::vector, InterpreterValue>> -MakeInputs() { - using ::xla::LiteralUtil; - return { - {WrapShared(LiteralUtil::CreateR2({{41, 42}})), - MakeTensor({1, 2}, {41, 42})}, - {WrapShared(LiteralUtil::CreateR0(43)), - MakeTensor({}, {43})}, - {WrapShared(LiteralUtil::CreateR0(44)), - MakeTensor({}, {44})}, - {WrapShared(LiteralUtil::CreateR0(45)), - MakeTensor({}, {45})}, - {WrapShared(LiteralUtil::CreateR0(46)), - MakeTensor({}, {46})}, - {WrapShared(LiteralUtil::CreateR0(47)), - MakeTensor({}, {47})}, - {WrapShared(LiteralUtil::CreateR0(48)), - MakeTensor({}, {48})}, - {WrapShared(LiteralUtil::CreateR0(49)), - MakeTensor({}, {49})}, - {WrapShared(LiteralUtil::CreateR0(50.0)), - MakeTensor({}, {50.0})}, - {WrapShared(LiteralUtil::CreateR0(51.0)), - MakeTensor({}, {51.0})}, - {WrapShared(LiteralUtil::CreateR0>({52.0, 53.0})), - MakeTensor>({}, {{52.0, 53.0}})}, - {WrapShared(LiteralUtil::CreateR0>({54.0, 55.0})), - MakeTensor>({}, {{54.0, 55.0}})}, - {WrapShared(LiteralUtil::CreateR1({true, false})), - MakeTensor({2}, {true, false})}, - {WrapShared( - LiteralUtil::MakeTupleOwned(LiteralUtil::CreateR0(true), - LiteralUtil::CreateR0(56))), - InterpreterValue{Tuple{SmallVector>{ - std::make_shared(MakeTensor({}, {true})), - std::make_shared( - MakeTensor({}, {56}))}}}}}; -} - -INSTANTIATE_TEST_SUITE_P(Test, FromLiteralTest, - ::testing::ValuesIn(MakeInputs())); - -} // namespace -} // namespace interpreter -} // namespace mlir diff --git a/third_party/xla/xla/mlir_hlo/BUILD b/third_party/xla/xla/mlir_hlo/BUILD index 66e05085683514..f55e0dad52f3f6 100644 --- a/third_party/xla/xla/mlir_hlo/BUILD +++ b/third_party/xla/xla/mlir_hlo/BUILD @@ -2023,120 +2023,3 @@ cc_binary( linkstatic = False, deps = ["@llvm-project//mlir:mlir_c_runner_utils"], ) - -cc_library( - name = "mlir_interpreter_dialects", - srcs = glob( - [ - "tools/mlir_interpreter/dialects/*.cc", - ], - exclude = 
["tools/mlir_interpreter/dialects/util.cc"], - ), - strip_include_prefix = ".", - visibility = ["//visibility:public"], - deps = [ - ":deallocation", - ":gml_st", - ":mlir_hlo", - ":mlir_interpreter_dialect_utils", - ":mlir_interpreter_framework", - ":thlo", - "@com_google_absl//absl/strings", - "@llvm-project//llvm:Support", - "@llvm-project//mlir:AffineDialect", - "@llvm-project//mlir:ArithDialect", - "@llvm-project//mlir:BufferizationDialect", - "@llvm-project//mlir:ComplexDialect", - "@llvm-project//mlir:FuncDialect", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:LinalgDialect", - "@llvm-project//mlir:MemRefDialect", - "@llvm-project//mlir:SCFDialect", - "@llvm-project//mlir:TensorDialect", - "@llvm-project//mlir:VectorDialect", - "@llvm-project//mlir:ViewLikeInterface", - ], - alwayslink = 1, -) - -cc_library( - name = "mlir_interpreter_dialect_utils", - srcs = [ - "tools/mlir_interpreter/dialects/util.cc", - ], - hdrs = [ - "tools/mlir_interpreter/dialects/comparators.h", - "tools/mlir_interpreter/dialects/cwise_math.h", - "tools/mlir_interpreter/dialects/util.h", - ], - strip_include_prefix = ".", - visibility = ["//visibility:public"], - deps = [ - ":mlir_interpreter_framework", - "@com_google_absl//absl/strings", - "@llvm-project//llvm:Support", - "@llvm-project//mlir:ArithDialect", - "@llvm-project//mlir:FuncDialect", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:Support", - "@llvm-project//mlir:TensorDialect", - "@llvm-project//mlir:ViewLikeInterface", - ], -) - -cc_library( - name = "mlir_interpreter_framework", - srcs = [ - "tools/mlir_interpreter/framework/interpreter.cc", - "tools/mlir_interpreter/framework/interpreter_value.cc", - "tools/mlir_interpreter/framework/registration.cc", - "tools/mlir_interpreter/framework/tensor_or_memref.cc", - ], - hdrs = [ - "tools/mlir_interpreter/framework/interpreter.h", - "tools/mlir_interpreter/framework/interpreter_value.h", - "tools/mlir_interpreter/framework/interpreter_value_util.h", - 
"tools/mlir_interpreter/framework/registration.h", - "tools/mlir_interpreter/framework/tensor_or_memref.h", - ], - strip_include_prefix = ".", - visibility = ["//visibility:public"], - deps = [ - "@com_google_absl//absl/strings", - "@llvm-project//llvm:Support", - "@llvm-project//mlir:DialectUtils", - "@llvm-project//mlir:FuncDialect", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:Support", - "@local_tsl//tsl/platform:logging", - ], -) - -build_test( - name = "mlir-interpreter-runner_build_test", - targets = [ - ":mlir-interpreter-runner", - ], -) - -cc_binary( - name = "mlir-interpreter-runner", - srcs = ["tools/mlir_interpreter/mlir-interpreter-runner.cc"], - deps = [ - ":deallocation", - ":gml_st", - ":hlo_dialect_registration", - ":lhlo", - ":lhlo_gpu", - ":mhlo_passes", - ":mlir_interpreter_dialects", - ":mlir_interpreter_framework", - ":thlo", - "@llvm-project//llvm:Support", - "@llvm-project//mlir:AllPassesAndDialects", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:MlirReduceLib", - "@llvm-project//mlir:Pass", - "@llvm-project//mlir:Support", - ], -) diff --git a/third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/affine.cc b/third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/affine.cc deleted file mode 100644 index 6cb8f0b85465f2..00000000000000 --- a/third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/affine.cc +++ /dev/null @@ -1,51 +0,0 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include - -#include "mlir/Dialect/Affine/IR/AffineOps.h" -#include "tools/mlir_interpreter/dialects/util.h" -#include "tools/mlir_interpreter/framework/interpreter.h" -#include "tools/mlir_interpreter/framework/interpreter_value_util.h" -#include "tools/mlir_interpreter/framework/registration.h" - -namespace mlir { -namespace interpreter { -namespace { - -llvm::SmallVector apply(InterpreterState&, affine::AffineApplyOp op, - ArrayRef operands) { - return evalAffineMap(op.getAffineMap(), operands); -} - -int64_t min(InterpreterState&, affine::AffineMinOp op, - ArrayRef operands) { - auto results = evalAffineMap(op.getAffineMap(), operands); - return *std::min_element(results.begin(), results.end()); -} - -int64_t max(InterpreterState&, affine::AffineMaxOp op, - ArrayRef operands) { - auto results = evalAffineMap(op.getAffineMap(), operands); - return *std::max_element(results.begin(), results.end()); -} - -REGISTER_MLIR_INTERPRETER_OP(apply); -REGISTER_MLIR_INTERPRETER_OP(max); -REGISTER_MLIR_INTERPRETER_OP(min); - -} // namespace -} // namespace interpreter -} // namespace mlir diff --git a/third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/arith.cc b/third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/arith.cc deleted file mode 100644 index d11c9d5342d2fb..00000000000000 --- a/third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/arith.cc +++ /dev/null @@ -1,305 +0,0 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "mlir/Dialect/Arith/IR/Arith.h" - -#include // NOLINT - -#include "llvm/Support/ErrorHandling.h" -#include "tools/mlir_interpreter/dialects/comparators.h" -#include "tools/mlir_interpreter/dialects/cwise_math.h" -#include "tools/mlir_interpreter/framework/interpreter.h" -#include "tools/mlir_interpreter/framework/interpreter_value.h" -#include "tools/mlir_interpreter/framework/interpreter_value_util.h" -#include "tools/mlir_interpreter/framework/registration.h" - -namespace mlir { -namespace interpreter { -namespace { - -InterpreterValue bitcast(InterpreterState&, arith::BitcastOp op, - const InterpreterValue& in) { - Type ty = op->getResultTypes()[0]; - auto shapedTy = ty.dyn_cast(); - auto result = dispatchScalarType(ty, [&](auto dummy) -> InterpreterValue { - TensorOrMemref result; - result.view = {}; - if (shapedTy) { - result.buffer = in.clone().buffer(); - } else { - result.buffer = in.asUnitTensor().buffer(); - } - return {result}; - }); - if (!shapedTy) { - return result.extractElement({}); - } - auto& outView = result.view(); - outView.strides = BufferView::getDefaultStrides(shapedTy.getShape()); - outView.sizes = llvm::to_vector(shapedTy.getShape()); - return result; -} - -InterpreterValue constant(InterpreterState&, arith::ConstantOp constant) { - auto ty = constant->getResultTypes()[0]; - auto shapedType = ty.dyn_cast(); - auto elemTy = shapedType ? 
shapedType.getElementType() : ty; - return dispatchScalarType(elemTy, [&](auto dummy) -> InterpreterValue { - using T = decltype(dummy); - if (shapedType) { - auto values = - constant.getValue().cast().getValues(); - auto result = TensorOrMemref::empty(shapedType.getShape()); - auto valueIt = values.begin(); - result.view.isVector = shapedType.isa(); - for (const auto& index : result.view.indices(true)) { - result.at(index) = *valueIt; - ++valueIt; - } - return {result}; - } - - auto value = constant.getValue(); - if (auto integer = value.dyn_cast()) { - return {static_cast(integer.getInt())}; - } - if (auto floatValue = value.dyn_cast()) { - return {static_cast(floatValue.getValueAsDouble())}; - } - - llvm_unreachable("unsupported constant type"); - }); -} - -template -InterpreterValue intCast(InterpreterState&, Op op, - const InterpreterValue& arg) { - if (arg.isTensor()) { - return dispatchScalarType( - op->getResultTypes()[0], [&](auto dummy) -> InterpreterValue { - auto result = TensorOrMemref::emptyLike(arg.view()); - for (const auto& index : result.view.indices()) { - result.at(index) = - static_cast(arg.extractElement(index).asInt()); - } - return {result}; - }); - } - - return dispatchScalarType( - op->getResultTypes()[0], [&](auto dummy) -> InterpreterValue { - return {static_cast(arg.asInt())}; - }); -} - -template -InterpreterValue floatCast(InterpreterState&, Op op, - const InterpreterValue& arg) { - if (arg.isTensor()) { - return dispatchScalarType( - op->getResultTypes()[0], [&](auto dummy) -> InterpreterValue { - auto result = TensorOrMemref::emptyLike(arg.view()); - for (const auto& index : result.view.indices()) { - result.at(index) = static_cast( - arg.extractElement(index).asDouble()); - } - return {result}; - }); - } - - return dispatchScalarType( - op->getResultTypes()[0], [&](auto dummy) -> InterpreterValue { - return {static_cast(arg.asDouble())}; - }); -} - -llvm::SmallVector uiToFP( - MutableArrayRef args, mlir::Operation* op, - 
InterpreterState&) { - if (args[0].isTensor()) { - auto ty = op->getResultTypes()[0].cast(); - return {dispatchScalarType( - ty.getElementType(), [&](auto dummy) -> InterpreterValue { - auto result = - TensorOrMemref::emptyLike(args[0].view()); - for (const auto& index : result.view.indices()) { - result.at(index) = static_cast( - args[0].extractElement(index).asUInt()); - } - return {result}; - })}; - } - - return {dispatchScalarType( - op->getResultTypes()[0], [&](auto dummy) -> InterpreterValue { - return {static_cast(args[0].asUInt())}; - })}; -} - -InterpreterValue cmpI(InterpreterState&, arith::CmpIOp compare, - const InterpreterValue& lhs, - const InterpreterValue& rhs) { - switch (compare.getPredicate()) { - case arith::CmpIPredicate::eq: - return applyCwiseBinaryMap(lhs, rhs); - case arith::CmpIPredicate::ne: - return applyCwiseBinaryMap(lhs, rhs); - case arith::CmpIPredicate::slt: - return applyCwiseBinaryMap(lhs, rhs); - case arith::CmpIPredicate::sle: - return applyCwiseBinaryMap(lhs, rhs); - case arith::CmpIPredicate::sgt: - return applyCwiseBinaryMap(lhs, rhs); - case arith::CmpIPredicate::sge: - return applyCwiseBinaryMap(lhs, rhs); - case arith::CmpIPredicate::ult: - return applyCwiseBinaryMap(lhs, rhs); - case arith::CmpIPredicate::ule: - return applyCwiseBinaryMap(lhs, rhs); - case arith::CmpIPredicate::ugt: - return applyCwiseBinaryMap(lhs, rhs); - case arith::CmpIPredicate::uge: - return applyCwiseBinaryMap(lhs, rhs); - } -} - -template -struct ConstFunctor : CwiseAll { - template - static bool apply(T, T) { - return value; - } -}; - -InterpreterValue cmpF(InterpreterState&, arith::CmpFOp compare, - const InterpreterValue& lhs, - const InterpreterValue& rhs) { - switch (compare.getPredicate()) { - case arith::CmpFPredicate::AlwaysFalse: - return applyCwiseBinaryMap>(lhs, rhs); - case arith::CmpFPredicate::OEQ: - return applyCwiseBinaryMap(lhs, rhs); - case arith::CmpFPredicate::OGT: - return applyCwiseBinaryMap(lhs, rhs); - case 
arith::CmpFPredicate::OGE: - return applyCwiseBinaryMap(lhs, rhs); - case arith::CmpFPredicate::OLT: - return applyCwiseBinaryMap(lhs, rhs); - case arith::CmpFPredicate::OLE: - return applyCwiseBinaryMap(lhs, rhs); - case arith::CmpFPredicate::ONE: - return applyCwiseBinaryMap(lhs, rhs); - case arith::CmpFPredicate::ORD: - return applyCwiseBinaryMap(lhs, rhs); - case arith::CmpFPredicate::UEQ: - return applyCwiseBinaryMap(lhs, rhs); - case arith::CmpFPredicate::UGT: - return applyCwiseBinaryMap(lhs, rhs); - case arith::CmpFPredicate::UGE: - return applyCwiseBinaryMap(lhs, rhs); - case arith::CmpFPredicate::ULT: - return applyCwiseBinaryMap(lhs, rhs); - case arith::CmpFPredicate::ULE: - return applyCwiseBinaryMap(lhs, rhs); - case arith::CmpFPredicate::UNE: - return applyCwiseBinaryMap(lhs, rhs); - case arith::CmpFPredicate::UNO: - return applyCwiseBinaryMap(lhs, rhs); - case arith::CmpFPredicate::AlwaysTrue: - return applyCwiseBinaryMap>(lhs, rhs); - } -} - -InterpreterValue select(InterpreterState& state, arith::SelectOp, - const InterpreterValue& cond, - const InterpreterValue& trueValue, - const InterpreterValue& falseValue) { - if (std::holds_alternative(cond.storage)) { - return std::get(cond.storage) ? 
trueValue : falseValue; - } - - if (!cond.isTensor() && !cond.view().isVector) { - state.addFailure("select requires a scalar or vector argument"); - return {}; - } - - auto ret = trueValue.clone(); - for (const auto& index : cond.view().indices(/*includeVectorDims=*/true)) { - if (cond.extractElement(index).asInt() == 0) { - ret.insertElement(index, falseValue.extractElement(index)); - } - } - return ret; -} - -template -struct ExtFFunctor : CwiseFloat { - template - static R apply(A v) { - return v; - } -}; - -InterpreterValue extF(InterpreterState&, arith::ExtFOp op, - const InterpreterValue& in) { - return dispatchScalarType( - op->getResultTypes()[0], [&](auto dummy) -> InterpreterValue { - return applyCwiseMap>(in); - }); -} - -REGISTER_MLIR_INTERPRETER_OP("arith.addf", applyCwiseBinaryMap); -REGISTER_MLIR_INTERPRETER_OP("arith.andi", applyCwiseBinaryMap); -REGISTER_MLIR_INTERPRETER_OP("arith.divf", applyCwiseBinaryMap); -REGISTER_MLIR_INTERPRETER_OP("arith.extui", uiToFP); -REGISTER_MLIR_INTERPRETER_OP("arith.maxf", applyCwiseBinaryMap); -REGISTER_MLIR_INTERPRETER_OP("arith.minf", applyCwiseBinaryMap); -REGISTER_MLIR_INTERPRETER_OP("arith.mulf", applyCwiseBinaryMap); -REGISTER_MLIR_INTERPRETER_OP("arith.negf", applyCwiseMap); -REGISTER_MLIR_INTERPRETER_OP("arith.ori", applyCwiseBinaryMap); -REGISTER_MLIR_INTERPRETER_OP("arith.remf", applyCwiseBinaryMap); -REGISTER_MLIR_INTERPRETER_OP("arith.subf", applyCwiseBinaryMap); -REGISTER_MLIR_INTERPRETER_OP("arith.uitofp", uiToFP); -REGISTER_MLIR_INTERPRETER_OP("arith.xori", applyCwiseBinaryMap); -REGISTER_MLIR_INTERPRETER_OP("arith.shrui", - applyCwiseBinaryMap); -REGISTER_MLIR_INTERPRETER_OP("arith.shrsi", - applyCwiseBinaryMap); -REGISTER_MLIR_INTERPRETER_OP("arith.shli", applyCwiseBinaryMap); - -// The float implementations support ints too. 
-REGISTER_MLIR_INTERPRETER_OP("arith.addi", "arith.addf"); -REGISTER_MLIR_INTERPRETER_OP("arith.divsi", "arith.divf"); -REGISTER_MLIR_INTERPRETER_OP("arith.maxsi", "arith.maxf"); -REGISTER_MLIR_INTERPRETER_OP("arith.minsi", "arith.minf"); -REGISTER_MLIR_INTERPRETER_OP("arith.muli", "arith.mulf"); -REGISTER_MLIR_INTERPRETER_OP("arith.remsi", "arith.remf"); -REGISTER_MLIR_INTERPRETER_OP("arith.subi", "arith.subf"); - -REGISTER_MLIR_INTERPRETER_OP(bitcast); -REGISTER_MLIR_INTERPRETER_OP(cmpF); -REGISTER_MLIR_INTERPRETER_OP(cmpI); -REGISTER_MLIR_INTERPRETER_OP(constant); -REGISTER_MLIR_INTERPRETER_OP(extF); -REGISTER_MLIR_INTERPRETER_OP(floatCast); -REGISTER_MLIR_INTERPRETER_OP(intCast); -REGISTER_MLIR_INTERPRETER_OP(intCast); -REGISTER_MLIR_INTERPRETER_OP(intCast); -REGISTER_MLIR_INTERPRETER_OP(intCast); -REGISTER_MLIR_INTERPRETER_OP(select); - -} // namespace -} // namespace interpreter -} // namespace mlir diff --git a/third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/bufferization.cc b/third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/bufferization.cc deleted file mode 100644 index 1b65950c4d79c7..00000000000000 --- a/third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/bufferization.cc +++ /dev/null @@ -1,69 +0,0 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "mlir/Dialect/Bufferization/IR/Bufferization.h" - -#include // NOLINT -#include // NOLINT - -#include "tools/mlir_interpreter/dialects/util.h" -#include "tools/mlir_interpreter/framework/interpreter.h" -#include "tools/mlir_interpreter/framework/registration.h" - -namespace mlir { -namespace interpreter { -namespace { - -InterpreterValue toTensor(InterpreterState&, bufferization::ToTensorOp, - const InterpreterValue& in) { - return in.clone(); -} - -InterpreterValue toMemref(InterpreterState&, bufferization::ToMemrefOp, - const InterpreterValue& in) { - return in; -} - -InterpreterValue allocTensor( - InterpreterState&, bufferization::AllocTensorOp alloc, - ArrayRef dynamicSizes, std::optional copy, - const std::optional& /*sizeHint*/) { - auto ty = alloc->getResultTypes().front().cast(); - auto shape = replaceDynamicVals(ty.getShape(), dynamicSizes); - - if (copy) { - return copy->clone(); - } - return InterpreterValue::makeTensor(ty.getElementType(), shape); -} - -InterpreterValue clone(InterpreterState& state, bufferization::CloneOp, - const InterpreterValue& in) { - if (auto* stats = state.getOptions().stats) { - stats->heapSize += in.buffer()->getByteSize(); - stats->peakHeapSize = std::max(stats->peakHeapSize, stats->heapSize); - ++stats->numAllocations; - } - return in.clone(); -} - -REGISTER_MLIR_INTERPRETER_OP(allocTensor); -REGISTER_MLIR_INTERPRETER_OP(clone); -REGISTER_MLIR_INTERPRETER_OP(toMemref); -REGISTER_MLIR_INTERPRETER_OP(toTensor); - -} // namespace -} // namespace interpreter -} // namespace mlir diff --git a/third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/builtin.cc b/third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/builtin.cc deleted file mode 100644 index 19e039db0c7108..00000000000000 --- a/third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/builtin.cc +++ /dev/null @@ -1,54 +0,0 @@ -/* Copyright 2022 The TensorFlow 
Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tools/mlir_interpreter/framework/interpreter.h" -#include "tools/mlir_interpreter/framework/registration.h" - -namespace mlir { -namespace interpreter { -namespace { - -llvm::SmallVector unrealizedConversionCast( - MutableArrayRef args, mlir::Operation* op, - InterpreterState&) { - auto resultTy = op->getResultTypes()[0]; - auto operandTy = op->getOperandTypes()[0]; - if (resultTy == operandTy) { - return {args[0]}; - } - - if (auto r = llvm::dyn_cast(resultTy)) { - if (auto o = llvm::dyn_cast(operandTy)) { - if (verifyCompatibleShapes({o, r}).succeeded()) { - return {dispatchScalarType(r, [&](auto dummy) -> InterpreterValue { - TensorOrMemref result; - result.view = args[0].view(); - result.buffer = args[0].buffer(); - return {result}; - })}; - } - } - } - - llvm::errs() << "Unimplemented cast: " << *op << "\n"; - llvm_unreachable("unimplemented cast"); -} - -REGISTER_MLIR_INTERPRETER_OP("builtin.unrealized_conversion_cast", - unrealizedConversionCast); - -} // namespace -} // namespace interpreter -} // namespace mlir diff --git a/third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/comparators.h b/third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/comparators.h deleted file mode 100644 index 397f95c36c2a77..00000000000000 --- 
a/third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/comparators.h +++ /dev/null @@ -1,104 +0,0 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef MLIR_HLO_TOOLS_MLIR_INTERPRETER_DIALECTS_COMPARATORS_H_ -#define MLIR_HLO_TOOLS_MLIR_INTERPRETER_DIALECTS_COMPARATORS_H_ - -#include -#include - -#include "llvm/Support/ErrorHandling.h" -#include "tools/mlir_interpreter/framework/interpreter_value_util.h" - -namespace mlir { -namespace interpreter { - -// Despite the name, this works on integers and complex too. -template -struct FloatCompare : CwiseAll { - template - static bool apply(T a, T b) { - if (isnan(a) || isnan(b)) return nan_result; - if constexpr (v == 0) { - // For complex eq/ne. - return (a == b) == r; - } else if constexpr (std::is_floating_point_v || std::is_integral_v) { - auto cmp = a > b ? 1 : (a < b ? 
-1 : 0); - return (cmp == v) == r; - } else { - llvm_unreachable("operation not supported for this type"); - } - } - - template - static bool isnan(T a) { - return std::isnan(a); - } - template - static bool isnan(std::complex a) { - return std::isnan(std::real(a)) || std::isnan(std::imag(a)); - } -}; - -using Foeq = FloatCompare<0, true, false>; -using Foge = FloatCompare<-1, false, false>; -using Fogt = FloatCompare<1, true, false>; -using Fole = FloatCompare<1, false, false>; -using Folt = FloatCompare<-1, true, false>; -using Fone = FloatCompare<0, false, false>; -using Ford = FloatCompare<99, false, false>; -using Fueq = FloatCompare<0, true, true>; -using Fuge = FloatCompare<-1, false, true>; -using Fugt = FloatCompare<1, true, true>; -using Fule = FloatCompare<1, false, true>; -using Fult = FloatCompare<-1, true, true>; -using Fune = FloatCompare<0, false, true>; -using Funo = FloatCompare<99, true, true>; - -template -struct UnsignedCompare : CwiseInt { - template - static bool apply(T a, T b) { - using U = std::make_unsigned_t; - auto aU = static_cast(a); - auto bU = static_cast(b); - auto cmp = aU > bU ? 1 : (aU < bU ? -1 : 0); - return (cmp == v) == r; - } -}; - -using Iuge = UnsignedCompare<-1, false>; -using Iule = UnsignedCompare<1, false>; -using Iugt = UnsignedCompare<1, true>; -using Iult = UnsignedCompare<-1, true>; - -struct Iumax { - template - static T apply(T a, T b) { - return Iuge::apply(a, b) ? a : b; - } -}; - -struct Iumin { - template - static T apply(T a, T b) { - return Iule::apply(a, b) ? 
a : b; - } -}; - -} // namespace interpreter -} // namespace mlir - -#endif // MLIR_HLO_TOOLS_MLIR_INTERPRETER_DIALECTS_COMPARATORS_H_ diff --git a/third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/complex.cc b/third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/complex.cc deleted file mode 100644 index 2f420ab68d1fec..00000000000000 --- a/third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/complex.cc +++ /dev/null @@ -1,63 +0,0 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "mlir/Dialect/Complex/IR/Complex.h" - -#include "tools/mlir_interpreter/dialects/cwise_math.h" -#include "tools/mlir_interpreter/framework/interpreter_value_util.h" -#include "tools/mlir_interpreter/framework/registration.h" - -namespace mlir { -namespace interpreter { -namespace { - -InterpreterValue constant(InterpreterState&, complex::ConstantOp constant) { - auto ty = constant->getResultTypes()[0]; - return dispatchScalarType(ty, [&](auto dummy) -> InterpreterValue { - if constexpr (is_complex_v) { - using T = typename decltype(dummy)::value_type; - auto values = - llvm::to_vector(constant.getValue().getAsValueRange()); - return {decltype(dummy){static_cast(values[0].convertToDouble()), - static_cast(values[1].convertToDouble())}}; - } else { - llvm_unreachable("invalid constant"); - } - }); -} - -REGISTER_MLIR_INTERPRETER_OP("complex.abs", "math.absf"); -REGISTER_MLIR_INTERPRETER_OP("complex.add", "arith.addf"); -REGISTER_MLIR_INTERPRETER_OP("complex.cos", applyCwiseMap); -REGISTER_MLIR_INTERPRETER_OP("complex.create", applyCwiseBinaryMap); -REGISTER_MLIR_INTERPRETER_OP("complex.div", applyCwiseBinaryMap); -REGISTER_MLIR_INTERPRETER_OP("complex.exp", applyCwiseMap); -REGISTER_MLIR_INTERPRETER_OP("complex.expm1", applyCwiseMap); -REGISTER_MLIR_INTERPRETER_OP("complex.im", applyCwiseMap); -REGISTER_MLIR_INTERPRETER_OP("complex.log", applyCwiseMap); -REGISTER_MLIR_INTERPRETER_OP("complex.log1p", applyCwiseMap); -REGISTER_MLIR_INTERPRETER_OP("complex.mul", applyCwiseBinaryMap); -REGISTER_MLIR_INTERPRETER_OP("complex.neg", applyCwiseMap); -REGISTER_MLIR_INTERPRETER_OP("complex.pow", applyCwiseBinaryMap); -REGISTER_MLIR_INTERPRETER_OP("complex.re", applyCwiseMap); -REGISTER_MLIR_INTERPRETER_OP("complex.rsqrt", applyCwiseMap); -REGISTER_MLIR_INTERPRETER_OP("complex.sin", applyCwiseMap); -REGISTER_MLIR_INTERPRETER_OP("complex.sqrt", applyCwiseMap); 
-REGISTER_MLIR_INTERPRETER_OP("complex.tanh", applyCwiseMap); -REGISTER_MLIR_INTERPRETER_OP(constant); - -} // namespace -} // namespace interpreter -} // namespace mlir diff --git a/third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/cwise_math.h b/third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/cwise_math.h deleted file mode 100644 index 3f2274116f5dbc..00000000000000 --- a/third_party/xla/xla/mlir_hlo/tools/mlir_interpreter/dialects/cwise_math.h +++ /dev/null @@ -1,239 +0,0 @@ -/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef MLIR_HLO_TOOLS_MLIR_INTERPRETER_DIALECTS_CWISE_MATH_H_ -#define MLIR_HLO_TOOLS_MLIR_INTERPRETER_DIALECTS_CWISE_MATH_H_ - -#include -#include - -#include "tools/mlir_interpreter/framework/interpreter_value_util.h" - -namespace mlir { -namespace interpreter { - -struct ATan2 : CwiseReal { - template - static T apply(T a, T b) { - return std::atan2(a, b); - } -}; - -struct Clz : CwiseInt { - template - static T apply(T a) { - if (!a) { - // Return something well-defined for zeroes. - return sizeof(T{}) * CHAR_BIT; - } - return __builtin_clzl( - static_cast(static_cast>(a))) - - (sizeof(uint64_t) - sizeof(T{})) * CHAR_BIT; - } -}; - -struct Ctz : CwiseInt { - template - static T apply(T a) { - if (!a) { - // Return something well-defined for zeroes. 
- return sizeof(T{}) * CHAR_BIT; - } - return __builtin_ctzl(static_cast(a)); - } -}; - -struct Complex : CwiseFloat { - template - static std::complex apply(T a, T b) { - return {a, b}; - } -}; - -struct Max : CwiseReal { - template - static T apply(T a, T b) { - return std::max(a, b); - } -}; - -struct Min : CwiseReal { - template - static T apply(T a, T b) { - return std::min(a, b); - } -}; - -struct Power : CwiseArith { - template - static T apply(T a, T b) { - if constexpr (std::is_integral_v) { - if constexpr (std::is_signed_v) { - if (b < 0) { - return a == 1 ? 1 : 0; - } - } - T result = 1; - while (b > 0) { - if (b & 1) result *= a; - b >>= 1; - if (b) { - a *= a; - } - } - return result; - } else { - return std::pow(a, b); - } - } -}; - -struct Remainder : CwiseReal { - template - static T apply(T a, T b) { - if constexpr (std::is_integral_v) { - return a % b; - } else { - return std::fmod(a, b); - } - } -}; - -struct ShiftRightArith : CwiseInt { - template - static T apply(T a, T b) { - return b >= sizeof(T) * CHAR_BIT ? 0 : (a >> b); - } -}; - -struct ShiftRightLogical : CwiseInt { - template - static T apply(T a, T b) { - return b >= sizeof(T) * CHAR_BIT - ? 0 - : static_cast>(a) >> b; - } -}; - -struct ShiftLeft : CwiseInt { - template - static T apply(T a, T b) { - return b >= sizeof(T) * CHAR_BIT ? 0 : (a << b); - } -}; - -namespace detail { -template